#[deriving(PartialOrd)] is O(N^2) code size for N enum variants #15375

alexcrichton · 2014-07-03T17:49:22Z

The program below generates a 259MB rlib, taking about two minutes to compile. Something pathological is going on here which shouldn't be going on.

#[deriving(PartialEq, PartialOrd)]
pub enum Key {
    Unknown                 ,
    Backspace               ,
    Tab                     ,
    Return                  ,
    Escape                  ,
    Space                   ,
    Exclaim                 ,
    Quotedbl                ,
    Hash                    ,
    Dollar                  ,
    Percent                 ,
    Ampersand               ,
    Quote                   ,
    LeftParen               ,
    RightParen              ,
    Asterisk                ,
    Plus                    ,
    Comma                   ,
    Minus                   ,
    Period                  ,
    Slash                   ,
    D0                      ,
    D1                      ,
    D2                      ,
    D3                      ,
    D4                      ,
    D5                      ,
    D6                      ,
    D7                      ,
    D8                      ,
    D9                      ,
    Colon                   ,
    Semicolon               ,
    Less                    ,
    Equals                  ,
    Greater                 ,
    Question                ,
    At                      ,
    LeftBracket             ,
    Backslash               ,
    RightBracket            ,
    Caret                   ,
    Underscore              ,
    Backquote               ,
    A                       ,
    B                       ,
    C                       ,
    D                       ,
    E                       ,
    F                       ,
    G                       ,
    H                       ,
    I                       ,
    J                       ,
    K                       ,
    L                       ,
    M                       ,
    N                       ,
    O                       ,
    P                       ,
    Q                       ,
    R                       ,
    S                       ,
    T                       ,
    U                       ,
    V                       ,
    W                       ,
    X                       ,
    Y                       ,
    Z                       ,
    Delete                  ,
    CapsLock                ,
    F1                      ,
    F2                      ,
    F3                      ,
    F4                      ,
    F5                      ,
    F6                      ,
    F7                      ,
    F8                      ,
    F9                      ,
    F10                     ,
    F11                     ,
    F12                     ,
    PrintScreen             ,
    ScrollLock              ,
    Pause                   ,
    Insert                  ,
    Home                    ,
    PageUp                  ,
    End                     ,
    PageDown                ,
    Right                   ,
    Left                    ,
    Down                    ,
    Up                      ,
    NumLockClear            ,
    NumPadDivide            ,
    NumPadMultiply          ,
    NumPadMinus             ,
    NumPadPlus              ,
    NumPadEnter             ,
    NumPad1                 ,
    NumPad2                 ,
    NumPad3                 ,
    NumPad4                 ,
    NumPad5                 ,
    NumPad6                 ,
    NumPad7                 ,
    NumPad8                 ,
    NumPad9                 ,
    NumPad0                 ,
    NumPadPeriod            ,
    Application             ,
    Power                   ,
    NumPadEquals            ,
    F13                     ,
    F14                     ,
    F15                     ,
    F16                     ,
    F17                     ,
    F18                     ,
    F19                     ,
    F20                     ,
    F21                     ,
    F22                     ,
    F23                     ,
    F24                     ,
    Execute                 ,
    Help                    ,
    Menu                    ,
    Select                  ,
    Stop                    ,
    Again                   ,
    Undo                    ,
    Cut                     ,
    Copy                    ,
    Paste                   ,
    Find                    ,
    Mute                    ,
    VolumeUp                ,
    VolumeDown              ,
    NumPadComma             ,
    NumPadEqualsAS400       ,
    AltErase                ,
    Sysreq                  ,
    Cancel                  ,
    Clear                   ,
    Prior                   ,
    Return2                 ,
    Separator               ,
    Out                     ,
    Oper                    ,
    ClearAgain              ,
    CrSel                   ,
    ExSel                   ,
    NumPad00                ,
    NumPad000               ,
    ThousandsSeparator      ,
    DecimalSeparator        ,
    CurrencyUnit            ,
    CurrencySubUnit         ,
    NumPadLeftParen         ,
    NumPadRightParen        ,
    NumPadLeftBrace         ,
    NumPadRightBrace        ,
    NumPadTab               ,
    NumPadBackspace         ,
    NumPadA                 ,
    NumPadB                 ,
    NumPadC                 ,
    NumPadD                 ,
    NumPadE                 ,
    NumPadF                 ,
    NumPadXor               ,
    NumPadPower             ,
    NumPadPercent           ,
    NumPadLess              ,
    NumPadGreater           ,
    NumPadAmpersand         ,
    NumPadDblAmpersand      ,
    NumPadVerticalBar       ,
    NumPadDblVerticalBar    ,
    NumPadColon             ,
    NumPadHash              ,
    NumPadSpace             ,
    NumPadAt                ,
    NumPadExclam            ,
    NumPadMemStore          ,
    NumPadMemRecall         ,
    NumPadMemClear          ,
    NumPadMemAdd            ,
    NumPadMemSubtract       ,
    NumPadMemMultiply       ,
    NumPadMemDivide         ,
    NumPadPlusMinus         ,
    NumPadCear              ,
    NumPadClearEntry        ,
    NumPadBinary            ,
    NumPadOctal             ,
    NumPadDecimal           ,
    NumPadHexadecimal       ,
    LCtrl                   ,
    LShift                  ,
    LAlt                    ,
    LGui                    ,
    RCtrl                   ,
    RShift                  ,
    RAlt                    ,
    RGui                    ,
    Mode                    ,
    AudioNext               ,
    AudioPrev               ,
    AudioStop               ,
    AudioPlay               ,
    AudioMute               ,
    MediaSelect             ,
    Www                     ,
    Mail                    ,
    Calculator              ,
    Computer                ,
    AcSearch                ,
    AcHome                  ,
    AcBack                  ,
    AcForward               ,
    AcStop                  ,
    AcRefresh               ,
    AcBookmarks             ,
    BrightnessDown          ,
    BrightnessUp            ,
    DisplaySwitch           ,
    KbdIllumToggle          ,
    KbdIllumDown            ,
    KbdIllumUp              ,
    Eject                   ,
    Sleep                   ,
}

cc #15346

pnkfelix · 2014-07-03T17:51:36Z

cc me

alexcrichton · 2014-07-03T17:51:49Z

Updated with a more accurate description

alexcrichton · 2014-07-03T17:52:04Z

As in, it appears that deriving(PartialOrd) is generating an absurd amount of code.

alexcrichton · 2014-07-03T17:54:35Z

Yes, --pretty expanded takes the source file from 7K to 11MB. That would help to explain the gargantuan size here. Additionally, each function is marked with #[inline] accounding for the absurd size of the metadata. Fun times!

sfackler · 2014-07-03T18:00:28Z

It could potentially be a regression from the partial_cmp implementation, since I touched the PartialOrd deriving logic.

luqmana · 2014-07-03T20:05:30Z

cc @cmr

huonw · 2014-07-03T20:46:59Z

Pretty sure this is my fault, sorry. :(

It might be worth deriving detecting C-like enums like this and just casting to an integer, rather than matching.

sfackler · 2014-07-03T20:56:21Z

@huonw any idea how long it's been like this? It could explain a blowup in the size of the rust-postgres binary that I've noticed but haven't bothered looking into.

huonw · 2014-07-03T21:13:41Z

It has been creating a huge match ever since I wrote the generalised deriving ages ago; but maybe partial_cmp is particularly bad for this? (I think it's somewhat unlikely that partial_cmp would be special, though.)

pnkfelix · 2014-07-03T21:48:24Z

@huonw yes on the way home I was also thinking that special case handling of C-like enums in the various deriving implementations could be warranted. Its not as nice as figuring out the source of this blowup (but who ever said we can't do both. :) )

huonw · 2014-07-03T21:53:32Z

Have we actually proved that this is a new thing? Maybe @cmr could test some older compilers?

pnkfelix · 2014-07-03T22:04:33Z

@huonw even if its not new (i.e. not an injection), ... I guess I was just assuming that there would still be some way to implement a generic deriving(PartialOrd) that did not have a code explosion.

sfackler · 2014-07-03T22:08:04Z

PartialOrd currently generates impls for all of partial_cmp, lt, le, gt and ge and inlines them all. We could add some logic to detect if the implementee is "too big" and instead generate a non-inlined implementation of only partial_cmp and allow the remaining methods to fall back to their default implementations in terms of partial_cmp. At that size, the function call overhead isn't really something that would be noticeable in comparison with the rest of the method runtime anyway.

sfackler · 2014-07-03T22:12:16Z

We could (should) also rewrite the implementations to avoid expanding into a huge nested stack of matches to something like this:

match self.a.partial_cmp(&other.a) {
    Some(Equal) => {}
    r => return r
}

match self.b.partial_cmp(&other.b) {
    Some(Equal) => {}
    r => return r,
}

...

Even if it's not smaller, it'd at least be more readable.

luqmana · 2014-07-03T22:24:06Z

Is there any reason to not just generate only partial_cmp since the rest have default methods implemented in terms of that?

pnkfelix · 2014-07-03T23:05:05Z

@huonw As you are probably already aware: the current implementation for every method is inherently quadratic growth (O(n^2)). (This is the case for all of the implementation including partial_cmp, so defining the other methods in terms of partial_cmp will only postpone the problem, since the quadratic blowup will still arise with partial_cmp alone.)

The idea of just comparing numbers for the C-like enum case is a good one, and I believe it generalizes: expand into a match that handles all of the cases where both sides are the same variant (and recur for each case); then in a fall back, compare the discriminants directly (which were presumably extracted via unsafe code).

So for this enum

pub enum Key<T> { A(T), B(T), C(T), D(T), ... }

instead of this:

match *self {
  // each of these is ...
  A(ref __self_0) => match *__arg_0 { A(ref__arg_1_0) => /* recur */, B(_) => Less, C(_) => Less, D(_) => Less, ... },
  // ... N elements long
  B(ref __self_0) => match *__arg_0 { A(_) => Greater, B(ref__arg_1_0) => /* recur */, C(_) => Less, D(_) => Less, ... },
  // .. and there are N variants, yielding O(N^2) code.
  ...
}

Expand into something like this:

let discrim_self = ...;
let discrim_arg_0 = ...;
match (*self, *__arg_0) {
  (A(ref __self_0), A(ref __arg_1_0)) => /* recur */,
  (B(ref __self_0), B(ref __arg_1_0)) => /* recur */,
  (C(ref __self_0), C(ref __arg_1_0)) => /* recur */,
  // N variants, but each has code that is bounded in complexity by the particular variant
  // thus we should be O(N) overall.
  ...
  (_, _) => discrim_self.cmp(discrim_arg_0)
}

huonw · 2014-07-03T23:10:50Z

@pnkfelix That's a good idea!

Is there any reason to not just generate only partial_cmp since the rest have default methods implemented in terms of that?

Previously there was only lt, and the fact it was a partial ordering required that every method be overridden to defer to the underlying types' partial ordering (that is, just implementing lt would've been incorrect). I assume this is no longer true, but we would be relying on partial_cmp and the other methods matching exactly.

compare the discriminants directly (which were presumably extracted via unsafe code).

Due to the undefinedness of representation, I would think we need an intrinsic for this to avoid the unsafe being broken by the compiler switching around the enum. (Although, what's the discriminant for something like Option<&T>? It doesn't matter for this case, but it seems it would matter for an intrinsic.)

pnkfelix · 2014-07-03T23:12:40Z

@huonw you are right, a compiler intrinstic is the right way to query for a uint value of the discriminant.

huonw · 2014-07-03T23:13:34Z

Oh, also, I think that the undefinedness of representation may cause the ordering to change, e.g. currently None < Some(_) always, but if #14540 was implemented, along with discriminant comparison, then I would think the following situation is likely

enum Foo { A, B } // 0, 1
Some(A) // a u8 with value 0
None::<Foo> // a u8 with value 2

i.e. None > Some(_) at the discriminant level, disagreeing with the default.

pnkfelix · 2014-07-04T00:12:24Z

@huonw Good point.

So, okay, if extracting the discriminant via an intrinsic and mapping that to an appropriate value becomes unworkable (or not cost-effective), then: Worst case scenario, we don't bother attempting to base the comparison on the internal discriminant. Instead we compute the appropriate number within the partial_cmp method itself, like so:

let discrim_self = match *self { A(..) => 0u, B(..) => 1u, C(..) => 2u, ... }; // N variants
let discrim_arg_0 = match *__arg_0 { A(..) => 0u, B(..) => 1u, C(..) => 2u, ... }; // N variants
match (*self, *__arg_0) {
  (A(ref __self_0), A(ref __arg_1_0)) => /* recur */,
  (B(ref __self_0), B(ref __arg_1_0)) => /* recur */,
  (C(ref __self_0), C(ref __arg_1_0)) => /* recur */,
  // as above, N variants, each bounded by complexity of the tuple of its variant.
  ...
  (_, _) => discrim_self.cmp(discrim_arg_0)
}

I mean, its not great having code size here be proportional to 3*N, but its still O(N), versus what we are dealing with today.

pnkfelix · 2014-07-06T17:14:19Z

I wonder if the reason we had not noticed this problem until now is that we rarely use deriving([Partial]Ord) on enums in our own libraries, and never on enums with more than 6 variants.

Here is an informal survey:

-*- mode: ack; default-directory: "~/Dev/Mozilla/rust.git/src/" -*-
Ack started at Sun Jul  6 19:14:56

ack -A 3 'deriving.*Ord' | grep 'enum '
libcore/option.rs-152-pub enum Option<T> {
libcore/result.rs-288-pub enum Result<T, E> {
libglob/lib.rs-204-enum PatternToken {
libglob/lib.rs-213-enum CharSpecifier {
libnum/bigint.rs-787-pub enum Sign { Minus, Zero, Plus }
librustc/back/link.rs-49-pub enum OutputType {
librustc/driver/config.rs-138-pub enum CrateType {
librustc/lint/mod.rs-208-pub enum Level {
librustc/middle/subst.rs-228-pub enum ParamSpace {
librustc/middle/ty.rs-653-pub enum BoundRegion {
libserialize/json.rs-156-pub enum Json {
libsyntax/attr.rs-372-pub enum StabilityLevel {
test/compile-fail/deriving-span-PartialOrd-enum-struct-variant.rs-20-enum Enum {
test/compile-fail/deriving-span-PartialOrd-enum.rs-20-enum Enum {
test/compile-fail/deriving-span-TotalOrd-enum-struct-variant.rs-20-enum Enum {
test/compile-fail/deriving-span-TotalOrd-enum.rs-20-enum Enum {
test/run-pass/deriving-cmp-generic-enum.rs-14-enum E<T> {
test/run-pass/deriving-cmp-generic-struct-enum.rs-16-enum ES<T> {

Ack finished at Sun Jul  6 19:15:11

(I guess a better survey would use librustc itself to catch macros that expand into enums...)

pnkfelix · 2014-07-06T21:19:45Z

Over the weekend I whipped up a small commit series almost ready that implements the idea I outlined above. I will hopefully have a PR for it put together sometime tomorrow.

…r=huonw Instead of generating a separate case (albeit trivial) for each of the N*N cases when comparing two instances of an enum with N variants, this `deriving` uses the strategy outlined here: #15375 (comment) In particular, it generates code that looks more like this: ```rust match (this, that, ...) { (Variant1, Variant1, Variant1) => ... // delegate Matching on Variant1 (Variant2, Variant2, Variant2) => ... // delegate Matching on Variant2 ... _ => { let index_tup = { let idx_this = match this { Variant1 => 0u, Variant2 => 1u, ... }; let idx_that = match that { Variant1 => 0u, Variant2 => 1u, ... }; ... (idx_this, idx_that, ...) }; ... // delegate to catch-all; it can inspect `index_tup` for its logic } } ``` While adding a new variant to the `const_nonmatching` flag (and renaming it to `on_nonmatching`) to allow expressing the above (while still allowing one to opt back into the old `O(N^2)` and in general `O(N^K)` (where `K` is the number of self arguments) code generation behavior), I had two realizations: 1. Observation: Nothing except for the comparison derivings (`PartialOrd`, `Ord`, `PartialEq`, `Eq`) were even using the old `O(N^K)` code generator. So after this hypothetically lands, *nothing* needs to use them, and thus that code generation strategy could be removed, under the assumption that it is very unlikely that any `deriving` mode will actually need that level of generality. 2. Observation: The new code generator I am adding can actually be unified with all of the other code generators that just dispatch on the variant tag (they all assume that there is only one self argument). These two observations mean that one can get rid of the `const_nonmatching` (aka `on_nonmatching`) entirely. So I did that too in this PR. The question is: Do we actually want to follow through on both of the above observations? I'm pretty sure the second observation is a pure win. But there *might* be someone out there with an example that invalidates the reasoning in the first observation. That is, there might be a client out there with an example of hypothetical deriving mode that wants to opt into the `O(N^K)` behavior. So, if that is true, then I can revise this PR to resurrect the `on_nonmatching` flag and provide a way to access the `O(N^K)` behavior. The manner in which I choose to squash these commits during a post-review rebase depends on the answer to the above question. Fix #15375.

Simplify

alexcrichton added the A-codegen label Jul 3, 2014

alexcrichton mentioned this issue Jul 3, 2014

Rustc Fails to Compile Piston with Illegal Instruction Error #15346

Closed

alexcrichton changed the title ~~Absurd amounts of code with large enums~~ #[deriving(PartialOrd)] generates absurd amounts of code with large enums Jul 3, 2014

gmorenz mentioned this issue Jul 3, 2014

Piston Freezes During Compilation with Cargo Build PistonDevelopers/piston#467

Closed

TyOverby mentioned this issue Jul 3, 2014

Implement Ord, PartialOrd, Eq, PartialEq by hand PistonDevelopers/piston#470

Merged

pnkfelix changed the title ~~#[deriving(PartialOrd)] generates absurd amounts of code with large enums~~ #[deriving(PartialOrd)] is O(N^2) code size for N enum variants Jul 3, 2014

pnkfelix mentioned this issue Jul 7, 2014

New deriving that generates O(n) code for PartialOrd, Ord, etc. #15503

Merged

bors closed this as completed in #15503 Jul 11, 2014

pnkfelix mentioned this issue Jul 11, 2014

deriving codegen: add intrinsic to map enum instance to its variant index #15620

Closed

huonw mentioned this issue Jan 10, 2015

#[deriving(PartialEq, Show)] generates poor implementations for large enums #20856

Closed

pnkfelix mentioned this issue Apr 30, 2015

Bad performing code from #[deriving(Eq)] for enums #13623

Closed

bors added a commit to rust-lang-ci/rust that referenced this issue Aug 7, 2023

Auto merge of rust-lang#15375 - Veykril:hygiene, r=Veykril

5945ef9

Simplify

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

#[deriving(PartialOrd)] is O(N^2) code size for N enum variants #15375

#[deriving(PartialOrd)] is O(N^2) code size for N enum variants #15375

alexcrichton commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

alexcrichton commented Jul 3, 2014

alexcrichton commented Jul 3, 2014

alexcrichton commented Jul 3, 2014

sfackler commented Jul 3, 2014

luqmana commented Jul 3, 2014

huonw commented Jul 3, 2014

sfackler commented Jul 3, 2014

huonw commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

huonw commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

sfackler commented Jul 3, 2014

sfackler commented Jul 3, 2014

luqmana commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

huonw commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

huonw commented Jul 3, 2014

pnkfelix commented Jul 4, 2014

pnkfelix commented Jul 6, 2014

pnkfelix commented Jul 6, 2014

#[deriving(PartialOrd)] is O(N^2) code size for N enum variants #15375

#[deriving(PartialOrd)] is O(N^2) code size for N enum variants #15375

Comments

alexcrichton commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

alexcrichton commented Jul 3, 2014

alexcrichton commented Jul 3, 2014

alexcrichton commented Jul 3, 2014

sfackler commented Jul 3, 2014

luqmana commented Jul 3, 2014

huonw commented Jul 3, 2014

sfackler commented Jul 3, 2014

huonw commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

huonw commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

sfackler commented Jul 3, 2014

sfackler commented Jul 3, 2014

luqmana commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

huonw commented Jul 3, 2014

pnkfelix commented Jul 3, 2014

huonw commented Jul 3, 2014

pnkfelix commented Jul 4, 2014

pnkfelix commented Jul 6, 2014

pnkfelix commented Jul 6, 2014