diff --git a/pkg/sql/opt/memo/testdata/memo b/pkg/sql/opt/memo/testdata/memo index 859cd5b1e4f3..8025e94a9f05 100644 --- a/pkg/sql/opt/memo/testdata/memo +++ b/pkg/sql/opt/memo/testdata/memo @@ -148,7 +148,7 @@ memo (optimized, ~18KB, required=[presentation: y:2,x:3,c:6] [ordering: +2]) ├── G5: (const 10) ├── G6: (plus G11 G12) ├── G7: (project G13 G14 y) - │ ├── [ordering: +2] [limit hint: 10.00] + │ ├── [ordering: +2] [limit hint: 100.00] │ │ ├── best: (sort G7) │ │ └── cost: 1119.26 │ ├── [ordering: +5] @@ -169,7 +169,7 @@ memo (optimized, ~18KB, required=[presentation: y:2,x:3,c:6] [ordering: +2]) ├── G11: (variable y) ├── G12: (const 1) ├── G13: (select G16 G17) - │ ├── [ordering: +2] [limit hint: 10.00] + │ ├── [ordering: +2] [limit hint: 100.00] │ │ ├── best: (sort G13) │ │ └── cost: 1112.58 │ └── [] @@ -178,7 +178,7 @@ memo (optimized, ~18KB, required=[presentation: y:2,x:3,c:6] [ordering: +2]) ├── G14: (projections G18) ├── G15: (eq G19 G20) ├── G16: (scan a) - │ ├── [ordering: +2] [limit hint: 30.00] + │ ├── [ordering: +2] [limit hint: 300.00] │ │ ├── best: (sort G16) │ │ └── cost: 1259.35 │ └── [] diff --git a/pkg/sql/opt/xform/coster.go b/pkg/sql/opt/xform/coster.go index 80d70e8c0077..b22fc8a6c686 100644 --- a/pkg/sql/opt/xform/coster.go +++ b/pkg/sql/opt/xform/coster.go @@ -415,12 +415,8 @@ func (c *coster) computeLookupJoinCost( // expensive lookup join might have a lower cost if its limit hint estimates // that most rows will not be needed. if required.LimitHint != 0 { - // Estimate the number of lookups needed to output LimitHint rows. - expectedLookupCount := required.LimitHint * lookupCount / join.Relational().Stats.RowCount - - // Round up to the nearest multiple of a batch. - expectedLookupCount = math.Ceil(expectedLookupCount/joinReaderBatchSize) * joinReaderBatchSize - lookupCount = math.Min(lookupCount, expectedLookupCount) + outputRows := join.Relational().Stats.RowCount + lookupCount = lookupJoinInputLimitHint(lookupCount, outputRows, required.LimitHint) } // The rows in the (left) input are used to probe into the (right) table. @@ -792,3 +788,14 @@ func localityMatchScore(zone cat.Zone, locality roachpb.Locality) float64 { // Weight the constraintScore twice as much as the lease score. return (constraintScore*2 + leaseScore) / 3 } + +// lookupJoinInputLimitHint calculates an appropriate limit hint for the input +// to a lookup join. +func lookupJoinInputLimitHint(inputRowCount, outputRowCount, outputLimitHint float64) float64 { + // Estimate the number of lookups needed to output LimitHint rows. + expectedLookupCount := outputLimitHint * inputRowCount / outputRowCount + + // Round up to the nearest multiple of a batch. + expectedLookupCount = math.Ceil(expectedLookupCount/joinReaderBatchSize) * joinReaderBatchSize + return math.Min(inputRowCount, expectedLookupCount) +} diff --git a/pkg/sql/opt/xform/physical_props.go b/pkg/sql/opt/xform/physical_props.go index 6de14e7faa2c..7a587fecff3b 100644 --- a/pkg/sql/opt/xform/physical_props.go +++ b/pkg/sql/opt/xform/physical_props.go @@ -123,10 +123,15 @@ func BuildChildPhysicalProps( } if input, ok := parent.Child(nth).(memo.RelExpr); ok { inputRows := input.Relational().Stats.RowCount - // outputRows / inputRows is roughly the number of output rows produced - // for each input row. Reduce the number of required input rows so that - // the expected number of output rows is equal to the parent limit hint. - childProps.LimitHint = parentProps.LimitHint * inputRows / outputRows + switch parent.Op() { + case opt.SelectOp: + // outputRows / inputRows is roughly the number of output rows produced + // for each input row. Reduce the number of required input rows so that + // the expected number of output rows is equal to the parent limit hint. + childProps.LimitHint = parentProps.LimitHint * inputRows / outputRows + case opt.LookupJoinOp: + childProps.LimitHint = lookupJoinInputLimitHint(inputRows, outputRows, parentProps.LimitHint) + } } case opt.OrdinalityOp, opt.ProjectOp, opt.ProjectSetOp: diff --git a/pkg/sql/opt/xform/testdata/coster/join b/pkg/sql/opt/xform/testdata/coster/join index 49ce6607beee..98b31943627b 100644 --- a/pkg/sql/opt/xform/testdata/coster/join +++ b/pkg/sql/opt/xform/testdata/coster/join @@ -112,6 +112,118 @@ inner-join (hash) └── filters └── k:1 = x:5 [outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] +exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["k"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 100000, + "distinct_count": 100000 + } +]' +---- + +exec-ddl +ALTER TABLE b INJECT STATISTICS '[ + { + "columns": ["x"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 10000, + "distinct_count": 1000 + } +]' +---- + +# Lookup join with no limit hint. +opt +SELECT * FROM a JOIN b ON k=z WHERE x > 0 AND x <= 5000 +---- +inner-join (lookup a) + ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + ├── key columns: [6] = [1] + ├── lookup columns are key + ├── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(6)=1000, null(6)=0] + ├── cost: 71400.04 + ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + ├── select + │ ├── columns: x:5!null z:6!null + │ ├── stats: [rows=10000, distinct(5)=1000, null(5)=0, distinct(6)=1000, null(6)=0] + │ ├── cost: 10600.03 + │ ├── scan b + │ │ ├── columns: x:5 z:6!null + │ │ ├── stats: [rows=10000, distinct(5)=1000, null(5)=0, distinct(6)=1000, null(6)=0] + │ │ └── cost: 10500.02 + │ └── filters + │ └── (x:5 > 0) AND (x:5 <= 5000) [outer=(5), constraints=(/5: [/1 - /5000]; tight)] + └── filters (true) + +# With the limit hint, the cost of the lookup join is reduced. +opt +SELECT * FROM a JOIN b ON k=z WHERE x > 0 AND x <= 5000 LIMIT 6000 +---- +limit + ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + ├── cardinality: [0 - 6000] + ├── stats: [rows=6000] + ├── cost: 55460.05 + ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + ├── inner-join (lookup a) + │ ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + │ ├── key columns: [6] = [1] + │ ├── lookup columns are key + │ ├── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(6)=1000, null(6)=0] + │ ├── cost: 55400.04 + │ ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + │ ├── limit hint: 6000.00 + │ ├── select + │ │ ├── columns: x:5!null z:6!null + │ │ ├── stats: [rows=10000, distinct(5)=1000, null(5)=0, distinct(6)=1000, null(6)=0] + │ │ ├── cost: 10600.03 + │ │ ├── limit hint: 6000.00 + │ │ ├── scan b + │ │ │ ├── columns: x:5 z:6!null + │ │ │ ├── stats: [rows=10000, distinct(5)=1000, null(5)=0, distinct(6)=1000, null(6)=0] + │ │ │ ├── cost: 10500.02 + │ │ │ └── limit hint: 6000.00 + │ │ └── filters + │ │ └── (x:5 > 0) AND (x:5 <= 5000) [outer=(5), constraints=(/5: [/1 - /5000]; tight)] + │ └── filters (true) + └── 6000 + +# The limit hint for the lookup join input will be rounded up to the nearest +# multiple of the batch size, so the cost of the lookup join here is the same +# as the test case above. +opt +SELECT * FROM a JOIN b ON k=z WHERE x > 0 AND x <= 5000 LIMIT 5950 +---- +limit + ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + ├── cardinality: [0 - 5950] + ├── stats: [rows=5950] + ├── cost: 55459.55 + ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + ├── inner-join (lookup a) + │ ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + │ ├── key columns: [6] = [1] + │ ├── lookup columns are key + │ ├── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(6)=1000, null(6)=0] + │ ├── cost: 55400.04 + │ ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + │ ├── limit hint: 5950.00 + │ ├── select + │ │ ├── columns: x:5!null z:6!null + │ │ ├── stats: [rows=10000, distinct(5)=1000, null(5)=0, distinct(6)=1000, null(6)=0] + │ │ ├── cost: 10600.03 + │ │ ├── limit hint: 6000.00 + │ │ ├── scan b + │ │ │ ├── columns: x:5 z:6!null + │ │ │ ├── stats: [rows=10000, distinct(5)=1000, null(5)=0, distinct(6)=1000, null(6)=0] + │ │ │ ├── cost: 10500.02 + │ │ │ └── limit hint: 6000.00 + │ │ └── filters + │ │ └── (x:5 > 0) AND (x:5 <= 5000) [outer=(5), constraints=(/5: [/1 - /5000]; tight)] + │ └── filters (true) + └── 5950 exec-ddl CREATE TABLE abc (a INT PRIMARY KEY, b INT, c INT, INDEX c_idx (c)) @@ -365,3 +477,133 @@ project └── filters ├── a:12 = 'foo' [outer=(12), constraints=(/12: [/'foo' - /'foo']; tight), fd=()-->(12)] └── b:13 = '2ab23800-06b1-4e19-a3bb-df3768b808d2' [outer=(13), constraints=(/13: [/'2ab23800-06b1-4e19-a3bb-df3768b808d2' - /'2ab23800-06b1-4e19-a3bb-df3768b808d2']; tight), fd=()-->(13)] + +exec-ddl +DROP TABLE abcde +---- + +exec-ddl +DROP TABLE wxyz +---- + +exec-ddl +CREATE TABLE abcde ( + a TEXT NOT NULL, + b UUID NOT NULL, + c UUID NOT NULL, + d VARCHAR(255) NOT NULL, + e TEXT NOT NULL, + CONSTRAINT "primary" PRIMARY KEY (a, b, c), + UNIQUE INDEX idx_abd (a, b, d), + UNIQUE INDEX idx_abcd (a, b, c, d) +) +---- + +exec-ddl +ALTER TABLE abcde INJECT STATISTICS '[ + { + "columns": ["a"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 250000, + "distinct_count": 1 + }, + { + "columns": ["b"], + "created_at": "2019-02-08 04:10:40.119954+00:00", + "row_count": 250000, + "distinct_count": 2 + }, + { + "columns": ["d"], + "created_at": "2019-02-08 04:10:40.119954+00:00", + "row_count": 250000, + "distinct_count": 125000 + } +]' +---- + +exec-ddl +CREATE TABLE wxyz ( + w TEXT NOT NULL, + x UUID NOT NULL, + y UUID NOT NULL, + z TEXT NOT NULL, + CONSTRAINT "primary" PRIMARY KEY (w, x, y), + CONSTRAINT "foreign" FOREIGN KEY (w, x, y) REFERENCES abcde (a, b, c) +) +---- + +exec-ddl +ALTER TABLE wxyz INJECT STATISTICS '[ + { + "columns": ["w"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 10000, + "distinct_count": 1 + }, + { + "columns": ["x"], + "created_at": "2019-02-08 04:10:40.119954+00:00", + "row_count": 10000, + "distinct_count": 1 + }, + { + "columns": ["y"], + "created_at": "2019-02-08 04:10:40.119954+00:00", + "row_count": 10000, + "distinct_count": 2500 + } +]' +---- + +# Regression test for #34811. Ensure the soft limit propagation causes us to +# select a lookup join. +opt +SELECT w, x, y, z +FROM wxyz +INNER JOIN abcde +ON w = a AND x = b AND y = c +WHERE w = 'foo' AND x = '2AB23800-06B1-4E19-A3BB-DF3768B808D2' +ORDER BY d +LIMIT 10 +---- +project + ├── columns: w:1!null x:2!null y:3!null z:4!null [hidden: d:8!null] + ├── cardinality: [0 - 10] + ├── stats: [rows=10] + ├── cost: 104717.922 + ├── key: (8) + ├── fd: ()-->(1,2), (3)-->(4,8), (8)-->(3,4) + ├── ordering: +8 opt(1,2) [actual: +8] + └── limit + ├── columns: w:1!null x:2!null y:3!null z:4!null a:5!null b:6!null c:7!null d:8!null + ├── internal-ordering: +8 opt(1,2,5,6) + ├── cardinality: [0 - 10] + ├── stats: [rows=10] + ├── cost: 104717.812 + ├── key: (7) + ├── fd: ()-->(1,2,5,6), (3)-->(4), (7)-->(8), (8)-->(7), (1)==(5), (5)==(1), (2)==(6), (6)==(2), (3)==(7), (7)==(3) + ├── ordering: +8 opt(1,2,5,6) [actual: +8] + ├── inner-join (lookup wxyz) + │ ├── columns: w:1!null x:2!null y:3!null z:4!null a:5!null b:6!null c:7!null d:8!null + │ ├── key columns: [5 6 7] = [1 2 3] + │ ├── lookup columns are key + │ ├── stats: [rows=50048.8759, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=2500, null(3)=0, distinct(5)=1, null(5)=0, distinct(6)=1, null(6)=0, distinct(7)=2500, null(7)=0] + │ ├── cost: 104717.702 + │ ├── key: (7) + │ ├── fd: ()-->(1,2,5,6), (3)-->(4), (7)-->(8), (8)-->(7), (1)==(5), (5)==(1), (2)==(6), (6)==(2), (3)==(7), (7)==(3) + │ ├── ordering: +8 opt(1,2,5,6) [actual: +8] + │ ├── limit hint: 10.00 + │ ├── scan abcde@idx_abd + │ │ ├── columns: a:5!null b:6!null c:7!null d:8!null + │ │ ├── constraint: /5/6/8: [/'foo'/'2ab23800-06b1-4e19-a3bb-df3768b808d2' - /'foo'/'2ab23800-06b1-4e19-a3bb-df3768b808d2'] + │ │ ├── stats: [rows=125000, distinct(5)=1, null(5)=0, distinct(6)=1, null(6)=0, distinct(7)=24975.5859, null(7)=0, distinct(8)=93750, null(8)=0] + │ │ ├── cost: 216.01 + │ │ ├── key: (7) + │ │ ├── fd: ()-->(5,6), (7)-->(8), (8)-->(7) + │ │ ├── ordering: +8 opt(5,6) [actual: +8] + │ │ └── limit hint: 100.00 + │ └── filters + │ ├── w:1 = 'foo' [outer=(1), constraints=(/1: [/'foo' - /'foo']; tight), fd=()-->(1)] + │ └── x:2 = '2ab23800-06b1-4e19-a3bb-df3768b808d2' [outer=(2), constraints=(/2: [/'2ab23800-06b1-4e19-a3bb-df3768b808d2' - /'2ab23800-06b1-4e19-a3bb-df3768b808d2']; tight), fd=()-->(2)] + └── 10 diff --git a/pkg/sql/opt/xform/testdata/coster/scan b/pkg/sql/opt/xform/testdata/coster/scan index d43989910df4..7a82ac57da68 100644 --- a/pkg/sql/opt/xform/testdata/coster/scan +++ b/pkg/sql/opt/xform/testdata/coster/scan @@ -12,6 +12,87 @@ scan a ├── key: (1) └── fd: (1)-->(3) +exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["k"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 100000, + "distinct_count": 100000 + }, + { + "columns": ["i"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 100000, + "distinct_count": 100 + } +]' +---- + +# The limit hint is propagated to the scan and the cost is slightly more than +# the limit hint * 2 (the scan soft limit multiplier). +opt +SELECT * FROM a WHERE k > 5 AND i IN (1, 3, 5, 7, 9) LIMIT 20 +---- +limit + ├── columns: k:1!null i:2!null s:3 d:4!null + ├── cardinality: [0 - 20] + ├── stats: [rows=20] + ├── cost: 1197.56333 + ├── key: (1) + ├── fd: (1)-->(2-4) + ├── select + │ ├── columns: k:1!null i:2!null s:3 d:4!null + │ ├── stats: [rows=1666.66667, distinct(1)=1666.66667, null(1)=0, distinct(2)=5, null(2)=0] + │ ├── cost: 1197.35333 + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── limit hint: 20.00 + │ ├── scan a + │ │ ├── columns: k:1!null i:2 s:3 d:4!null + │ │ ├── constraint: /1: [/6 - ] + │ │ ├── stats: [rows=33333.3333, distinct(1)=33333.3333, null(1)=0] + │ │ ├── cost: 864.01 + │ │ ├── key: (1) + │ │ ├── fd: (1)-->(2-4) + │ │ └── limit hint: 400.00 + │ └── filters + │ └── i:2 IN (1, 3, 5, 7, 9) [outer=(2), constraints=(/2: [/1 - /1] [/3 - /3] [/5 - /5] [/7 - /7] [/9 - /9]; tight)] + └── 20 + +# The limit hint is propagated, but the cost is not multiplied by 2 (the scan +# soft limit multiplier) since the row count is known to be less than 400 * 2. +opt +SELECT * FROM a WHERE k > 0 AND k <= 450 AND i IN (1, 3, 5, 7, 9) LIMIT 20 +---- +limit + ├── columns: k:1!null i:2!null s:3 d:4!null + ├── cardinality: [0 - 20] + ├── stats: [rows=20] + ├── cost: 490.73 + ├── key: (1) + ├── fd: (1)-->(2-4) + ├── select + │ ├── columns: k:1!null i:2!null s:3 d:4!null + │ ├── cardinality: [0 - 450] + │ ├── stats: [rows=22.5, distinct(1)=22.5, null(1)=0, distinct(2)=5, null(2)=0] + │ ├── cost: 490.52 + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── limit hint: 20.00 + │ ├── scan a + │ │ ├── columns: k:1!null i:2 s:3 d:4!null + │ │ ├── constraint: /1: [/1 - /450] + │ │ ├── cardinality: [0 - 450] + │ │ ├── stats: [rows=450, distinct(1)=450, null(1)=0] + │ │ ├── cost: 486.01 + │ │ ├── key: (1) + │ │ ├── fd: (1)-->(2-4) + │ │ └── limit hint: 400.00 + │ └── filters + │ └── i:2 IN (1, 3, 5, 7, 9) [outer=(2), constraints=(/2: [/1 - /1] [/3 - /3] [/5 - /5] [/7 - /7] [/9 - /9]; tight)] + └── 20 + # Regression test for #35042. Ensure we always prefer constrained scans. exec-ddl CREATE TABLE speed_test (id INT PRIMARY KEY DEFAULT unique_rowid()) diff --git a/pkg/sql/opt/xform/testdata/physprops/limit_hint b/pkg/sql/opt/xform/testdata/physprops/limit_hint index 3d309ef74496..671c3bac2f90 100644 --- a/pkg/sql/opt/xform/testdata/physprops/limit_hint +++ b/pkg/sql/opt/xform/testdata/physprops/limit_hint @@ -402,6 +402,95 @@ limit │ └── generate_series(1, x:1) [outer=(1), side-effects] └── 10 +# -------------------------------------------------- +# Lookup join. +# -------------------------------------------------- + +exec-ddl +CREATE TABLE a (k INT PRIMARY KEY, i INT, s STRING, d DECIMAL NOT NULL) +---- + +exec-ddl +CREATE TABLE b (x INT, z INT NOT NULL) +---- + +exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["k"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 100000, + "distinct_count": 100000 + } +]' +---- + +exec-ddl +ALTER TABLE b INJECT STATISTICS '[ + { + "columns": ["x"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 10000, + "distinct_count": 1000 + } +]' +---- + +# Ensure the limit hint is propagated to the lookup join input as a multiple +# of the batch size. +opt +SELECT * FROM a JOIN b ON k=z WHERE x > 0 AND x <= 5000 LIMIT 6003 +---- +limit + ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + ├── cardinality: [0 - 6003] + ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + ├── inner-join (lookup a) + │ ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + │ ├── key columns: [6] = [1] + │ ├── lookup columns are key + │ ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + │ ├── limit hint: 6003.00 + │ ├── select + │ │ ├── columns: x:5!null z:6!null + │ │ ├── limit hint: 6100.00 + │ │ ├── scan b + │ │ │ ├── columns: x:5 z:6!null + │ │ │ └── limit hint: 6100.00 + │ │ └── filters + │ │ └── (x:5 > 0) AND (x:5 <= 5000) [outer=(5)] + │ └── filters (true) + └── 6003 + +# The limit hint for the lookup join input must be at least the batch size. +opt +SELECT * FROM a JOIN b ON k=z WHERE x > 0 AND x <= 5000 LIMIT 3 +---- +limit + ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + ├── cardinality: [0 - 3] + ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + ├── inner-join (lookup a) + │ ├── columns: k:1!null i:2 s:3 d:4!null x:5!null z:6!null + │ ├── key columns: [6] = [1] + │ ├── lookup columns are key + │ ├── fd: (1)-->(2-4), (1)==(6), (6)==(1) + │ ├── limit hint: 3.00 + │ ├── select + │ │ ├── columns: x:5!null z:6!null + │ │ ├── limit hint: 100.00 + │ │ ├── scan b + │ │ │ ├── columns: x:5 z:6!null + │ │ │ └── limit hint: 100.00 + │ │ └── filters + │ │ └── (x:5 > 0) AND (x:5 <= 5000) [outer=(5)] + │ └── filters (true) + └── 3 + +# -------------------------------------------------- +# Negative limits. +# -------------------------------------------------- + # Regression test for #44683. exec-ddl CREATE TABLE t44683(c0 INT)