From f3144aa8cb714c2159519be4a7a25e14c8f7082e Mon Sep 17 00:00:00 2001 From: Jordan Lewis Date: Sun, 1 Mar 2020 17:57:47 -0500 Subject: [PATCH] sql: add inverted indexes on arrays This commit adds inverted index support to arrays. Inverted index entries are created from arrays by simply encoding a key that contains the array element's table key encoding. Nulls are not indexed, since in SQL, ARRAY[1, NULL] @> ARRAY[NULL] returns false. For example, in a table t(int, int[]) with an inverted index with id 3 on the int[] column the row (10, [1, NULL, 2]) produces 2 index keys: ``` /tableId/3/1/10 /tableId/3/2/10 ``` This encoding scheme is much simpler than the one for JSON, since arrays don't have "paths": their elements are simply ordinary datums. Release note (sql change): The inverted index implementation now supports indexing array columns. This permits accelerating containment queries (@> and <@) on array columns by adding an index to them. --- docs/generated/sql/functions.md | 6 +- pkg/cli/testdata/dump/inverted_index | 18 +- pkg/sql/backfill.go | 2 +- .../testdata/logic_test/inverted_index | 106 +++++++++ pkg/sql/opt/cat/index.go | 2 +- .../exec/execbuilder/testdata/inverted_index | 157 ++++++++++++++ .../opt/idxconstraint/index_constraints.go | 201 ++++++++++++------ pkg/sql/opt/idxconstraint/testdata/inverted | 37 ++++ pkg/sql/opt/xform/testdata/rules/select | 81 +++++++ pkg/sql/sem/builtins/builtins.go | 32 ++- pkg/sql/sqlbase/index_encoding.go | 51 ++++- pkg/sql/sqlbase/structured.go | 3 +- 12 files changed, 607 insertions(+), 89 deletions(-) diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md index 8d699f0df569..51c6808db2ae 100644 --- a/docs/generated/sql/functions.md +++ b/docs/generated/sql/functions.md @@ -1095,8 +1095,6 @@ SELECT * FROM crdb_internal.check_consistency(true, ‘\x02’, ‘\x04’)

crdb_internal.get_zone_config(namespace_id: int) → bytes crdb_internal.is_admin() → bool

Retrieves the current user’s admin status.

-crdb_internal.json_num_index_entries(val: jsonb) → int

This function is used only by CockroachDB’s developers for testing purposes.

-
crdb_internal.lease_holder(key: bytes) → int

This function is used to fetch the leaseholder corresponding to a request key

crdb_internal.locality_value(key: string) → string

Returns the value of the specified locality key.

@@ -1105,6 +1103,10 @@ SELECT * FROM crdb_internal.check_consistency(true, ‘\x02’, ‘\x04’)

crdb_internal.node_executable_version() → string

Returns the version of CockroachDB this node is running.

+crdb_internal.num_inverted_index_entries(val: anyelement[]) → int

This function is used only by CockroachDB’s developers for testing purposes.

+
+crdb_internal.num_inverted_index_entries(val: jsonb) → int

This function is used only by CockroachDB’s developers for testing purposes.

+
crdb_internal.pretty_key(raw_key: bytes, skip_fields: int) → string

This function is used only by CockroachDB’s developers for testing purposes.

crdb_internal.range_stats(key: bytes) → jsonb

This function is used to retrieve range statistics information as a JSON object.

diff --git a/pkg/cli/testdata/dump/inverted_index b/pkg/cli/testdata/dump/inverted_index index 19e44fbdb15e..6809c490d536 100644 --- a/pkg/cli/testdata/dump/inverted_index +++ b/pkg/cli/testdata/dump/inverted_index @@ -3,12 +3,16 @@ CREATE DATABASE d; CREATE TABLE d.t ( a JSON, b JSON, - INVERTED INDEX idx (a) + c INT[], + d INT[], + INVERTED INDEX idx (a), + INVERTED INDEX idx3 (c) ); CREATE INVERTED INDEX idx2 ON d.t (b); +CREATE INVERTED INDEX idx4 ON d.t (d); -INSERT INTO d.t VALUES ('{"a": "b"}', '{"c": "d"}'); +INSERT INTO d.t VALUES ('{"a": "b"}', '{"c": "d"}', ARRAY[1], ARRAY[2]); ---- INSERT 1 @@ -18,12 +22,16 @@ dump d t CREATE TABLE t ( a JSONB NULL, b JSONB NULL, + c INT8[] NULL, + d INT8[] NULL, INVERTED INDEX idx (a), + INVERTED INDEX idx3 (c), INVERTED INDEX idx2 (b), - FAMILY "primary" (a, b, rowid) + INVERTED INDEX idx4 (d), + FAMILY "primary" (a, b, c, d, rowid) ); -INSERT INTO t (a, b) VALUES - ('{"a": "b"}', '{"c": "d"}'); +INSERT INTO t (a, b, c, d) VALUES + ('{"a": "b"}', '{"c": "d"}', ARRAY[1], ARRAY[2]); ---- ---- diff --git a/pkg/sql/backfill.go b/pkg/sql/backfill.go index ff7057b71621..035d12facd2b 100644 --- a/pkg/sql/backfill.go +++ b/pkg/sql/backfill.go @@ -1243,7 +1243,7 @@ func (sc *SchemaChanger) validateInvertedIndexes( row, err := ie.QueryRowEx(ctx, "verify-inverted-idx-count", txn, sqlbase.InternalExecutorSessionDataOverride{}, fmt.Sprintf( - `SELECT coalesce(sum_int(crdb_internal.json_num_index_entries(%s)), 0) FROM [%d AS t]`, + `SELECT coalesce(sum_int(crdb_internal.num_inverted_index_entries(%s)), 0) FROM [%d AS t]`, col, tableDesc.ID, ), ) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index 7fc265c272f0..a37d314007fd 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -694,3 +694,109 @@ CREATE INVERTED INDEX ON table_with_nulls (a) statement ok DROP TABLE table_with_nulls + +statement ok +DROP TABLE c + +subtest arrays + +statement ok +CREATE TABLE c ( + id INT PRIMARY KEY, + foo INT[], + bar STRING[], + INVERTED INDEX (foo), + FAMILY "primary" (id, foo, bar) +) + +statement ok +INSERT INTO c VALUES(0, NULL, NULL) + +statement ok +INSERT INTO c VALUES(1, ARRAY[], ARRAY['foo', 'bar', 'baz']) + +statement ok +CREATE INDEX ON c USING GIN (bar) + +query TT +SHOW CREATE TABLE c +---- +c CREATE TABLE c ( + id INT8 NOT NULL, + foo INT8[] NULL, + bar STRING[] NULL, + CONSTRAINT "primary" PRIMARY KEY (id ASC), + INVERTED INDEX c_foo_idx (foo), + INVERTED INDEX c_bar_idx (bar), + FAMILY "primary" (id, foo, bar) +) + +query ITT +SELECT * from c WHERE bar @> ARRAY['foo'] +---- +1 {} {foo,bar,baz} + +query ITT +SELECT * from c WHERE bar @> ARRAY['bar', 'baz'] +---- +1 {} {foo,bar,baz} + +query ITT +SELECT * from c WHERE bar @> ARRAY['bar', 'qux'] +---- + +statement ok +INSERT INTO c VALUES(2, NULL, NULL) + +statement ok +INSERT INTO c VALUES(3, ARRAY[0,1,NULL], ARRAY['a',NULL,'b',NULL]) + +statement ok +INSERT INTO c VALUES(4, ARRAY[1,2,3], ARRAY['b',NULL,'c']) + +statement ok +INSERT INTO c VALUES(5, ARRAY[], ARRAY[NULL, NULL]) + +query ITT +SELECT * FROM c WHERE foo @> ARRAY[0] +---- +3 {0,1,NULL} {a,NULL,b,NULL} + +query error unsupported comparison operator +SELECT * FROM c WHERE foo @> 0 + +query ITT +SELECT * FROM c WHERE foo @> ARRAY[1] +---- +3 {0,1,NULL} {a,NULL,b,NULL} +4 {1,2,3} {b,NULL,c} + +# This is expected, although it looks odd, because in SQL, +# ARRAY[NULL] @> ARRAY[NULL] returns false. +query ITT +SELECT * FROM c WHERE foo @> ARRAY[NULL]::INT[] +---- + +query ITT +SELECT * FROM c WHERE bar @> ARRAY['a'] +---- +3 {0,1,NULL} {a,NULL,b,NULL} + +query ITT +SELECT * FROM c WHERE bar @> ARRAY['b'] +---- +3 {0,1,NULL} {a,NULL,b,NULL} +4 {1,2,3} {b,NULL,c} + +query ITT +SELECT * FROM c WHERE bar @> ARRAY['c'] +---- +4 {1,2,3} {b,NULL,c} + +query ITT +SELECT * FROM c WHERE bar @> ARRAY[]::TEXT[] +---- +1 {} {foo,bar,baz} +3 {0,1,NULL} {a,NULL,b,NULL} +4 {1,2,3} {b,NULL,c} +5 {} {NULL,NULL} diff --git a/pkg/sql/opt/cat/index.go b/pkg/sql/opt/cat/index.go index 7f4343623c0e..693a45d08315 100644 --- a/pkg/sql/opt/cat/index.go +++ b/pkg/sql/opt/cat/index.go @@ -48,7 +48,7 @@ type Index interface { // IsUnique returns true if this index is declared as UNIQUE in the schema. IsUnique() bool - // IsInverted returns true if this is a JSON inverted index. + // IsInverted returns true if this is an inverted index. IsInverted() bool // ColumnCount returns the number of columns in the index. This includes diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 6ce9e2e076e5..152ea499da31 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -7,9 +7,27 @@ CREATE TABLE d ( FAMILY (a,b) ) +statement ok +CREATE TABLE e ( + a INT PRIMARY KEY, + b INT[], + FAMILY (a,b), + INVERTED INDEX(b) +) + +statement ok +CREATE TABLE f ( + a INT PRIMARY KEY, + b DECIMAL[], + FAMILY (a,b), + INVERTED INDEX(b) +) + statement ok CREATE INVERTED INDEX foo_inv ON d(b) +# Tests for json inverted indexes. + query T kvtrace INSERT INTO d VALUES(0, '{"a": "b"}') ---- @@ -46,6 +64,52 @@ INSERT INTO d VALUES(4, NULL) ---- CPut /Table/53/1/4/0 -> /TUPLE/ +# Tests for array inverted indexes. + +# Make sure that duplicate entries do not get emitted more than once. +query T kvtrace +INSERT INTO e VALUES(0, ARRAY[7,0,0,1,10,0,1,7]) +---- +CPut /Table/54/1/0/0 -> /TUPLE/ +InitPut /Table/54/2/0/0/0 -> /BYTES/ +InitPut /Table/54/2/1/0/0 -> /BYTES/ +InitPut /Table/54/2/7/0/0 -> /BYTES/ +InitPut /Table/54/2/10/0/0 -> /BYTES/ + +# Make sure that empty arrays do not emit any keys at all. +query T kvtrace +INSERT INTO e VALUES(1, ARRAY[]) +---- +CPut /Table/54/1/1/0 -> /TUPLE/ + +# Make sure that NULL arrays do not emit any keys at all. +query T kvtrace +INSERT INTO e VALUES(2, NULL) +---- +CPut /Table/54/1/2/0 -> /TUPLE/ + +# Test that array inverted indexes work okay with decimals (a type with +# composite encoding). + +query T kvtrace +INSERT INTO f VALUES(0, ARRAY[7,0,0,1,10,0,1,7]) +---- +CPut /Table/55/1/0/0 -> /TUPLE/ +InitPut /Table/55/2/0/0/0 -> /BYTES/ +InitPut /Table/55/2/1/0/0 -> /BYTES/ +InitPut /Table/55/2/7/0/0 -> /BYTES/ +InitPut /Table/55/2/1E+1/0/0 -> /BYTES/ + +query T kvtrace +INSERT INTO f VALUES(1, ARRAY[]) +---- +CPut /Table/55/1/1/0 -> /TUPLE/ + +query T kvtrace +INSERT INTO f VALUES(2, NULL) +---- +CPut /Table/55/1/2/0 -> /TUPLE/ + query TTTTT EXPLAIN (VERBOSE) SELECT * from d where b @>'{"a": "b"}' ---- @@ -368,3 +432,96 @@ scan · · (a, b) · · table d@primary · · · spans ALL · · · filter b @> '{"a": {}, "b": {}}' · · + +subtest array + +query TTTTT +EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[1] +---- +· distributed false · · +· vectorized false · · +index-join · · (a, b) · + │ table e@primary · · + │ key columns a · · + └── scan · · (a) · +· table e@e_b_idx · · +· spans /1-/2 · · + +query TTTTT +EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[]::INT[] +---- +· distributed false · · +· vectorized false · · +scan · · (a, b) · +· table e@primary · · +· spans ALL · · +· filter b @> ARRAY[] · · + +query TTTTT +EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[NULL]::INT[] +---- +· distributed false · · +· vectorized false · · +index-join · · (a, b) · + │ table e@primary · · + │ key columns a · · + └── norows · · (a) · + +query TTTTT +EXPLAIN (VERBOSE) SELECT * from e where b @> NULL +---- +· distributed false · · +· vectorized false · · +norows · · (a, b) · + +query TTTTT +EXPLAIN (VERBOSE) SELECT * from e where b IS NULL +---- +· distributed false · · +· vectorized false · · +scan · · (a, b) · +· table e@primary · · +· spans ALL · · +· filter b IS NULL · · + +query TTTTT +EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[1,2] +---- +· distributed false · · +· vectorized false · · +lookup-join · · (a, b) · + │ table e@primary · · + │ type inner · · + │ equality (a) = (a) · · + │ equality cols are key · · · + │ parallel · · · + │ pred @2 @> ARRAY[1,2] · · + └── zigzag-join · · (a) · + │ type inner · · + ├── scan · · (a) · + │ table e@e_b_idx · · + │ fixedvals 1 column · · + └── scan · · () · +· table e@e_b_idx · · +· fixedvals 1 column · · + +query TTTTT +EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[1] AND b @> ARRAY[2] +---- +· distributed false · · +· vectorized false · · +lookup-join · · (a, b) · + │ table e@primary · · + │ type inner · · + │ equality (a) = (a) · · + │ equality cols are key · · · + │ parallel · · · + │ pred (@2 @> ARRAY[1]) AND (@2 @> ARRAY[2]) · · + └── zigzag-join · · (a) · + │ type inner · · + ├── scan · · (a) · + │ table e@e_b_idx · · + │ fixedvals 1 column · · + └── scan · · () · +· table e@e_b_idx · · +· fixedvals 1 column · · diff --git a/pkg/sql/opt/idxconstraint/index_constraints.go b/pkg/sql/opt/idxconstraint/index_constraints.go index 668a090d5638..86de7e3414cb 100644 --- a/pkg/sql/opt/idxconstraint/index_constraints.go +++ b/pkg/sql/opt/idxconstraint/index_constraints.go @@ -756,6 +756,131 @@ func (c *indexConstraintCtx) makeSpansForOr( return tight } +// makeInvertedIndexSpansForJSONExpr is the implementation of +// makeInvertedIndexSpans for JSON inverted indexes. The input datum is the JSON +// to produce spans for. +func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONExpr( + datum *tree.DJSON, constraints []*constraint.Constraint, allPaths bool, +) (bool, []*constraint.Constraint) { + out := &constraint.Constraint{} + constrained := false + + rd := datum.JSON + + switch rd.Type() { + case json.ArrayJSONType, json.ObjectJSONType: + // First, check if there's more than one path through the datum. + paths, err := json.AllPaths(rd) + if err != nil { + log.Errorf(context.TODO(), "unexpected JSON error: %+v", err) + c.unconstrained(0 /* offset */, out) + return false, append(constraints, out) + } + for i := range paths { + hasContainerLeaf, err := paths[i].HasContainerLeaf() + if err != nil { + log.Errorf(context.TODO(), "unexpected JSON error: %+v", err) + c.unconstrained(0 /* offset */, out) + return false, append(constraints, out) + } + if hasContainerLeaf { + // We want to have a full index scan if the RHS contains either [] or {}. + continue + } + pathDatum, err := tree.MakeDJSON(paths[i]) + if err != nil { + log.Errorf(context.TODO(), "unexpected JSON error: %+v", err) + c.unconstrained(0 /* offset */, out) + return false, append(constraints, out) + } + c.eqSpan(0 /* offset */, pathDatum, out) + constraints = append(constraints, out) + // The span is tight if we just had 1 path through the index constraint. + constrained = true + if !allPaths { + return len(paths) == 1, constraints + } + // Reset out for next iteration + out = &constraint.Constraint{} + } + + // We found no paths that could constrain the scan. + if !constrained { + c.unconstrained(0 /* offset */, out) + return false, append(constraints, out) + } + return len(paths) == 1, constraints + + default: + // If we find a scalar on the right side of the @> operator it means that we need to find + // both matching scalars and arrays that contain that value. In order to do this we generate + // two logical spans, one for the original scalar and one for arrays containing the scalar. + // This is valid because in JSON something can either be an array or scalar so the spans are + // guaranteed not to overlap when mapped onto the primary key space. Therefore there won't be + // any duplicate primary keys when we retrieve rows for both sets. + j := json.NewArrayBuilder(1) + j.Add(rd) + dJSON, err := tree.MakeDJSON(j.Build()) + if err != nil { + break + } + + // This is the span for the scalar. + c.eqSpan(0 /* offset */, datum, out) + + // This is the span to match arrays. + var other constraint.Constraint + c.eqSpan(0 /* offset */, dJSON, &other) + out.UnionWith(c.evalCtx, &other) + return true, append(constraints, out) + } + + // Just assume that we didn't find any constraints if we ran into an error. + return false, constraints +} + +// makeInvertedIndexSpansForArrayExpr is the implementation of +// makeInvertedIndexSpans for array inverted indexes. The input arr is the array +// to produce spans for. +func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayExpr( + arr *tree.DArray, constraints []*constraint.Constraint, allPaths bool, +) (bool, []*constraint.Constraint) { + constrained := false + out := &constraint.Constraint{} + // We're going to make one span to search for every value inside of the + // array datum. + for i := range arr.Array { + elt := arr.Array[i] + if elt == tree.DNull { + // In SQL: + // SELECT ARRAY[1, NULL, 2] @> ARRAY[NULL] + // returns false. + c.contradiction(0 /* offset */, out) + return false, append(constraints, out) + } + array := tree.NewDArray(arr.ParamTyp) + array.Array = make(tree.Datums, 1) + array.Array[0] = arr.Array[i] + c.eqSpan(0 /* offset */, array, out) + + constraints = append(constraints, out) + + constrained = true + if !allPaths { + // The span is tight if we just had 1 path through the index constraint. + return len(arr.Array) == 1, constraints + } + + // Reset out for next iteration + out = &constraint.Constraint{} + } + if !constrained { + c.unconstrained(0 /* offset */, out) + return false, append(constraints, out) + } + return len(arr.Array) == 1, constraints +} + // makeInvertedIndexSpansForExpr is analogous to makeSpansForExpr, but it is // used for inverted indexes. If allPaths is true, the slice is populated with // all constraints found. Otherwise, this function stops at the first @@ -769,10 +894,10 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr( nd opt.Expr, constraints []*constraint.Constraint, allPaths bool, ) (bool, []*constraint.Constraint) { var tight bool - out := &constraint.Constraint{} constrained := false switch nd.Op() { case opt.ContainsOp: + out := &constraint.Constraint{} lhs, rhs := nd.Child(0), nd.Child(1) if !c.isIndexColumn(lhs, 0 /* index */) || !opt.IsConstValueOp(rhs) { @@ -787,77 +912,18 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr( return false, append(constraints, out) } - rd := rightDatum.(*tree.DJSON).JSON - - switch rd.Type() { - case json.ArrayJSONType, json.ObjectJSONType: - // First, check if there's more than one path through the datum. - paths, err := json.AllPaths(rd) - if err != nil { - log.Errorf(context.TODO(), "unexpected JSON error: %+v", err) - c.unconstrained(0 /* offset */, out) - return false, append(constraints, out) - } - for i := range paths { - hasContainerLeaf, err := paths[i].HasContainerLeaf() - if err != nil { - log.Errorf(context.TODO(), "unexpected JSON error: %+v", err) - c.unconstrained(0 /* offset */, out) - return false, append(constraints, out) - } - if hasContainerLeaf { - // We want to have a full index scan if the RHS contains either [] or {}. - continue - } - pathDatum, err := tree.MakeDJSON(paths[i]) - if err != nil { - log.Errorf(context.TODO(), "unexpected JSON error: %+v", err) - c.unconstrained(0 /* offset */, out) - return false, append(constraints, out) - } - c.eqSpan(0 /* offset */, pathDatum, out) - constraints = append(constraints, out) - // The span is tight if we just had 1 path through the index constraint. - constrained = true - if !allPaths { - return len(paths) == 1, constraints - } - // Reset out for next iteration - out = &constraint.Constraint{} - } - - // We found no paths that could constrain the scan. - if !constrained { - c.unconstrained(0 /* offset */, out) - return false, append(constraints, out) - } - return len(paths) == 1, constraints + switch rightDatum.ResolvedType().Family() { + case types.JsonFamily: + return c.makeInvertedIndexSpansForJSONExpr(rightDatum.(*tree.DJSON), constraints, allPaths) + case types.ArrayFamily: + return c.makeInvertedIndexSpansForArrayExpr(rightDatum.(*tree.DArray), constraints, allPaths) default: - // If we find a scalar on the right side of the @> operator it means that we need to find - // both matching scalars and arrays that contain that value. In order to do this we generate - // two logical spans, one for the original scalar and one for arrays containing the scalar. - // This is valid because in JSON something can either be an array or scalar so the spans are - // guaranteed not to overlap when mapped onto the primary key space. Therefore there won't be - // any duplicate primary keys when we retrieve rows for both sets. - j := json.NewArrayBuilder(1) - j.Add(rd) - dJSON, err := tree.MakeDJSON(j.Build()) - if err != nil { - break - } - - // This is the span for the scalar. - c.eqSpan(0 /* offset */, rightDatum, out) - - // This is the span to match arrays. - var other constraint.Constraint - c.eqSpan(0 /* offset */, dJSON, &other) - out.UnionWith(c.evalCtx, &other) - return true, append(constraints, out) + log.Errorf(context.TODO(), "unexpected type in inverted index: %s", rightDatum.ResolvedType()) } case opt.AndOp, opt.FiltersOp: + out := &constraint.Constraint{} for i, n := 0, nd.ChildCount(); i < n; i++ { tight, constraints = c.makeInvertedIndexSpansForExpr( nd.Child(i), constraints, allPaths, @@ -892,6 +958,7 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr( return c.makeInvertedIndexSpansForExpr(nd.Child(0), constraints, allPaths) } if !constrained { + out := &constraint.Constraint{} c.unconstrained(0 /* offset */, out) constraints = append(constraints, out) } diff --git a/pkg/sql/opt/idxconstraint/testdata/inverted b/pkg/sql/opt/idxconstraint/testdata/inverted index e7cfc5d5fa43..b535e24ea5c4 100644 --- a/pkg/sql/opt/idxconstraint/testdata/inverted +++ b/pkg/sql/opt/idxconstraint/testdata/inverted @@ -44,3 +44,40 @@ index-constraints vars=(jsonb, int) inverted-index=@1 ---- [/'{"a": 1}' - /'{"a": 1}'] Remaining filter: (@2 = 1) AND (@1 @> '{"b": 1}') + +index-constraints vars=(int[]) inverted-index=@1 +@1 @> ARRAY[1] +---- +[/ARRAY[1] - /ARRAY[1]] + +index-constraints vars=(int[]) inverted-index=@1 +ARRAY[1] <@ @1 +---- +[/ARRAY[1] - /ARRAY[1]] + +index-constraints vars=(int[]) inverted-index=@1 +@1 @> ARRAY[1,2] +---- +[/ARRAY[1] - /ARRAY[1]] +Remaining filter: @1 @> ARRAY[1,2] + +# Currently we only generate spans from one of the @> expressions. +index-constraints vars=(int[]) inverted-index=@1 +@1 @> ARRAY[1] AND @1 @> ARRAY[2] +---- +[/ARRAY[1] - /ARRAY[1]] +Remaining filter: @1 @> ARRAY[2] + +# This could be better - @1 @> ARRAY[] is always true, but we currently +# don't remove the extra filter. +index-constraints vars=(int[]) inverted-index=@1 +@1 @> ARRAY[]::INT[] +---- +[ - ] +Remaining filter: @1 @> ARRAY[] + +index-constraints vars=(int[]) inverted-index=@1 +@1 @> NULL +---- +[ - ] +Remaining filter: NULL diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index b9c9763e839a..edc5814d0074 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -22,6 +22,15 @@ CREATE TABLE b ) ---- +exec-ddl +CREATE TABLE c +( + k INT PRIMARY KEY, + a INT[], + INVERTED INDEX inv_idx(a) +) +---- + # -------------------------------------------------- # GenerateConstrainedScans # -------------------------------------------------- @@ -919,3 +928,75 @@ project ├── constraint: /4/1: [/'{"a": "b"}' - /'{"a": "b"}'] ├── locking: for-update └── key: (1) + +# Write some tests for array inverted indexes. + +opt +SELECT k FROM c WHERE a @> ARRAY[1] +---- +project + ├── columns: k:1!null + ├── key: (1) + └── index-join c + ├── columns: k:1!null a:2 + ├── key: (1) + ├── fd: (1)-->(2) + └── scan c@inv_idx + ├── columns: k:1!null + ├── constraint: /2/1: [/ARRAY[1] - /ARRAY[1]] + └── key: (1) + +opt +SELECT k FROM c WHERE a @> ARRAY[1,3,1,5] +---- +project + ├── columns: k:1!null + ├── key: (1) + └── inner-join (lookup c) + ├── columns: k:1!null a:2 + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── key: (1) + ├── fd: (1)-->(2) + ├── inner-join (zigzag c@inv_idx c@inv_idx) + │ ├── columns: k:1!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = [ARRAY[1]] + │ ├── right fixed columns: [2] = [ARRAY[3]] + │ └── filters (true) + └── filters + └── a:2 @> ARRAY[1,3,1,5] [outer=(2)] + +opt +SELECT k FROM c WHERE a @> ARRAY[]::INT[] +---- +project + ├── columns: k:1!null + ├── key: (1) + └── select + ├── columns: k:1!null a:2 + ├── key: (1) + ├── fd: (1)-->(2) + ├── scan c + │ ├── columns: k:1!null a:2 + │ ├── key: (1) + │ └── fd: (1)-->(2) + └── filters + └── a:2 @> ARRAY[] [outer=(2)] + +opt +SELECT k FROM c WHERE a IS NULL +---- +project + ├── columns: k:1!null + ├── key: (1) + └── select + ├── columns: k:1!null a:2 + ├── key: (1) + ├── fd: ()-->(2) + ├── scan c + │ ├── columns: k:1!null a:2 + │ ├── key: (1) + │ └── fd: (1)-->(2) + └── filters + └── a:2 IS NULL [outer=(2), constraints=(/2: [/NULL - /NULL]; tight), fd=()-->(2)] diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 1753a3e68010..b01e146ad9da 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -3600,8 +3600,9 @@ may increase either contention or retry errors, or both.`, }, ), - // Returns the number of distinct inverted index entries that would be generated for a JSON value. - "crdb_internal.json_num_index_entries": makeBuiltin( + // Returns the number of distinct inverted index entries that would be + // generated for a value. + "crdb_internal.num_inverted_index_entries": makeBuiltin( tree.FunctionProperties{ Category: categorySystemInfo, NullableArgs: true, @@ -3622,6 +3623,33 @@ may increase either contention or retry errors, or both.`, }, Info: "This function is used only by CockroachDB's developers for testing purposes.", }, + tree.Overload{ + Types: tree.ArgTypes{{"val", types.AnyArray}}, + ReturnType: tree.FixedReturnType(types.Int), + Fn: func(_ *tree.EvalContext, args tree.Datums) (tree.Datum, error) { + arg := args[0] + if arg == tree.DNull { + return tree.DZero, nil + } + // Inverted indexes on arrays don't contain entries for null array + // elements. + arr := tree.MustBeDArray(arg) + if !arr.HasNonNulls { + return tree.DZero, nil + } + if !arr.HasNulls { + var ret tree.DInt + for i := range arr.Array { + if arr.Array[i] != tree.DNull { + ret++ + } + } + return tree.NewDInt(ret), nil + } + return tree.NewDInt(tree.DInt(len(arr.Array))), nil + }, + Info: "This function is used only by CockroachDB's developers for testing purposes.", + }, ), // Returns true iff the current user has admin role. diff --git a/pkg/sql/sqlbase/index_encoding.go b/pkg/sql/sqlbase/index_encoding.go index 1467dc7758d6..88cbb95bb7ec 100644 --- a/pkg/sql/sqlbase/index_encoding.go +++ b/pkg/sql/sqlbase/index_encoding.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/json" + "github.com/cockroachdb/cockroach/pkg/util/unique" "github.com/cockroachdb/errors" ) @@ -853,21 +854,51 @@ func EncodeInvertedIndexKeys( return EncodeInvertedIndexTableKeys(val, keyPrefix) } -// EncodeInvertedIndexTableKeys encodes the paths in a JSON `val` and -// concatenates it with `inKey`and returns a list of buffers per -// path. The encoded values is guaranteed to be lexicographically -// sortable, but not guaranteed to be round-trippable during decoding. -// A (SQL) NULL input Datum produces no keys, because inverted indexes -// cannot and do not need to satisfy the predicate col IS NULL. +// EncodeInvertedIndexTableKeys produces one inverted index key per element in +// the input datum, which should be a container (either JSON or Array). For +// JSON, "element" means unique path through the document. Each output key is +// prefixed by inKey, and is guaranteed to be lexicographically sortable, but +// not guaranteed to be round-trippable during decoding. If the input Datum +// is (SQL) NULL, no inverted index keys will be produced, because inverted +// indexes cannot and do not need to satisfy the predicate col IS NULL. func EncodeInvertedIndexTableKeys(val tree.Datum, inKey []byte) (key [][]byte, err error) { if val == tree.DNull { return nil, nil } - switch t := tree.UnwrapDatum(nil, val).(type) { - case *tree.DJSON: - return json.EncodeInvertedIndexKeys(inKey, (t.JSON)) + datum := tree.UnwrapDatum(nil, val) + switch val.ResolvedType().Family() { + case types.JsonFamily: + return json.EncodeInvertedIndexKeys(inKey, val.(*tree.DJSON).JSON) + case types.ArrayFamily: + return encodeArrayInvertedIndexTableKeys(val.(*tree.DArray), inKey) } - return nil, errors.AssertionFailedf("trying to apply inverted index to non JSON type") + return nil, errors.AssertionFailedf("trying to apply inverted index to unsupported type %s", datum.ResolvedType()) +} + +// encodeArrayInvertedIndexTableKeys returns a list of inverted index keys for +// the given input array, one per entry in the array. The input inKey is +// prefixed to all returned keys. +// N.B.: This won't return any keys for +func encodeArrayInvertedIndexTableKeys(val *tree.DArray, inKey []byte) (key [][]byte, err error) { + outKeys := make([][]byte, len(val.Array)) + for i := range val.Array { + d := val.Array[i] + if d == tree.DNull { + // We don't need to make keys for NULL, since in SQL: + // SELECT ARRAY[1, NULL, 2] @> ARRAY[NULL] + // returns false. + continue + } + outKey := make([]byte, len(inKey)) + copy(outKey, inKey) + newKey, err := EncodeTableKey(outKey, d, encoding.Ascending) + if err != nil { + return nil, err + } + outKeys[i] = newKey + } + outKeys = unique.UniquifyByteSlices(outKeys) + return outKeys, nil } // EncodePrimaryIndex constructs a list of k/v pairs for a row encoded as a primary index. diff --git a/pkg/sql/sqlbase/structured.go b/pkg/sql/sqlbase/structured.go index 62f55bf0e1dd..344d0cefb79b 100644 --- a/pkg/sql/sqlbase/structured.go +++ b/pkg/sql/sqlbase/structured.go @@ -2231,7 +2231,8 @@ func ColumnTypeIsIndexable(t *types.T) bool { // ColumnTypeIsInvertedIndexable returns whether the type t is valid to be indexed // using an inverted index. func ColumnTypeIsInvertedIndexable(t *types.T) bool { - return t.Family() == types.JsonFamily + family := t.Family() + return family == types.JsonFamily || family == types.ArrayFamily } func notIndexableError(cols []ColumnDescriptor, inverted bool) error {