From f3144aa8cb714c2159519be4a7a25e14c8f7082e Mon Sep 17 00:00:00 2001
From: Jordan Lewis <jordanthelewis@gmail.com>
Date: Sun, 1 Mar 2020 17:57:47 -0500
Subject: [PATCH] sql: add inverted indexes on arrays

This commit adds inverted index support to arrays. Inverted index
entries are created from arrays by simply encoding a key that contains
the array element's table key encoding. Nulls are not indexed, since in
SQL, ARRAY[1, NULL] @> ARRAY[NULL] returns false.

For example, in a table t(int, int[]) with an inverted index with id 3
on the int[] column the row (10, [1, NULL, 2]) produces 2 index keys:

```
/tableId/3/1/10
/tableId/3/2/10
```

This encoding scheme is much simpler than the one for JSON, since arrays
don't have "paths": their elements are simply ordinary datums.

Release note (sql change): The inverted index implementation now
supports indexing array columns. This permits accelerating containment
queries (@> and <@) on array columns by adding an index to them.
---
 docs/generated/sql/functions.md               |   6 +-
 pkg/cli/testdata/dump/inverted_index          |  18 +-
 pkg/sql/backfill.go                           |   2 +-
 .../testdata/logic_test/inverted_index        | 106 +++++++++
 pkg/sql/opt/cat/index.go                      |   2 +-
 .../exec/execbuilder/testdata/inverted_index  | 157 ++++++++++++++
 .../opt/idxconstraint/index_constraints.go    | 201 ++++++++++++------
 pkg/sql/opt/idxconstraint/testdata/inverted   |  37 ++++
 pkg/sql/opt/xform/testdata/rules/select       |  81 +++++++
 pkg/sql/sem/builtins/builtins.go              |  32 ++-
 pkg/sql/sqlbase/index_encoding.go             |  51 ++++-
 pkg/sql/sqlbase/structured.go                 |   3 +-
 12 files changed, 607 insertions(+), 89 deletions(-)
diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md
index 8d699f0df569..51c6808db2ae 100644
--- a/docs/generated/sql/functions.md
+++ b/docs/generated/sql/functions.md
@@ -1095,8 +1095,6 @@ SELECT * FROM crdb_internal.check_consistency(true, ‘\x02’, ‘\x04’)</p>
 <tr><td><a name="crdb_internal.get_zone_config"></a><code>crdb_internal.get_zone_config(namespace_id: <a href="int.html">int</a>) &rarr; <a href="bytes.html">bytes</a></code></td><td></td></tr>
 <tr><td><a name="crdb_internal.is_admin"></a><code>crdb_internal.is_admin() &rarr; <a href="bool.html">bool</a></code></td><td><span class="funcdesc"><p>Retrieves the current user’s admin status.</p>
 </span></td></tr>
-<tr><td><a name="crdb_internal.json_num_index_entries"></a><code>crdb_internal.json_num_index_entries(val: jsonb) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>This function is used only by CockroachDB’s developers for testing purposes.</p>
-</span></td></tr>
 <tr><td><a name="crdb_internal.lease_holder"></a><code>crdb_internal.lease_holder(key: <a href="bytes.html">bytes</a>) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>This function is used to fetch the leaseholder corresponding to a request key</p>
 </span></td></tr>
 <tr><td><a name="crdb_internal.locality_value"></a><code>crdb_internal.locality_value(key: <a href="string.html">string</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Returns the value of the specified locality key.</p>
@@ -1105,6 +1103,10 @@ SELECT * FROM crdb_internal.check_consistency(true, ‘\x02’, ‘\x04’)</p>
 </span></td></tr>
 <tr><td><a name="crdb_internal.node_executable_version"></a><code>crdb_internal.node_executable_version() &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Returns the version of CockroachDB this node is running.</p>
 </span></td></tr>
+<tr><td><a name="crdb_internal.num_inverted_index_entries"></a><code>crdb_internal.num_inverted_index_entries(val: anyelement[]) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>This function is used only by CockroachDB’s developers for testing purposes.</p>
+</span></td></tr>
+<tr><td><a name="crdb_internal.num_inverted_index_entries"></a><code>crdb_internal.num_inverted_index_entries(val: jsonb) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>This function is used only by CockroachDB’s developers for testing purposes.</p>
+</span></td></tr>
 <tr><td><a name="crdb_internal.pretty_key"></a><code>crdb_internal.pretty_key(raw_key: <a href="bytes.html">bytes</a>, skip_fields: <a href="int.html">int</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>This function is used only by CockroachDB’s developers for testing purposes.</p>
 </span></td></tr>
 <tr><td><a name="crdb_internal.range_stats"></a><code>crdb_internal.range_stats(key: <a href="bytes.html">bytes</a>) &rarr; jsonb</code></td><td><span class="funcdesc"><p>This function is used to retrieve range statistics information as a JSON object.</p>
diff --git a/pkg/cli/testdata/dump/inverted_index b/pkg/cli/testdata/dump/inverted_index
index 19e44fbdb15e..6809c490d536 100644
--- a/pkg/cli/testdata/dump/inverted_index
+++ b/pkg/cli/testdata/dump/inverted_index
@@ -3,12 +3,16 @@ CREATE DATABASE d;
 CREATE TABLE d.t (
 	a JSON,
 	b JSON,
-	INVERTED INDEX idx (a)
+	c INT[],
+	d INT[],
+	INVERTED INDEX idx (a),
+	INVERTED INDEX idx3 (c)
 );
 
 CREATE INVERTED INDEX idx2 ON d.t (b);
+CREATE INVERTED INDEX idx4 ON d.t (d);
 
-INSERT INTO d.t VALUES ('{"a": "b"}', '{"c": "d"}');
+INSERT INTO d.t VALUES ('{"a": "b"}', '{"c": "d"}', ARRAY[1], ARRAY[2]);
 ----
 INSERT 1
 
@@ -18,12 +22,16 @@ dump d t
 CREATE TABLE t (
 	a JSONB NULL,
 	b JSONB NULL,
+	c INT8[] NULL,
+	d INT8[] NULL,
 	INVERTED INDEX idx (a),
+	INVERTED INDEX idx3 (c),
 	INVERTED INDEX idx2 (b),
-	FAMILY "primary" (a, b, rowid)
+	INVERTED INDEX idx4 (d),
+	FAMILY "primary" (a, b, c, d, rowid)
 );
 
-INSERT INTO t (a, b) VALUES
-	('{"a": "b"}', '{"c": "d"}');
+INSERT INTO t (a, b, c, d) VALUES
+	('{"a": "b"}', '{"c": "d"}', ARRAY[1], ARRAY[2]);
 ----
 ----
diff --git a/pkg/sql/backfill.go b/pkg/sql/backfill.go
index ff7057b71621..035d12facd2b 100644
--- a/pkg/sql/backfill.go
+++ b/pkg/sql/backfill.go
@@ -1243,7 +1243,7 @@ func (sc *SchemaChanger) validateInvertedIndexes(
 				row, err := ie.QueryRowEx(ctx, "verify-inverted-idx-count", txn,
 					sqlbase.InternalExecutorSessionDataOverride{},
 					fmt.Sprintf(
-						`SELECT coalesce(sum_int(crdb_internal.json_num_index_entries(%s)), 0) FROM [%d AS t]`,
+						`SELECT coalesce(sum_int(crdb_internal.num_inverted_index_entries(%s)), 0) FROM [%d AS t]`,
 						col, tableDesc.ID,
 					),
 				)
diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index
index 7fc265c272f0..a37d314007fd 100644
--- a/pkg/sql/logictest/testdata/logic_test/inverted_index
+++ b/pkg/sql/logictest/testdata/logic_test/inverted_index
@@ -694,3 +694,109 @@ CREATE INVERTED INDEX ON table_with_nulls (a)
 
 statement ok
 DROP TABLE table_with_nulls
+
+statement ok
+DROP TABLE c
+
+subtest arrays
+
+statement ok
+CREATE TABLE c (
+  id INT PRIMARY KEY,
+  foo INT[],
+  bar STRING[],
+  INVERTED INDEX (foo),
+  FAMILY "primary" (id, foo, bar)
+)
+
+statement ok
+INSERT INTO c VALUES(0, NULL, NULL)
+
+statement ok
+INSERT INTO c VALUES(1, ARRAY[], ARRAY['foo', 'bar', 'baz'])
+
+statement ok
+CREATE INDEX ON c USING GIN (bar)
+
+query TT
+SHOW CREATE TABLE c
+----
+c  CREATE TABLE c (
+   id INT8 NOT NULL,
+   foo INT8[] NULL,
+   bar STRING[] NULL,
+   CONSTRAINT "primary" PRIMARY KEY (id ASC),
+   INVERTED INDEX c_foo_idx (foo),
+   INVERTED INDEX c_bar_idx (bar),
+   FAMILY "primary" (id, foo, bar)
+)
+
+query ITT
+SELECT * from c WHERE bar @> ARRAY['foo']
+----
+1  {}  {foo,bar,baz}
+
+query ITT
+SELECT * from c WHERE bar @> ARRAY['bar', 'baz']
+----
+1  {}  {foo,bar,baz}
+
+query ITT
+SELECT * from c WHERE bar @> ARRAY['bar', 'qux']
+----
+
+statement ok
+INSERT INTO c VALUES(2, NULL, NULL)
+
+statement ok
+INSERT INTO c VALUES(3, ARRAY[0,1,NULL], ARRAY['a',NULL,'b',NULL])
+
+statement ok
+INSERT INTO c VALUES(4, ARRAY[1,2,3], ARRAY['b',NULL,'c'])
+
+statement ok
+INSERT INTO c VALUES(5, ARRAY[], ARRAY[NULL, NULL])
+
+query ITT
+SELECT * FROM c WHERE foo @> ARRAY[0]
+----
+3  {0,1,NULL}  {a,NULL,b,NULL}
+
+query error unsupported comparison operator
+SELECT * FROM c WHERE foo @> 0
+
+query ITT
+SELECT * FROM c WHERE foo @> ARRAY[1]
+----
+3  {0,1,NULL}  {a,NULL,b,NULL}
+4  {1,2,3}     {b,NULL,c}
+
+# This is expected, although it looks odd, because in SQL,
+# ARRAY[NULL] @> ARRAY[NULL] returns false.
+query ITT
+SELECT * FROM c WHERE foo @> ARRAY[NULL]::INT[]
+----
+
+query ITT
+SELECT * FROM c WHERE bar @> ARRAY['a']
+----
+3  {0,1,NULL}  {a,NULL,b,NULL}
+
+query ITT
+SELECT * FROM c WHERE bar @> ARRAY['b']
+----
+3  {0,1,NULL}  {a,NULL,b,NULL}
+4  {1,2,3}     {b,NULL,c}
+
+query ITT
+SELECT * FROM c WHERE bar @> ARRAY['c']
+----
+4  {1,2,3}  {b,NULL,c}
+
+query ITT
+SELECT * FROM c WHERE bar @> ARRAY[]::TEXT[]
+----
+1  {}          {foo,bar,baz}
+3  {0,1,NULL}  {a,NULL,b,NULL}
+4  {1,2,3}     {b,NULL,c}
+5  {}          {NULL,NULL}
diff --git a/pkg/sql/opt/cat/index.go b/pkg/sql/opt/cat/index.go
index 7f4343623c0e..693a45d08315 100644
--- a/pkg/sql/opt/cat/index.go
+++ b/pkg/sql/opt/cat/index.go
@@ -48,7 +48,7 @@ type Index interface {
 	// IsUnique returns true if this index is declared as UNIQUE in the schema.
 	IsUnique() bool
 
-	// IsInverted returns true if this is a JSON inverted index.
+	// IsInverted returns true if this is an inverted index.
 	IsInverted() bool
 
 	// ColumnCount returns the number of columns in the index. This includes
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index
index 6ce9e2e076e5..152ea499da31 100644
--- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index
+++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index
@@ -7,9 +7,27 @@ CREATE TABLE d (
   FAMILY (a,b)
 )
 
+statement ok
+CREATE TABLE e (
+  a INT PRIMARY KEY,
+  b INT[],
+  FAMILY (a,b),
+  INVERTED INDEX(b)
+)
+
+statement ok
+CREATE TABLE f (
+  a INT PRIMARY KEY,
+  b DECIMAL[],
+  FAMILY (a,b),
+  INVERTED INDEX(b)
+)
+
 statement ok
 CREATE INVERTED INDEX foo_inv ON d(b)
 
+# Tests for json inverted indexes.
+
 query T kvtrace
 INSERT INTO d VALUES(0, '{"a": "b"}')
 ----
@@ -46,6 +64,52 @@ INSERT INTO d VALUES(4, NULL)
 ----
 CPut /Table/53/1/4/0 -> /TUPLE/
 
+# Tests for array inverted indexes.
+
+# Make sure that duplicate entries do not get emitted more than once.
+query T kvtrace
+INSERT INTO e VALUES(0, ARRAY[7,0,0,1,10,0,1,7])
+----
+CPut /Table/54/1/0/0 -> /TUPLE/
+InitPut /Table/54/2/0/0/0 -> /BYTES/
+InitPut /Table/54/2/1/0/0 -> /BYTES/
+InitPut /Table/54/2/7/0/0 -> /BYTES/
+InitPut /Table/54/2/10/0/0 -> /BYTES/
+
+# Make sure that empty arrays do not emit any keys at all.
+query T kvtrace
+INSERT INTO e VALUES(1, ARRAY[])
+----
+CPut /Table/54/1/1/0 -> /TUPLE/
+
+# Make sure that NULL arrays do not emit any keys at all.
+query T kvtrace
+INSERT INTO e VALUES(2, NULL)
+----
+CPut /Table/54/1/2/0 -> /TUPLE/
+
+# Test that array inverted indexes work okay with decimals (a type with
+# composite encoding).
+
+query T kvtrace
+INSERT INTO f VALUES(0, ARRAY[7,0,0,1,10,0,1,7])
+----
+CPut /Table/55/1/0/0 -> /TUPLE/
+InitPut /Table/55/2/0/0/0 -> /BYTES/
+InitPut /Table/55/2/1/0/0 -> /BYTES/
+InitPut /Table/55/2/7/0/0 -> /BYTES/
+InitPut /Table/55/2/1E+1/0/0 -> /BYTES/
+
+query T kvtrace
+INSERT INTO f VALUES(1, ARRAY[])
+----
+CPut /Table/55/1/1/0 -> /TUPLE/
+
+query T kvtrace
+INSERT INTO f VALUES(2, NULL)
+----
+CPut /Table/55/1/2/0 -> /TUPLE/
+
 query TTTTT
 EXPLAIN (VERBOSE) SELECT * from d where b @>'{"a": "b"}'
 ----
@@ -368,3 +432,96 @@ scan  ·            ·                          (a, b)  ·
 ·     table        d@primary                  ·       ·
 ·     spans        ALL                        ·       ·
 ·     filter       b @> '{"a": {}, "b": {}}'  ·       ·
+
+subtest array
+
+query TTTTT
+EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[1]
+----
+·           distributed  false      ·       ·
+·           vectorized   false      ·       ·
+index-join  ·            ·          (a, b)  ·
+ │          table        e@primary  ·       ·
+ │          key columns  a          ·       ·
+ └── scan   ·            ·          (a)     ·
+·           table        e@e_b_idx  ·       ·
+·           spans        /1-/2      ·       ·
+
+query TTTTT
+EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[]::INT[]
+----
+·     distributed  false         ·       ·
+·     vectorized   false         ·       ·
+scan  ·            ·             (a, b)  ·
+·     table        e@primary     ·       ·
+·     spans        ALL           ·       ·
+·     filter       b @> ARRAY[]  ·       ·
+
+query TTTTT
+EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[NULL]::INT[]
+----
+·            distributed  false      ·       ·
+·            vectorized   false      ·       ·
+index-join   ·            ·          (a, b)  ·
+ │           table        e@primary  ·       ·
+ │           key columns  a          ·       ·
+ └── norows  ·            ·          (a)     ·
+
+query TTTTT
+EXPLAIN (VERBOSE) SELECT * from e where b @> NULL
+----
+·       distributed  false  ·       ·
+·       vectorized   false  ·       ·
+norows  ·            ·      (a, b)  ·
+
+query TTTTT
+EXPLAIN (VERBOSE) SELECT * from e where b IS NULL
+----
+·     distributed  false      ·       ·
+·     vectorized   false      ·       ·
+scan  ·            ·          (a, b)  ·
+·     table        e@primary  ·       ·
+·     spans        ALL        ·       ·
+·     filter       b IS NULL  ·       ·
+
+query TTTTT
+EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[1,2]
+----
+·                 distributed            false             ·       ·
+·                 vectorized             false             ·       ·
+lookup-join       ·                      ·                 (a, b)  ·
+ │                table                  e@primary         ·       ·
+ │                type                   inner             ·       ·
+ │                equality               (a) = (a)         ·       ·
+ │                equality cols are key  ·                 ·       ·
+ │                parallel               ·                 ·       ·
+ │                pred                   @2 @> ARRAY[1,2]  ·       ·
+ └── zigzag-join  ·                      ·                 (a)     ·
+      │           type                   inner             ·       ·
+      ├── scan    ·                      ·                 (a)     ·
+      │           table                  e@e_b_idx         ·       ·
+      │           fixedvals              1 column          ·       ·
+      └── scan    ·                      ·                 ()      ·
+·                 table                  e@e_b_idx         ·       ·
+·                 fixedvals              1 column          ·       ·
+
+query TTTTT
+EXPLAIN (VERBOSE) SELECT * from e where b @> ARRAY[1] AND b @> ARRAY[2]
+----
+·                 distributed            false                                  ·       ·
+·                 vectorized             false                                  ·       ·
+lookup-join       ·                      ·                                      (a, b)  ·
+ │                table                  e@primary                              ·       ·
+ │                type                   inner                                  ·       ·
+ │                equality               (a) = (a)                              ·       ·
+ │                equality cols are key  ·                                      ·       ·
+ │                parallel               ·                                      ·       ·
+ │                pred                   (@2 @> ARRAY[1]) AND (@2 @> ARRAY[2])  ·       ·
+ └── zigzag-join  ·                      ·                                      (a)     ·
+      │           type                   inner                                  ·       ·
+      ├── scan    ·                      ·                                      (a)     ·
+      │           table                  e@e_b_idx                              ·       ·
+      │           fixedvals              1 column                               ·       ·
+      └── scan    ·                      ·                                      ()      ·
+·                 table                  e@e_b_idx                              ·       ·
+·                 fixedvals              1 column                               ·       ·
diff --git a/pkg/sql/opt/idxconstraint/index_constraints.go b/pkg/sql/opt/idxconstraint/index_constraints.go
index 668a090d5638..86de7e3414cb 100644
--- a/pkg/sql/opt/idxconstraint/index_constraints.go
+++ b/pkg/sql/opt/idxconstraint/index_constraints.go
@@ -756,6 +756,131 @@ func (c *indexConstraintCtx) makeSpansForOr(
 	return tight
 }
 
+// makeInvertedIndexSpansForJSONExpr is the implementation of
+// makeInvertedIndexSpans for JSON inverted indexes. The input datum is the JSON
+// to produce spans for.
+func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONExpr(
+	datum *tree.DJSON, constraints []*constraint.Constraint, allPaths bool,
+) (bool, []*constraint.Constraint) {
+	out := &constraint.Constraint{}
+	constrained := false
+
+	rd := datum.JSON
+
+	switch rd.Type() {
+	case json.ArrayJSONType, json.ObjectJSONType:
+		// First, check if there's more than one path through the datum.
+		paths, err := json.AllPaths(rd)
+		if err != nil {
+			log.Errorf(context.TODO(), "unexpected JSON error: %+v", err)
+			c.unconstrained(0 /* offset */, out)
+			return false, append(constraints, out)
+		}
+		for i := range paths {
+			hasContainerLeaf, err := paths[i].HasContainerLeaf()
+			if err != nil {
+				log.Errorf(context.TODO(), "unexpected JSON error: %+v", err)
+				c.unconstrained(0 /* offset */, out)
+				return false, append(constraints, out)
+			}
+			if hasContainerLeaf {
+				// We want to have a full index scan if the RHS contains either [] or {}.
+				continue
+			}
+			pathDatum, err := tree.MakeDJSON(paths[i])
+			if err != nil {
+				log.Errorf(context.TODO(), "unexpected JSON error: %+v", err)
+				c.unconstrained(0 /* offset */, out)
+				return false, append(constraints, out)
+			}
+			c.eqSpan(0 /* offset */, pathDatum, out)
+			constraints = append(constraints, out)
+			// The span is tight if we just had 1 path through the index constraint.
+			constrained = true
+			if !allPaths {
+				return len(paths) == 1, constraints
+			}
+			// Reset out for next iteration
+			out = &constraint.Constraint{}
+		}
+
+		// We found no paths that could constrain the scan.
+		if !constrained {
+			c.unconstrained(0 /* offset */, out)
+			return false, append(constraints, out)
+		}
+		return len(paths) == 1, constraints
+
+	default:
+		// If we find a scalar on the right side of the @> operator it means that we need to find
+		// both matching scalars and arrays that contain that value. In order to do this we generate
+		// two logical spans, one for the original scalar and one for arrays containing the scalar.
+		// This is valid because in JSON something can either be an array or scalar so the spans are
+		// guaranteed not to overlap when mapped onto the primary key space. Therefore there won't be
+		// any duplicate primary keys when we retrieve rows for both sets.
+		j := json.NewArrayBuilder(1)
+		j.Add(rd)
+		dJSON, err := tree.MakeDJSON(j.Build())
+		if err != nil {
+			break
+		}
+
+		// This is the span for the scalar.
+		c.eqSpan(0 /* offset */, datum, out)
+
+		// This is the span to match arrays.
+		var other constraint.Constraint
+		c.eqSpan(0 /* offset */, dJSON, &other)
+		out.UnionWith(c.evalCtx, &other)
+		return true, append(constraints, out)
+	}
+
+	// Just assume that we didn't find any constraints if we ran into an error.
+	return false, constraints
+}
+
+// makeInvertedIndexSpansForArrayExpr is the implementation of
+// makeInvertedIndexSpans for array inverted indexes. The input arr is the array
+// to produce spans for.
+func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayExpr(
+	arr *tree.DArray, constraints []*constraint.Constraint, allPaths bool,
+) (bool, []*constraint.Constraint) {
+	constrained := false
+	out := &constraint.Constraint{}
+	// We're going to make one span to search for every value inside of the
+	// array datum.
+	for i := range arr.Array {
+		elt := arr.Array[i]
+		if elt == tree.DNull {
+			// In SQL:
+			// SELECT ARRAY[1, NULL, 2] @> ARRAY[NULL]
+			// returns false.
+			c.contradiction(0 /* offset */, out)
+			return false, append(constraints, out)
+		}
+		array := tree.NewDArray(arr.ParamTyp)
+		array.Array = make(tree.Datums, 1)
+		array.Array[0] = arr.Array[i]
+		c.eqSpan(0 /* offset */, array, out)
+
+		constraints = append(constraints, out)
+
+		constrained = true
+		if !allPaths {
+			// The span is tight if we just had 1 path through the index constraint.
+			return len(arr.Array) == 1, constraints
+		}
+
+		// Reset out for next iteration
+		out = &constraint.Constraint{}
+	}
+	if !constrained {
+		c.unconstrained(0 /* offset */, out)
+		return false, append(constraints, out)
+	}
+	return len(arr.Array) == 1, constraints
+}
+
 // makeInvertedIndexSpansForExpr is analogous to makeSpansForExpr, but it is
 // used for inverted indexes. If allPaths is true, the slice is populated with
 // all constraints found. Otherwise, this function stops at the first
@@ -769,10 +894,10 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr(
 	nd opt.Expr, constraints []*constraint.Constraint, allPaths bool,
 ) (bool, []*constraint.Constraint) {
 	var tight bool
-	out := &constraint.Constraint{}
 	constrained := false
 	switch nd.Op() {
 	case opt.ContainsOp:
+		out := &constraint.Constraint{}
 		lhs, rhs := nd.Child(0), nd.Child(1)
 
 		if !c.isIndexColumn(lhs, 0 /* index */) || !opt.IsConstValueOp(rhs) {
@@ -787,77 +912,18 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr(
 			return false, append(constraints, out)
 		}
 
-		rd := rightDatum.(*tree.DJSON).JSON
-
-		switch rd.Type() {
-		case json.ArrayJSONType, json.ObjectJSONType:
-			// First, check if there's more than one path through the datum.
-			paths, err := json.AllPaths(rd)
-			if err != nil {
-				log.Errorf(context.TODO(), "unexpected JSON error: %+v", err)
-				c.unconstrained(0 /* offset */, out)
-				return false, append(constraints, out)
-			}
-			for i := range paths {
-				hasContainerLeaf, err := paths[i].HasContainerLeaf()
-				if err != nil {
-					log.Errorf(context.TODO(), "unexpected JSON error: %+v", err)
-					c.unconstrained(0 /* offset */, out)
-					return false, append(constraints, out)
-				}
-				if hasContainerLeaf {
-					// We want to have a full index scan if the RHS contains either [] or {}.
-					continue
-				}
-				pathDatum, err := tree.MakeDJSON(paths[i])
-				if err != nil {
-					log.Errorf(context.TODO(), "unexpected JSON error: %+v", err)
-					c.unconstrained(0 /* offset */, out)
-					return false, append(constraints, out)
-				}
-				c.eqSpan(0 /* offset */, pathDatum, out)
-				constraints = append(constraints, out)
-				// The span is tight if we just had 1 path through the index constraint.
-				constrained = true
-				if !allPaths {
-					return len(paths) == 1, constraints
-				}
-				// Reset out for next iteration
-				out = &constraint.Constraint{}
-			}
-
-			// We found no paths that could constrain the scan.
-			if !constrained {
-				c.unconstrained(0 /* offset */, out)
-				return false, append(constraints, out)
-			}
-			return len(paths) == 1, constraints
+		switch rightDatum.ResolvedType().Family() {
+		case types.JsonFamily:
+			return c.makeInvertedIndexSpansForJSONExpr(rightDatum.(*tree.DJSON), constraints, allPaths)
+		case types.ArrayFamily:
+			return c.makeInvertedIndexSpansForArrayExpr(rightDatum.(*tree.DArray), constraints, allPaths)
 
 		default:
-			// If we find a scalar on the right side of the @> operator it means that we need to find
-			// both matching scalars and arrays that contain that value. In order to do this we generate
-			// two logical spans, one for the original scalar and one for arrays containing the scalar.
-			// This is valid because in JSON something can either be an array or scalar so the spans are
-			// guaranteed not to overlap when mapped onto the primary key space. Therefore there won't be
-			// any duplicate primary keys when we retrieve rows for both sets.
-			j := json.NewArrayBuilder(1)
-			j.Add(rd)
-			dJSON, err := tree.MakeDJSON(j.Build())
-			if err != nil {
-				break
-			}
-
-			// This is the span for the scalar.
-			c.eqSpan(0 /* offset */, rightDatum, out)
-
-			// This is the span to match arrays.
-			var other constraint.Constraint
-			c.eqSpan(0 /* offset */, dJSON, &other)
-			out.UnionWith(c.evalCtx, &other)
-			return true, append(constraints, out)
+			log.Errorf(context.TODO(), "unexpected type in inverted index: %s", rightDatum.ResolvedType())
 		}
 
 	case opt.AndOp, opt.FiltersOp:
+		out := &constraint.Constraint{}
 		for i, n := 0, nd.ChildCount(); i < n; i++ {
 			tight, constraints = c.makeInvertedIndexSpansForExpr(
 				nd.Child(i), constraints, allPaths,
@@ -892,6 +958,7 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr(
 		return c.makeInvertedIndexSpansForExpr(nd.Child(0), constraints, allPaths)
 	}
 	if !constrained {
+		out := &constraint.Constraint{}
 		c.unconstrained(0 /* offset */, out)
 		constraints = append(constraints, out)
 	}
diff --git a/pkg/sql/opt/idxconstraint/testdata/inverted b/pkg/sql/opt/idxconstraint/testdata/inverted
index e7cfc5d5fa43..b535e24ea5c4 100644
--- a/pkg/sql/opt/idxconstraint/testdata/inverted
+++ b/pkg/sql/opt/idxconstraint/testdata/inverted
@@ -44,3 +44,40 @@ index-constraints vars=(jsonb, int) inverted-index=@1
 ----
 [/'{"a": 1}' - /'{"a": 1}']
 Remaining filter: (@2 = 1) AND (@1 @> '{"b": 1}')
+
+index-constraints vars=(int[]) inverted-index=@1
+@1 @> ARRAY[1]
+----
+[/ARRAY[1] - /ARRAY[1]]
+
+index-constraints vars=(int[]) inverted-index=@1
+ARRAY[1] <@ @1
+----
+[/ARRAY[1] - /ARRAY[1]]
+
+index-constraints vars=(int[]) inverted-index=@1
+@1 @> ARRAY[1,2]
+----
+[/ARRAY[1] - /ARRAY[1]]
+Remaining filter: @1 @> ARRAY[1,2]
+
+# Currently we only generate spans from one of the @> expressions.
+index-constraints vars=(int[]) inverted-index=@1
+@1 @> ARRAY[1] AND @1 @> ARRAY[2]
+----
+[/ARRAY[1] - /ARRAY[1]]
+Remaining filter: @1 @> ARRAY[2]
+
+# This could be better - @1 @> ARRAY[] is always true, but we currently
+# don't remove the extra filter.
+index-constraints vars=(int[]) inverted-index=@1
+@1 @> ARRAY[]::INT[]
+----
+[ - ]
+Remaining filter: @1 @> ARRAY[]
+
+index-constraints vars=(int[]) inverted-index=@1
+@1 @> NULL
+----
+[ - ]
+Remaining filter: NULL
diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select
index b9c9763e839a..edc5814d0074 100644
--- a/pkg/sql/opt/xform/testdata/rules/select
+++ b/pkg/sql/opt/xform/testdata/rules/select
@@ -22,6 +22,15 @@ CREATE TABLE b
 )
 ----
 
+exec-ddl
+CREATE TABLE c
+(
+    k INT PRIMARY KEY,
+    a INT[],
+    INVERTED INDEX inv_idx(a)
+)
+----
+
 # --------------------------------------------------
 # GenerateConstrainedScans
 # --------------------------------------------------
@@ -919,3 +928,75 @@ project
            ├── constraint: /4/1: [/'{"a": "b"}' - /'{"a": "b"}']
            ├── locking: for-update
            └── key: (1)
+
+# Write some tests for array inverted indexes.
+
+opt
+SELECT k FROM c WHERE a @> ARRAY[1]
+----
+project
+ ├── columns: k:1!null
+ ├── key: (1)
+ └── index-join c
+      ├── columns: k:1!null a:2
+      ├── key: (1)
+      ├── fd: (1)-->(2)
+      └── scan c@inv_idx
+           ├── columns: k:1!null
+           ├── constraint: /2/1: [/ARRAY[1] - /ARRAY[1]]
+           └── key: (1)
+
+opt
+SELECT k FROM c WHERE a @> ARRAY[1,3,1,5]
+----
+project
+ ├── columns: k:1!null
+ ├── key: (1)
+ └── inner-join (lookup c)
+      ├── columns: k:1!null a:2
+      ├── key columns: [1] = [1]
+      ├── lookup columns are key
+      ├── key: (1)
+      ├── fd: (1)-->(2)
+      ├── inner-join (zigzag c@inv_idx c@inv_idx)
+      │    ├── columns: k:1!null
+      │    ├── eq columns: [1] = [1]
+      │    ├── left fixed columns: [2] = [ARRAY[1]]
+      │    ├── right fixed columns: [2] = [ARRAY[3]]
+      │    └── filters (true)
+      └── filters
+           └── a:2 @> ARRAY[1,3,1,5] [outer=(2)]
+
+opt
+SELECT k FROM c WHERE a @> ARRAY[]::INT[]
+----
+project
+ ├── columns: k:1!null
+ ├── key: (1)
+ └── select
+      ├── columns: k:1!null a:2
+      ├── key: (1)
+      ├── fd: (1)-->(2)
+      ├── scan c
+      │    ├── columns: k:1!null a:2
+      │    ├── key: (1)
+      │    └── fd: (1)-->(2)
+      └── filters
+           └── a:2 @> ARRAY[] [outer=(2)]
+
+opt
+SELECT k FROM c WHERE a IS NULL
+----
+project
+ ├── columns: k:1!null
+ ├── key: (1)
+ └── select
+      ├── columns: k:1!null a:2
+      ├── key: (1)
+      ├── fd: ()-->(2)
+      ├── scan c
+      │    ├── columns: k:1!null a:2
+      │    ├── key: (1)
+      │    └── fd: (1)-->(2)
+      └── filters
+           └── a:2 IS NULL [outer=(2), constraints=(/2: [/NULL - /NULL]; tight), fd=()-->(2)]
diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go
index 1753a3e68010..b01e146ad9da 100644
--- a/pkg/sql/sem/builtins/builtins.go
+++ b/pkg/sql/sem/builtins/builtins.go
@@ -3600,8 +3600,9 @@ may increase either contention or retry errors, or both.`,
 		},
 	),
 
-	// Returns the number of distinct inverted index entries that would be generated for a JSON value.
-	"crdb_internal.json_num_index_entries": makeBuiltin(
+	// Returns the number of distinct inverted index entries that would be
+	// generated for a value.
+	"crdb_internal.num_inverted_index_entries": makeBuiltin(
 		tree.FunctionProperties{
 			Category:     categorySystemInfo,
 			NullableArgs: true,
@@ -3622,6 +3623,33 @@ may increase either contention or retry errors, or both.`,
 			},
 			Info: "This function is used only by CockroachDB's developers for testing purposes.",
 		},
+		tree.Overload{
+			Types:      tree.ArgTypes{{"val", types.AnyArray}},
+			ReturnType: tree.FixedReturnType(types.Int),
+			Fn: func(_ *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
+				arg := args[0]
+				if arg == tree.DNull {
+					return tree.DZero, nil
+				}
+				// Inverted indexes on arrays don't contain entries for null array
+				// elements.
+				arr := tree.MustBeDArray(arg)
+				if !arr.HasNonNulls {
+					return tree.DZero, nil
+				}
+				if !arr.HasNulls {
+					var ret tree.DInt
+					for i := range arr.Array {
+						if arr.Array[i] != tree.DNull {
+							ret++
+						}
+					}
+					return tree.NewDInt(ret), nil
+				}
+				return tree.NewDInt(tree.DInt(len(arr.Array))), nil
+			},
+			Info: "This function is used only by CockroachDB's developers for testing purposes.",
+		},
 	),
 
 	// Returns true iff the current user has admin role.
diff --git a/pkg/sql/sqlbase/index_encoding.go b/pkg/sql/sqlbase/index_encoding.go
index 1467dc7758d6..88cbb95bb7ec 100644
--- a/pkg/sql/sqlbase/index_encoding.go
+++ b/pkg/sql/sqlbase/index_encoding.go
@@ -22,6 +22,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util"
 	"github.com/cockroachdb/cockroach/pkg/util/encoding"
 	"github.com/cockroachdb/cockroach/pkg/util/json"
+	"github.com/cockroachdb/cockroach/pkg/util/unique"
 	"github.com/cockroachdb/errors"
 )
 
@@ -853,21 +854,51 @@ func EncodeInvertedIndexKeys(
 	return EncodeInvertedIndexTableKeys(val, keyPrefix)
 }
 
-// EncodeInvertedIndexTableKeys encodes the paths in a JSON `val` and
-// concatenates it with `inKey`and returns a list of buffers per
-// path. The encoded values is guaranteed to be lexicographically
-// sortable, but not guaranteed to be round-trippable during decoding.
-// A (SQL) NULL input Datum produces no keys, because inverted indexes
-// cannot and do not need to satisfy the predicate col IS NULL.
+// EncodeInvertedIndexTableKeys produces one inverted index key per element in
+// the input datum, which should be a container (either JSON or Array). For
+// JSON, "element" means unique path through the document. Each output key is
+// prefixed by inKey, and is guaranteed to be lexicographically sortable, but
+// not guaranteed to be round-trippable during decoding. If the input Datum
+// is (SQL) NULL, no inverted index keys will be produced, because inverted
+// indexes cannot and do not need to satisfy the predicate col IS NULL.
 func EncodeInvertedIndexTableKeys(val tree.Datum, inKey []byte) (key [][]byte, err error) {
 	if val == tree.DNull {
 		return nil, nil
 	}
-	switch t := tree.UnwrapDatum(nil, val).(type) {
-	case *tree.DJSON:
-		return json.EncodeInvertedIndexKeys(inKey, (t.JSON))
+	datum := tree.UnwrapDatum(nil, val)
+	switch val.ResolvedType().Family() {
+	case types.JsonFamily:
+		return json.EncodeInvertedIndexKeys(inKey, val.(*tree.DJSON).JSON)
+	case types.ArrayFamily:
+		return encodeArrayInvertedIndexTableKeys(val.(*tree.DArray), inKey)
 	}
-	return nil, errors.AssertionFailedf("trying to apply inverted index to non JSON type")
+	return nil, errors.AssertionFailedf("trying to apply inverted index to unsupported type %s", datum.ResolvedType())
+}
+
+// encodeArrayInvertedIndexTableKeys returns a list of inverted index keys for
+// the given input array, one per entry in the array. The input inKey is
+// prefixed to all returned keys.
+// N.B.: This won't return any keys for
+func encodeArrayInvertedIndexTableKeys(val *tree.DArray, inKey []byte) (key [][]byte, err error) {
+	outKeys := make([][]byte, len(val.Array))
+	for i := range val.Array {
+		d := val.Array[i]
+		if d == tree.DNull {
+			// We don't need to make keys for NULL, since in SQL:
+			// SELECT ARRAY[1, NULL, 2] @> ARRAY[NULL]
+			// returns false.
+			continue
+		}
+		outKey := make([]byte, len(inKey))
+		copy(outKey, inKey)
+		newKey, err := EncodeTableKey(outKey, d, encoding.Ascending)
+		if err != nil {
+			return nil, err
+		}
+		outKeys[i] = newKey
+	}
+	outKeys = unique.UniquifyByteSlices(outKeys)
+	return outKeys, nil
 }
 
 // EncodePrimaryIndex constructs a list of k/v pairs for a row encoded as a primary index.
diff --git a/pkg/sql/sqlbase/structured.go b/pkg/sql/sqlbase/structured.go
index 62f55bf0e1dd..344d0cefb79b 100644
--- a/pkg/sql/sqlbase/structured.go
+++ b/pkg/sql/sqlbase/structured.go
@@ -2231,7 +2231,8 @@ func ColumnTypeIsIndexable(t *types.T) bool {
 // ColumnTypeIsInvertedIndexable returns whether the type t is valid to be indexed
 // using an inverted index.
 func ColumnTypeIsInvertedIndexable(t *types.T) bool {
-	return t.Family() == types.JsonFamily
+	family := t.Family()
+	return family == types.JsonFamily || family == types.ArrayFamily
 }
 
 func notIndexableError(cols []ColumnDescriptor, inverted bool) error {