Skip to content

Commit

Permalink
sql: make secondary indexes not write empty k/v's + bugfixes for primary
Browse files Browse the repository at this point in the history
key changes

Fixes cockroachdb#45223.

Depends on cockroachdb#45226 to land first.

This PR fixes many bugs around secondary index encodings
and CRUD operations k/v reads and writes.

* Fixes a problem secondary indexes would write empty
  k/v's if it contained a family that had all null values.
* Fix multiple bugs where updates to a table during an online
  primary key change could result an inconsistent
  new primary key.
* Fix an assumption in the updater that assumed indexes
  always had the same number of k/v's. The logic has been
  updated to perform a sort of merge operation to decide
  what k/v's to insert/delete during the update operation.
* Increased testing around secondary indexes k/vs and
  schema change operations.

The release note is None because these are all bugs
introduced in 20.1.

Release note: None
  • Loading branch information
rohany committed Feb 25, 2020
1 parent ca557e0 commit 0462816
Show file tree
Hide file tree
Showing 13 changed files with 548 additions and 61 deletions.
20 changes: 20 additions & 0 deletions pkg/sql/alter_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ func (n *alterTableNode) startExec(params runParams) error {
CreatedExplicitly: true,
EncodingType: sqlbase.PrimaryIndexEncoding,
Type: sqlbase.IndexDescriptor_FORWARD,
Version: sqlbase.SecondaryIndexFamilyFormatVersion,
}

// If the new index is requested to be sharded, set up the index descriptor
Expand Down Expand Up @@ -444,6 +445,25 @@ func (n *alterTableNode) startExec(params runParams) error {
return err
}

// Ensure that the new primary index stores all columns in the table. We can't
// use AllocateID's to fill the stored columns here because it assumes
// that the indexed columns are n.PrimaryIndex.ColumnIDs, but here we want
// to consider the indexed columns to be newPrimaryIndexDesc.ColumnIDs.
newPrimaryIndexDesc.StoreColumnNames, newPrimaryIndexDesc.StoreColumnIDs = nil, nil
for _, col := range n.tableDesc.Columns {
containsCol := false
for _, colID := range newPrimaryIndexDesc.ColumnIDs {
if colID == col.ID {
containsCol = true
break
}
}
if !containsCol {
newPrimaryIndexDesc.StoreColumnIDs = append(newPrimaryIndexDesc.StoreColumnIDs, col.ID)
newPrimaryIndexDesc.StoreColumnNames = append(newPrimaryIndexDesc.StoreColumnNames, col.Name)
}
}

if t.Interleave != nil {
if err := params.p.addInterleave(params.ctx, n.tableDesc, newPrimaryIndexDesc, t.Interleave); err != nil {
return err
Expand Down
5 changes: 3 additions & 2 deletions pkg/sql/backfill/backfill.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,11 +422,12 @@ func (ib *IndexBackfiller) BuildIndexEntriesChunk(
// We're resetting the length of this slice for variable length indexes such as inverted
// indexes which can append entries to the end of the slice. If we don't do this, then everything
// EncodeSecondaryIndexes appends to secondaryIndexEntries for a row, would stay in the slice for
// subsequent rows and we would then have duplicates in entries on output.
// subsequent rows and we would then have duplicates in entries on output. Additionally, we do
// not want to include empty k/v pairs while backfilling.
buffer = buffer[:len(ib.added)]
if buffer, err = sqlbase.EncodeSecondaryIndexes(
tableDesc.TableDesc(), ib.added, ib.colIdxMap,
ib.rowVals, buffer); err != nil {
ib.rowVals, buffer, false /* includeEmpty */); err != nil {
return nil, nil, err
}
entries = append(entries, buffer...)
Expand Down
193 changes: 193 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/secondary_index_column_families
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,196 @@ message LIKE 'Scan%'
ORDER BY message
----
Scan /Table/57/2/2/{0-1}

# Ensure that when backfilling an index we only insert the needed k/vs.
statement ok
DROP TABLE IF EXISTS t;
CREATE TABLE t (
x INT PRIMARY KEY, y INT, z INT, w INT,
FAMILY (y), FAMILY (x), FAMILY (z), FAMILY (w)
);
INSERT INTO t VALUES (1, 2, NULL, 3), (4, 5, 6, NULL), (8, 9, NULL, NULL);
CREATE INDEX i ON t (y) STORING (z, w)

query IIII rowsort
SET TRACING=on,kv,results;
SELECT * FROM t@i;
SET TRACING=off
----
1 2 NULL 3
4 5 6 NULL
8 9 NULL NULL

# Ensure by scanning that we fetch 2 k/v's for row (1, 2, NULL, 3),
# 2 k/v's for row (4, 5, 6, NULL), and 1 k/v for row (8, 9, NULL, NULL).
query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'fetched%'
ORDER BY message
----
fetched: /t/i/2/1 -> NULL
fetched: /t/i/2/1/w -> /3
fetched: /t/i/5/4 -> NULL
fetched: /t/i/5/4/z -> /6
fetched: /t/i/9/8 -> NULL

statement ok
DROP TABLE IF EXISTS t;
CREATE TABLE t (
x INT PRIMARY KEY, y INT, z INT, w INT,
FAMILY (y), FAMILY (x), FAMILY (z), FAMILY (w)
);
INSERT INTO t VALUES (1, 2, NULL, NULL)

statement ok
BEGIN

# Place i on the mutations queue in a delete only state.
statement ok
CREATE INDEX i ON t (y) STORING (z, w)

statement ok
SET TRACING=on,kv,results;
UPDATE t SET z = 3 WHERE y = 2;
SET TRACING=off

# Because i is in a delete only state, we should see a delete
# for each k/v for i for the row (1, 2, NULL, NULL).
query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'Del%'
ORDER BY message
----
Del /Table/59/2/2/1/0
Del /Table/59/2/2/1/2/1
Del /Table/59/2/2/1/3/1

statement ok
COMMIT

query IIII
SELECT * FROM t@i
----
1 2 3 NULL

statement ok
DROP TABLE IF EXISTS t;
CREATE TABLE t (
x INT PRIMARY KEY, y INT, a INT, b INT, c INT, d INT, e INT, f INT,
FAMILY (x), FAMILY (y), FAMILY (a, b), FAMILY (c, d), FAMILY (e), FAMILY (f),
INDEX i1 (y) STORING (a, b, c, d, e, f),
UNIQUE INDEX i2 (y) STORING (a, b, c, d, e, f)
);

# Ensure we only insert the correct keys.
statement ok
SET TRACING=on,kv,results;
INSERT INTO t VALUES (1, 2, 3, NULL, 5, 6, NULL, 8);
SET TRACING=off

query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'InitPut%'
ORDER BY message
----
InitPut /Table/60/2/2/1/0 -> /BYTES/
InitPut /Table/60/2/2/1/2/1 -> /TUPLE/3:3:Int/3
InitPut /Table/60/2/2/1/3/1 -> /TUPLE/5:5:Int/5/1:6:Int/6
InitPut /Table/60/2/2/1/5/1 -> /TUPLE/8:8:Int/8
InitPut /Table/60/3/2/0 -> /BYTES/0x89
InitPut /Table/60/3/2/2/1 -> /TUPLE/3:3:Int/3
InitPut /Table/60/3/2/3/1 -> /TUPLE/5:5:Int/5/1:6:Int/6
InitPut /Table/60/3/2/5/1 -> /TUPLE/8:8:Int/8

# Test some cases of the updater.
statement ok
SET TRACING=on,kv,results;
UPDATE t SET b = 4, c = NULL, d = NULL, e = 7, f = NULL WHERE y = 2;
SET TRACING=off

query IIIIIIII
SELECT * FROM t@i2
----
1 2 3 4 NULL NULL 7 NULL

query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'Put /Table/60/2/%' OR
message LIKE 'Del /Table/60/2/%' OR
message LIKE 'CPut /Table/60/2/%'
----
CPut /Table/60/2/2/1/2/1 -> /TUPLE/3:3:Int/3/1:4:Int/4 (replacing raw_bytes:"\000\000\000\000\n3\006" timestamp:<> , if exists)
Del /Table/60/2/2/1/3/1
CPut /Table/60/2/2/1/4/1 -> /TUPLE/7:7:Int/7 (expecting does not exist)
Del /Table/60/2/2/1/5/1

statement ok
INSERT INTO t VALUES (3, 3, NULL, NULL, NULL, NULL, NULL, NULL)

statement ok
SET TRACING=on,kv,results;
UPDATE t SET a = 10, b = 11, c = 12, d = 13, e = 14, f = 15 WHERE y = 3;
SET TRACING=off

query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'Put /Table/60/2/%' OR
message LIKE 'Del /Table/60/2/%' OR
message LIKE 'CPut /Table/60/2/%'
----
CPut /Table/60/2/3/3/2/1 -> /TUPLE/3:3:Int/10/1:4:Int/11 (expecting does not exist)
CPut /Table/60/2/3/3/3/1 -> /TUPLE/5:5:Int/12/1:6:Int/13 (expecting does not exist)
CPut /Table/60/2/3/3/4/1 -> /TUPLE/7:7:Int/14 (expecting does not exist)
CPut /Table/60/2/3/3/5/1 -> /TUPLE/8:8:Int/15 (expecting does not exist)

statement ok
SET TRACING=on,kv,results;
UPDATE t SET a = NULL, b = NULL, c = NULL, d = NULL, e = NULL, f = NULL WHERE y = 3;
SET TRACING=off

query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'Put /Table/60/2/%' OR
message LIKE 'Del /Table/60/2/%' OR
message LIKE 'CPut /Table/60/2/%'
----
Del /Table/60/2/3/3/2/1
Del /Table/60/2/3/3/3/1
Del /Table/60/2/3/3/4/1
Del /Table/60/2/3/3/5/1


statement ok
INSERT INTO t VALUES (20, 21, 22, NULL, NULL, 25, NULL, 27);
SET TRACING=on,kv,results;
UPDATE t SET y = 22 WHERE y = 21;
SET TRACING=off

query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WHERE
message LIKE 'Put /Table/60/2/%' OR
message LIKE 'Del /Table/60/2/%' OR
message LIKE 'CPut /Table/60/2/%'
----
Del /Table/60/2/21/20/0
CPut /Table/60/2/22/20/0 -> /BYTES/ (expecting does not exist)
Del /Table/60/2/21/20/2/1
CPut /Table/60/2/22/20/2/1 -> /TUPLE/3:3:Int/22 (expecting does not exist)
Del /Table/60/2/21/20/3/1
CPut /Table/60/2/22/20/3/1 -> /TUPLE/6:6:Int/25 (expecting does not exist)
Del /Table/60/2/21/20/5/1
CPut /Table/60/2/22/20/5/1 -> /TUPLE/8:8:Int/27 (expecting does not exist)

query IIIIIIII rowsort
SELECT * FROM t@i1
----
1 2 3 4 NULL NULL 7 NULL
3 3 NULL NULL NULL NULL NULL NULL
20 22 22 NULL NULL 25 NULL 27

query IIIIIIII rowsort
SELECT * FROM t@i2
----
1 2 3 4 NULL NULL 7 NULL
3 3 NULL NULL NULL NULL NULL NULL
20 22 22 NULL NULL 25 NULL 27
6 changes: 4 additions & 2 deletions pkg/sql/row/deleter.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,9 @@ func (rd *Deleter) DeleteRow(

// Delete the row from any secondary indices.
for i := range rd.Helper.Indexes {
// We want to include empty k/v pairs because we want to delete all k/v's for this row.
entries, err := sqlbase.EncodeSecondaryIndex(
rd.Helper.TableDesc.TableDesc(), &rd.Helper.Indexes[i], rd.FetchColIDtoRowIndex, values)
rd.Helper.TableDesc.TableDesc(), &rd.Helper.Indexes[i], rd.FetchColIDtoRowIndex, values, true /* includeEmpty */)
if err != nil {
return err
}
Expand Down Expand Up @@ -212,8 +213,9 @@ func (rd *Deleter) DeleteIndexRow(
return err
}
}
// We want to include empty k/v pairs because we want to delete all k/v's for this row.
secondaryIndexEntry, err := sqlbase.EncodeSecondaryIndex(
rd.Helper.TableDesc.TableDesc(), idx, rd.FetchColIDtoRowIndex, values)
rd.Helper.TableDesc.TableDesc(), idx, rd.FetchColIDtoRowIndex, values, true /* includeEmpty */)
if err != nil {
return err
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/row/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -1358,7 +1358,9 @@ func (rf *Fetcher) checkSecondaryIndexDatumEncodings(ctx context.Context) error
values[i] = table.row[i].Datum
}

indexEntries, err := sqlbase.EncodeSecondaryIndex(table.desc.TableDesc(), table.index, table.colIdxMap, values)
// The below code makes incorrect checks (#45256).
indexEntries, err := sqlbase.EncodeSecondaryIndex(
table.desc.TableDesc(), table.index, table.colIdxMap, values, false /* includeEmpty */)
if err != nil {
return err
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/row/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ func newRowHelper(
// secondaryIndexEntries are only valid until the next call to encodeIndexes or
// encodeSecondaryIndexes.
func (rh *rowHelper) encodeIndexes(
colIDtoRowIndex map[sqlbase.ColumnID]int, values []tree.Datum,
colIDtoRowIndex map[sqlbase.ColumnID]int, values []tree.Datum, includeEmpty bool,
) (primaryIndexKey []byte, secondaryIndexEntries []sqlbase.IndexEntry, err error) {
primaryIndexKey, err = rh.encodePrimaryIndex(colIDtoRowIndex, values)
if err != nil {
return nil, nil, err
}
secondaryIndexEntries, err = rh.encodeSecondaryIndexes(colIDtoRowIndex, values)
secondaryIndexEntries, err = rh.encodeSecondaryIndexes(colIDtoRowIndex, values, includeEmpty)
if err != nil {
return nil, nil, err
}
Expand All @@ -86,13 +86,13 @@ func (rh *rowHelper) encodePrimaryIndex(
// secondaryIndexEntries are only valid until the next call to encodeIndexes or
// encodeSecondaryIndexes.
func (rh *rowHelper) encodeSecondaryIndexes(
colIDtoRowIndex map[sqlbase.ColumnID]int, values []tree.Datum,
colIDtoRowIndex map[sqlbase.ColumnID]int, values []tree.Datum, includeEmpty bool,
) (secondaryIndexEntries []sqlbase.IndexEntry, err error) {
if len(rh.indexEntries) != len(rh.Indexes) {
rh.indexEntries = make([]sqlbase.IndexEntry, len(rh.Indexes))
}
rh.indexEntries, err = sqlbase.EncodeSecondaryIndexes(
rh.TableDesc.TableDesc(), rh.Indexes, colIDtoRowIndex, values, rh.indexEntries)
rh.TableDesc.TableDesc(), rh.Indexes, colIDtoRowIndex, values, rh.indexEntries, includeEmpty)
if err != nil {
return nil, err
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/row/inserter.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,9 @@ func (ri *Inserter) InsertRow(
}
}

primaryIndexKey, secondaryIndexEntries, err := ri.Helper.encodeIndexes(ri.InsertColIDtoRowIndex, values)
// We don't want to insert any empty k/v's, so set includeEmpty to false.
primaryIndexKey, secondaryIndexEntries, err := ri.Helper.encodeIndexes(
ri.InsertColIDtoRowIndex, values, false /* includeEmpty */)
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit 0462816

Please sign in to comment.