Skip to content

Commit 3faaec6

Browse files
craig[bot]jbowensjeffswenson
committed
145096: storage: always write RANGEKEYDELs in ClearRangeWithHeuristic r=sumeerbhola a=jbowens Previously, ClearRangeWithHeuristic applied a heuristic that would switch between writing RANGEKEYUNSETs versus RANGEKEYDELs. When multiple range keys overlap, unsetting each individual range key with RANGEKEYUNSET is strictly worse than writing a single RANGEKEYDEL over the same span. This commit adapts ClearRangeWithHeuristic to write a single RANGEKEYDEL over the cleared span, but only if there are indeed range keys within the span. Setting a broad RANGEKEYDEL increases the likelihood that data can be removed using a delete-only compaction. Close #144954. Epic: none Release note: none 146098: logical: set origin id in tombstone updater r=jeffswenson a=jeffswenson Previously, the tombstone updater did not set an origin id when updating a tombstone. One critical use of the origin id is its used to filter LDR replication events. Events with an origin id are not replicated because replicating a replication write causes an infinite replication loop. Fixes: #146008 Release note: none Co-authored-by: Jackson Owens <[email protected]> Co-authored-by: Jeff Swenson <[email protected]>
3 parents 496efc3 + 614868e + b622042 commit 3faaec6

File tree

9 files changed

+126
-93
lines changed

9 files changed

+126
-93
lines changed

pkg/crosscluster/logical/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ go_test(
132132
"sql_row_reader_test.go",
133133
"sql_row_writer_test.go",
134134
"table_batch_handler_test.go",
135+
"tombstone_updater_test.go",
135136
"udf_row_processor_test.go",
136137
],
137138
data = glob(["testdata/**"]) + [

pkg/crosscluster/logical/tombstone_updater.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ func (tu *tombstoneUpdater) updateTombstone(
9797
// If updateTombstone is called in a transaction, create and run a batch
9898
// in the transaction.
9999
batch := txn.KV().NewBatch()
100+
batch.Header.WriteOptions = originID1Options
100101
if err := tu.addToBatch(ctx, txn.KV(), batch, mvccTimestamp, afterRow); err != nil {
101102
return err
102103
}
@@ -106,6 +107,7 @@ func (tu *tombstoneUpdater) updateTombstone(
106107
// 1pc transaction.
107108
return tu.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
108109
batch := txn.NewBatch()
110+
batch.Header.WriteOptions = originID1Options
109111
if err := tu.addToBatch(ctx, txn, batch, mvccTimestamp, afterRow); err != nil {
110112
return err
111113
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package logical
7+
8+
import (
9+
"context"
10+
"testing"
11+
12+
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
13+
"github.com/cockroachdb/cockroach/pkg/sql"
14+
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
15+
"github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils"
16+
"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease"
17+
"github.com/cockroachdb/cockroach/pkg/sql/isql"
18+
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
19+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
20+
"github.com/cockroachdb/cockroach/pkg/util/log"
21+
"github.com/stretchr/testify/require"
22+
)
23+
24+
func TestTombstoneUpdaterSetsOriginID(t *testing.T) {
25+
defer leaktest.AfterTest(t)()
26+
defer log.Scope(t).Close(t)
27+
28+
// This is a regression test for a bug in the tombstone updater. The
29+
// tombstone updater should always set the origin ID. Previously, it would
30+
// not set the origin id which caused logical replication to pick up the
31+
// tombstone update as a deletion event. This is undesirable because the
32+
// tombstone update case is only used when replicating deletes and if a
33+
// replicated write generates an LDR event, it leads to looping.
34+
35+
// Start server with two databases
36+
ctx := context.Background()
37+
server, s, runners, dbNames := setupServerWithNumDBs(t, ctx, testClusterBaseClusterArgs, 1, 2)
38+
defer server.Stopper().Stop(ctx)
39+
40+
// Create test table on both databases
41+
destRunner := runners[1]
42+
43+
// Create a tombstone updater
44+
desc := desctestutils.TestingGetMutableExistingTableDescriptor(
45+
s.DB(), s.Codec(), dbNames[0], "tab")
46+
sd := sql.NewInternalSessionData(ctx, s.ClusterSettings(), "" /* opName */)
47+
tu := newTombstoneUpdater(s.Codec(), s.DB(), s.LeaseManager().(*lease.Manager), desc.GetID(), sd, s.ClusterSettings())
48+
defer tu.ReleaseLeases(ctx)
49+
50+
// Set up 1 way logical replication. The replication stream is used to ensure
51+
// that the tombstone update will not be replicated as a deletion event.
52+
var jobID jobspb.JobID
53+
destRunner.QueryRow(t,
54+
"CREATE LOGICAL REPLICATION STREAM FROM TABLE a.tab ON $1 INTO TABLE b.tab",
55+
GetPGURLs(t, s, dbNames)[0].String()).Scan(&jobID)
56+
57+
// Write the row to the destination
58+
destRunner.Exec(t, "INSERT INTO tab VALUES (1, 42)")
59+
60+
row := tree.Datums{
61+
tree.NewDInt(tree.DInt(1)), // k
62+
tree.DNull, // v (deleted)
63+
}
64+
65+
_, err := tu.updateTombstone(ctx, nil, s.Clock().Now(), row)
66+
require.NoError(t, err)
67+
68+
config := s.ExecutorConfig().(sql.ExecutorConfig)
69+
err = sql.DescsTxn(ctx, &config, func(
70+
ctx context.Context, txn isql.Txn, descriptors *descs.Collection,
71+
) error {
72+
_, err = tu.updateTombstone(ctx, txn, s.Clock().Now(), row)
73+
return err
74+
})
75+
require.NoError(t, err)
76+
77+
// Wait for replication to advance
78+
WaitUntilReplicatedTime(t, s.Clock().Now(), destRunner, jobID)
79+
80+
// Verify the row still exists in the destination
81+
destRunner.CheckQueryResults(t, "SELECT pk, payload FROM tab", [][]string{
82+
{"1", "42"},
83+
})
84+
}

pkg/kv/kvserver/batcheval/cmd_clear_range.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,9 @@ func ClearRange(
150150
// If we're writing Pebble range tombstones, use ClearRangeWithHeuristic to
151151
// avoid writing tombstones across empty spans -- in particular, across the
152152
// range key span, since we expect range keys to be rare.
153-
const pointKeyThreshold, rangeKeyThreshold = 2, 2
153+
const pointKeyThreshold = 2
154154
if err := storage.ClearRangeWithHeuristic(
155-
ctx, readWriter, readWriter, from, to, pointKeyThreshold, rangeKeyThreshold,
155+
ctx, readWriter, readWriter, from, to, pointKeyThreshold,
156156
); err != nil {
157157
return result.Result{}, err
158158
}

pkg/kv/kvserver/client_merge_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4023,7 +4023,7 @@ func TestStoreRangeMergeRaftSnapshot(t *testing.T) {
40234023
EndKey: roachpb.RKey(keyEnd),
40244024
}
40254025
if err := storage.ClearRangeWithHeuristic(
4026-
ctx, receivingEng, &sst, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), 64, 8,
4026+
ctx, receivingEng, &sst, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), 64,
40274027
); err != nil {
40284028
return err
40294029
}

pkg/kv/kvserver/kvstorage/destroy.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,6 @@ const (
3131
// perhaps we should fix Pebble to handle large numbers of range tombstones in
3232
// an sstable better.
3333
ClearRangeThresholdPointKeys = 64
34-
35-
// ClearRangeThresholdRangeKeys is the threshold (as number of range keys)
36-
// beyond which we'll clear range data using a single RANGEKEYDEL across the
37-
// span rather than clearing individual range keys.
38-
ClearRangeThresholdRangeKeys = 8
3934
)
4035

4136
// ClearRangeDataOptions specify which parts of a Replica are to be destroyed.
@@ -78,14 +73,14 @@ func ClearRangeData(
7873
UnreplicatedByRangeID: opts.ClearUnreplicatedByRangeID,
7974
})
8075

81-
pointKeyThreshold, rangeKeyThreshold := ClearRangeThresholdPointKeys, ClearRangeThresholdRangeKeys
76+
pointKeyThreshold := ClearRangeThresholdPointKeys
8277
if opts.MustUseClearRange {
83-
pointKeyThreshold, rangeKeyThreshold = 1, 1
78+
pointKeyThreshold = 1
8479
}
8580

8681
for _, keySpan := range keySpans {
8782
if err := storage.ClearRangeWithHeuristic(
88-
ctx, reader, writer, keySpan.Key, keySpan.EndKey, pointKeyThreshold, rangeKeyThreshold,
83+
ctx, reader, writer, keySpan.Key, keySpan.EndKey, pointKeyThreshold,
8984
); err != nil {
9085
return err
9186
}

pkg/kv/kvserver/snapshot_apply_prepare.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,6 @@ func clearSubsumedReplicaDiskData(
272272
keySpans[i].EndKey,
273273
totalKeySpans[i].EndKey,
274274
kvstorage.ClearRangeThresholdPointKeys,
275-
kvstorage.ClearRangeThresholdRangeKeys,
276275
); err != nil {
277276
subsumedReplSST.Close()
278277
return nil, err

pkg/storage/engine.go

Lines changed: 30 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,24 +1589,22 @@ func WriteSyncNoop(eng Engine) error {
15891589
// either write a Pebble range tombstone or clear individual keys. If it uses
15901590
// a range tombstone, it will tighten the span to the first encountered key.
15911591
//
1592-
// pointKeyThreshold and rangeKeyThreshold specify the number of point/range
1593-
// keys respectively where it will switch from clearing individual keys to
1594-
// Pebble range tombstones (RANGEDEL or RANGEKEYDEL respectively). A threshold
1595-
// of 0 disables checking for and clearing that key type.
1592+
// The pointKeyThreshold parameter specifies the number of point keys where it
1593+
// will switch from clearing individual keys using point tombstones to clearing
1594+
// the entire range using Pebble range tombstones (RANGEDELs). The
1595+
// pointKeyThreshold value must be at least 1. NB: An initial scan will be done
1596+
// to determine the type of clear, so a large threshold will potentially involve
1597+
// scanning a large number of keys.
15961598
//
1597-
// NB: An initial scan will be done to determine the type of clear, so a large
1598-
// threshold will potentially involve scanning a large number of keys twice.
1599-
//
1600-
// TODO(erikgrinaker): Consider tightening the end of the range tombstone span
1601-
// too, by doing a SeekLT when we reach the threshold. It's unclear whether it's
1602-
// really worth it.
1599+
// ClearRangeWithHeuristic will also check for the existence of range keys, and
1600+
// if any exist, it will write a RANGEKEYDEL clearing all range keys in the span.
16031601
func ClearRangeWithHeuristic(
1604-
ctx context.Context,
1605-
r Reader,
1606-
w Writer,
1607-
start, end roachpb.Key,
1608-
pointKeyThreshold, rangeKeyThreshold int,
1602+
ctx context.Context, r Reader, w Writer, start, end roachpb.Key, pointKeyThreshold int,
16091603
) error {
1604+
if pointKeyThreshold < 1 {
1605+
return errors.AssertionFailedf("pointKeyThreshold must be at least 1")
1606+
}
1607+
16101608
clearPointKeys := func(r Reader, w Writer, start, end roachpb.Key, threshold int) error {
16111609
iter, err := r.NewEngineIterator(ctx, IterOptions{
16121610
KeyTypes: IterKeyTypePointsOnly,
@@ -1655,7 +1653,7 @@ func ClearRangeWithHeuristic(
16551653
return err
16561654
}
16571655

1658-
clearRangeKeys := func(r Reader, w Writer, start, end roachpb.Key, threshold int) error {
1656+
clearRangeKeys := func(r Reader, w Writer, start, end roachpb.Key) error {
16591657
iter, err := r.NewEngineIterator(ctx, IterOptions{
16601658
KeyTypes: IterKeyTypeRangesOnly,
16611659
LowerBound: start,
@@ -1666,51 +1664,29 @@ func ClearRangeWithHeuristic(
16661664
}
16671665
defer iter.Close()
16681666

1669-
// Scan, and drop a RANGEKEYDEL if we reach the threshold.
1670-
var ok bool
1671-
var count int
1672-
var firstKey roachpb.Key
1673-
for ok, err = iter.SeekEngineKeyGE(EngineKey{Key: start}); ok; ok, err = iter.NextEngineKey() {
1674-
count += len(iter.EngineRangeKeys())
1675-
if len(firstKey) == 0 {
1676-
bounds, err := iter.EngineRangeBounds()
1677-
if err != nil {
1678-
return err
1679-
}
1680-
firstKey = bounds.Key.Clone()
1681-
}
1682-
if count >= threshold {
1683-
return w.ClearRawRange(firstKey, end, false /* pointKeys */, true /* rangeKeys */)
1684-
}
1685-
}
1686-
if err != nil || count == 0 {
1667+
ok, err := iter.SeekEngineKeyGE(EngineKey{Key: start})
1668+
if err != nil {
16871669
return err
16881670
}
1689-
// Clear individual range keys.
1690-
for ok, err = iter.SeekEngineKeyGE(EngineKey{Key: start}); ok; ok, err = iter.NextEngineKey() {
1691-
bounds, err := iter.EngineRangeBounds()
1692-
if err != nil {
1693-
return err
1694-
}
1695-
for _, v := range iter.EngineRangeKeys() {
1696-
if err := w.ClearEngineRangeKey(bounds.Key, bounds.EndKey, v.Version); err != nil {
1697-
return err
1698-
}
1699-
}
1671+
if !ok {
1672+
// No range keys in the span.
1673+
return nil
17001674
}
1701-
return err
1702-
}
1703-
1704-
if pointKeyThreshold > 0 {
1705-
if err := clearPointKeys(r, w, start, end, pointKeyThreshold); err != nil {
1675+
bounds, err := iter.EngineRangeBounds()
1676+
if err != nil {
17061677
return err
17071678
}
1679+
// TODO(erikgrinaker): Consider tightening the end of the range
1680+
// tombstone span too, by doing a SeekLT when we reach the threshold.
1681+
// It's unclear whether it's really worth it.
1682+
return w.ClearRawRange(bounds.Key, end, false /* pointKeys */, true /* rangeKeys */)
17081683
}
17091684

1710-
if rangeKeyThreshold > 0 {
1711-
if err := clearRangeKeys(r, w, start, end, rangeKeyThreshold); err != nil {
1712-
return err
1713-
}
1685+
if err := clearPointKeys(r, w, start, end, pointKeyThreshold); err != nil {
1686+
return err
1687+
}
1688+
if err := clearRangeKeys(r, w, start, end); err != nil {
1689+
return err
17141690
}
17151691

17161692
return nil

pkg/storage/engine_test.go

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,49 +1813,25 @@ func TestEngineClearRange(t *testing.T) {
18131813

18141814
"ClearRangeWithHeuristic individual": {
18151815
clearRange: func(rw ReadWriter, start, end roachpb.Key) error {
1816-
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, math.MaxInt, math.MaxInt)
1816+
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, math.MaxInt)
18171817
},
18181818
clearsPointKeys: true,
18191819
clearsRangeKeys: true,
18201820
clearsIntents: false,
18211821
},
18221822
"ClearRangeWithHeuristic ranged": {
18231823
clearRange: func(rw ReadWriter, start, end roachpb.Key) error {
1824-
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, 1, 1)
1824+
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, 1)
18251825
},
18261826
clearsPointKeys: true,
18271827
clearsRangeKeys: true,
18281828
clearsIntents: false,
18291829
},
18301830
"ClearRangeWithHeuristic point keys individual": {
18311831
clearRange: func(rw ReadWriter, start, end roachpb.Key) error {
1832-
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, math.MaxInt, 0)
1832+
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, math.MaxInt)
18331833
},
18341834
clearsPointKeys: true,
1835-
clearsRangeKeys: false,
1836-
clearsIntents: false,
1837-
},
1838-
"ClearRangeWithHeuristic point keys ranged": {
1839-
clearRange: func(rw ReadWriter, start, end roachpb.Key) error {
1840-
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, 1, 0)
1841-
},
1842-
clearsPointKeys: true,
1843-
clearsRangeKeys: false,
1844-
clearsIntents: false,
1845-
},
1846-
"ClearRangeWithHeuristic range keys individual": {
1847-
clearRange: func(rw ReadWriter, start, end roachpb.Key) error {
1848-
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, 0, math.MaxInt)
1849-
},
1850-
clearsPointKeys: false,
1851-
clearsRangeKeys: true,
1852-
clearsIntents: false,
1853-
},
1854-
"ClearRangeWithHeuristic range keys ranged": {
1855-
clearRange: func(rw ReadWriter, start, end roachpb.Key) error {
1856-
return ClearRangeWithHeuristic(ctx, rw, rw, start, end, 0, 1)
1857-
},
1858-
clearsPointKeys: false,
18591835
clearsRangeKeys: true,
18601836
clearsIntents: false,
18611837
},

0 commit comments

Comments
 (0)