Skip to content

Commit c6860d3

Browse files
committed
rowblk: fix IsLowerBound to account for synthetic suffix
`Iter.firstUserKey` has the synthetic prefix prepended but the on-disk suffix is preserved; suffix replacement is only applied per-returned key via `maybeReplaceSuffix`. Comparing `firstUserKey` directly against the probe could return true even though the iterator's actual first key (after synthetic suffix replacement) sorts strictly less than the probe, violating the `blockiter.Data.IsLowerBound` contract. When a synthetic suffix is in effect, only compare the prefix portion of `firstUserKey` and require it to be strictly greater than the probe. The contract permits false negatives, so this conservative comparison is sound and avoids reasoning about the unknown suffix. Adds `TestIsLowerBoundRandomized` which cross-checks `IsLowerBound` against `First()` across random blocks and transform combinations, and documents the transform state of `firstUserKey`.
1 parent 36a5551 commit c6860d3

2 files changed

Lines changed: 89 additions & 0 deletions

File tree

sstable/rowblk/rowblk_iter.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ type Iter struct {
187187
cachedBuf []byte
188188
handle block.BufferHandle
189189
// for block iteration for already loaded blocks.
190+
//
191+
// firstUserKey is the user key of the first KV in the block. It has the
192+
// synthetic prefix applied (if any), but does NOT have the synthetic suffix
193+
// applied — the on-disk suffix is preserved.
190194
firstUserKey []byte
191195
lazyValueHandling struct {
192196
getValue block.GetInternalValueForPrefixAndValueHandler
@@ -541,6 +545,15 @@ func (i *Iter) cacheEntry() {
541545
// IsLowerBound implements the block.DataBlockIterator interface.
542546
func (i *Iter) IsLowerBound(k []byte) bool {
543547
// Note: we ignore HideObsoletePoints, but false negatives are allowed.
548+
if i.transforms.HasSyntheticSuffix() {
549+
// firstUserKey has the synthetic prefix applied, but its suffix is the
550+
// original on-disk suffix — not the synthetic suffix that the iterator will
551+
// actually return. Instead of applying the synthetic suffix, we
552+
// conservatively require firstUserKey's prefix portion to be strictly
553+
// greater than k (false negatives are allowed).
554+
firstPrefix := i.firstUserKey[:i.split(i.firstUserKey)]
555+
return i.cmp(firstPrefix, k) > 0
556+
}
544557
return i.cmp(i.firstUserKey, k) >= 0
545558
}
546559

sstable/rowblk/rowblk_iter_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@ package rowblk
77
import (
88
"bytes"
99
"fmt"
10+
"math/rand/v2"
11+
"slices"
1012
"strings"
1113
"testing"
14+
"time"
1215
"unsafe"
1316

1417
"github.com/cockroachdb/datadriven"
@@ -465,3 +468,76 @@ func TestBlockSyntheticSuffix(t *testing.T) {
465468
func ikey(s string) base.InternalKey {
466469
return base.InternalKey{UserKey: []byte(s)}
467470
}
471+
472+
func TestIsLowerBoundRand(t *testing.T) {
473+
cmp := testkeys.Comparer.Compare
474+
suffixCmp := testkeys.Comparer.ComparePointSuffixes
475+
split := testkeys.Comparer.Split
476+
477+
prefixes := []string{"a", "b", "c", "d", "e", "p/", "zzz/"}
478+
suffixes := []string{"", "@1", "@5", "@10", "@20", "@100"}
479+
480+
seed := uint64(time.Now().UnixNano())
481+
t.Logf("seed: %d", seed)
482+
rng := rand.New(rand.NewPCG(seed, seed))
483+
randKey := func() string {
484+
return prefixes[rng.IntN(len(prefixes))] + suffixes[rng.IntN(len(suffixes))]
485+
}
486+
487+
for iter := 0; iter < 1000; iter++ {
488+
// Construct a block with a random combination of prefixes and suffixes.
489+
n := 1 + rng.IntN(20)
490+
keySet := make(map[string]struct{}, n)
491+
for len(keySet) < n {
492+
keySet[randKey()] = struct{}{}
493+
}
494+
keys := make([]string, 0, n)
495+
for k := range keySet {
496+
keys = append(keys, k)
497+
}
498+
slices.SortFunc(keys, func(a, b string) int { return cmp([]byte(a), []byte(b)) })
499+
500+
w := &Writer{RestartInterval: 1 + rng.IntN(4)}
501+
for _, k := range keys {
502+
require.NoError(t, w.Add(ikey(k), nil))
503+
}
504+
blk := w.Finish()
505+
506+
// Pick a random prefix and suffix for the transform.
507+
var synthPrefix, synthSuffix []byte
508+
if rand.IntN(2) == 0 {
509+
synthPrefix = []byte(prefixes[rng.IntN(len(prefixes))])
510+
}
511+
if rand.IntN(2) == 0 {
512+
synthSuffix = []byte(suffixes[rng.IntN(len(suffixes))])
513+
}
514+
transforms := block.IterTransforms{
515+
SyntheticPrefixAndSuffix: block.MakeSyntheticPrefixAndSuffix(synthPrefix, synthSuffix),
516+
}
517+
518+
it, err := NewIter(cmp, suffixCmp, split, blk, transforms)
519+
require.NoError(t, err)
520+
521+
kv := it.First()
522+
require.NotNil(t, kv)
523+
// firstKey is the key after the transform.
524+
firstKey := slices.Clone(kv.K.UserKey)
525+
526+
for probe := 0; probe < 50; probe++ {
527+
probeKey := []byte(randKey())
528+
if rand.IntN(2) == 0 {
529+
// The transformed block keys are random keys with synthPrefix
530+
// prepended. To generate more interesting probe keys, sometimes prepend
531+
// this prefix to the random key.
532+
probeKey = append(slices.Clip(synthPrefix), probeKey...)
533+
}
534+
if it.IsLowerBound(probeKey) {
535+
if cmp(firstKey, probeKey) < 0 {
536+
t.Fatalf("IsLowerBound(%q)=true but First()=%q < probe (transforms=%+v, keys=%v)",
537+
probeKey, firstKey, transforms, keys)
538+
}
539+
}
540+
}
541+
require.NoError(t, it.Close())
542+
}
543+
}

0 commit comments

Comments
 (0)