Skip to content

Commit 78f8281

Browse files
committed
db: avoid duplicate range deletion iterator in the level checker
In 1a2a77b we began interleaving the boundaries of range deletions from level iterators. This interleaving avoids stepping away from a table until its range deletions are no longer useful. However, this interleaving required its own range deletion iterator, necessitating opening two separate range deletion iterators into the same block. This commit updates the levelIter implementation to support interleaving range deletions without using a rangeDelIterSetter callback and adds a new getTombstone method that may be used to retrieve the tombstone at the current iterator's position. In this commit we update the level checker to no longer request its own range deletion iterator through rangeDelIterSetter. Instead use the levelIter's getTombstone method to read the current tombstone within each level, avoiding opening the duplicate range deletion iterator. This commit is a subset of the changes in the unmerged PR #3600. Future work will apply the same technique to the merging iterator, allowing ordinary user iterators to avoid the duplicate range deletion iterator too.
1 parent 9298029 commit 78f8281

3 files changed

Lines changed: 120 additions & 74 deletions

File tree

internal/rangedel/rangedel.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
package rangedel
66

77
import (
8+
"sync"
9+
810
"github.com/cockroachdb/pebble/internal/base"
911
"github.com/cockroachdb/pebble/internal/invariants"
1012
"github.com/cockroachdb/pebble/internal/keyspan"
@@ -61,3 +63,48 @@ func DecodeIntoSpan(cmp base.Compare, ik base.InternalKey, v []byte, s *keyspan.
6163
s.Keys = append(s.Keys, keyspan.Key{Trailer: ik.Trailer})
6264
return nil
6365
}
66+
67+
// Interleave takes a point iterator and a range deletion iterator, returning an
68+
// iterator that interleaves range deletion boundary keys at the maximal
69+
// sequence number among the stream of point keys.
70+
//
71+
// In addition, Interleave returns a function that may be used to retrieve the
72+
// range tombstone overlapping the current iterator position, if any. If range
73+
// deletion iterator is nil, the returned function is nil.
74+
//
75+
// The returned iterator must only be closed once.
76+
func Interleave(
77+
comparer *base.Comparer, iter base.InternalIterator, rangeDelIter keyspan.FragmentIterator,
78+
) (base.InternalIterator, func() *keyspan.Span) {
79+
// If there is no range deletion iterator, don't bother using an interleaving
80+
// iterator. We can return iter verbatim and a func that unconditionally
81+
// returns nil.
82+
if rangeDelIter == nil {
83+
return iter, nil
84+
}
85+
86+
ii := interleavingIterPool.Get().(*interleavingIter)
87+
ii.Init(comparer, iter, rangeDelIter, keyspan.InterleavingIterOpts{
88+
InterleaveEndKeys: true,
89+
})
90+
return ii, ii.Span
91+
}
92+
93+
var interleavingIterPool = sync.Pool{
94+
New: func() interface{} {
95+
return &interleavingIter{}
96+
},
97+
}
98+
99+
type interleavingIter struct {
100+
keyspan.InterleavingIter
101+
}
102+
103+
// Close closes the interleaving iterator and returns the interleaving iterator
104+
// to the pool.
105+
func (i *interleavingIter) Close() error {
106+
err := i.InterleavingIter.Close()
107+
*i = interleavingIter{}
108+
interleavingIterPool.Put(i)
109+
return err
110+
}

level_checker.go

Lines changed: 21 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/cockroachdb/pebble/internal/base"
1919
"github.com/cockroachdb/pebble/internal/keyspan"
2020
"github.com/cockroachdb/pebble/internal/manifest"
21+
"github.com/cockroachdb/pebble/internal/rangedel"
2122
"github.com/cockroachdb/pebble/sstable"
2223
"github.com/cockroachdb/pebble/sstable/blob"
2324
"github.com/cockroachdb/pebble/sstable/block"
@@ -55,19 +56,11 @@ import (
5556

5657
// The per-level structure used by simpleMergingIter.
5758
type simpleMergingIterLevel struct {
58-
iter internalIterator
59-
rangeDelIter keyspan.FragmentIterator
60-
61-
iterKV *base.InternalKV
62-
tombstone *keyspan.Span
63-
}
64-
65-
func (ml *simpleMergingIterLevel) setRangeDelIter(iter keyspan.FragmentIterator) {
66-
ml.tombstone = nil
67-
if ml.rangeDelIter != nil {
68-
ml.rangeDelIter.Close()
69-
}
70-
ml.rangeDelIter = iter
59+
iter internalIterator
60+
// getTombstone returns the range deletion tombstone covering the current
61+
// iterator position. getTombstone must not be called after iter is closed.
62+
getTombstone func() *keyspan.Span
63+
iterKV *base.InternalKV
7164
}
7265

7366
type simpleMergingIter struct {
@@ -114,26 +107,6 @@ func (m *simpleMergingIter) init(
114107
}
115108
}
116109
m.heap.init()
117-
118-
if m.heap.len() == 0 {
119-
return
120-
}
121-
m.positionRangeDels()
122-
}
123-
124-
// Positions all the rangedel iterators at or past the current top of the
125-
// heap, using SeekGE().
126-
func (m *simpleMergingIter) positionRangeDels() {
127-
item := &m.heap.items[0]
128-
for i := range m.levels {
129-
l := &m.levels[i]
130-
if l.rangeDelIter == nil {
131-
continue
132-
}
133-
t, err := l.rangeDelIter.SeekGE(item.kv.K.UserKey)
134-
m.err = firstError(m.err, err)
135-
l.tombstone = t
136-
}
137110
}
138111

139112
// Returns true if not yet done.
@@ -161,6 +134,7 @@ func (m *simpleMergingIter) step() bool {
161134
if l.iterKV == nil {
162135
m.err = errors.CombineErrors(l.iter.Error(), l.iter.Close())
163136
l.iter = nil
137+
l.getTombstone = nil
164138
m.heap.pop()
165139
} else {
166140
// Check point keys in an sstable are ordered. Although not required, we check
@@ -201,7 +175,6 @@ func (m *simpleMergingIter) step() bool {
201175
}
202176
return false
203177
}
204-
m.positionRangeDels()
205178
return true
206179
}
207180

@@ -283,12 +256,16 @@ func (m *simpleMergingIter) handleVisiblePoint(
283256
// iterators must be positioned at a key > item.key.
284257
for level := item.index + 1; level < len(m.levels); level++ {
285258
lvl := &m.levels[level]
286-
if lvl.rangeDelIter == nil || lvl.tombstone.Empty() {
259+
if lvl.getTombstone == nil {
287260
continue
288261
}
289-
if lvl.tombstone.Contains(m.heap.cmp, item.kv.K.UserKey) && lvl.tombstone.CoversAt(m.snapshot, item.kv.K.SeqNum()) {
262+
t := lvl.getTombstone()
263+
if t.Empty() {
264+
continue
265+
}
266+
if t.Contains(m.heap.cmp, item.kv.K.UserKey) && t.CoversAt(m.snapshot, item.kv.K.SeqNum()) {
290267
m.err = errors.Errorf("tombstone %s in %s deletes key %s in %s",
291-
lvl.tombstone.Pretty(m.formatKey), lvl.iter, item.kv.K.Pretty(m.formatKey),
268+
t.Pretty(m.formatKey), lvl.iter, item.kv.K.Pretty(m.formatKey),
292269
l.iter)
293270
return false
294271
}
@@ -589,20 +566,15 @@ func checkLevelsInternal(c *checkConfig) (err error) {
589566
err = firstError(err, l.iter.Close())
590567
l.iter = nil
591568
}
592-
if l.rangeDelIter != nil {
593-
l.rangeDelIter.Close()
594-
l.rangeDelIter = nil
595-
}
596569
}
597570
}()
598571

599572
memtables := c.readState.memtables
600573
for i := len(memtables) - 1; i >= 0; i-- {
601574
mem := memtables[i]
602-
mlevels = append(mlevels, simpleMergingIterLevel{
603-
iter: mem.newIter(nil),
604-
rangeDelIter: mem.newRangeDelIter(nil),
605-
})
575+
var mil simpleMergingIterLevel
576+
mil.iter, mil.getTombstone = rangedel.Interleave(c.comparer, mem.newIter(nil), mem.newRangeDelIter(nil))
577+
mlevels = append(mlevels, mil)
606578
}
607579

608580
current := c.readState.current
@@ -634,8 +606,9 @@ func checkLevelsInternal(c *checkConfig) (err error) {
634606
li := &levelIter{}
635607
li.init(context.Background(), iterOpts, c.comparer, c.newIters, manifestIter,
636608
manifest.L0Sublevel(sublevel), internalOpts)
637-
li.initRangeDel(&mlevelAlloc[0])
609+
li.interleaveRangeDels = true
638610
mlevelAlloc[0].iter = li
611+
mlevelAlloc[0].getTombstone = li.getTombstone
639612
mlevelAlloc = mlevelAlloc[1:]
640613
for f := range current.L0SublevelFiles[sublevel].All() {
641614
allTables = append(allTables, f)
@@ -649,8 +622,9 @@ func checkLevelsInternal(c *checkConfig) (err error) {
649622
li := &levelIter{}
650623
li.init(context.Background(), iterOpts, c.comparer, c.newIters,
651624
current.Levels[level].Iter(), manifest.Level(level), internalOpts)
652-
li.initRangeDel(&mlevelAlloc[0])
625+
li.interleaveRangeDels = true
653626
mlevelAlloc[0].iter = li
627+
mlevelAlloc[0].getTombstone = li.getTombstone
654628
mlevelAlloc = mlevelAlloc[1:]
655629
for f := range current.Levels[level].All() {
656630
allTables = append(allTables, f)

level_iter.go

Lines changed: 52 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -73,21 +73,23 @@ type levelIter struct {
7373
files manifest.LevelIterator
7474
err error
7575

76+
// interleaveRangeDels is set to true if the levelIter is configured to
77+
// interleave range deletions among point keys, using li.interleaving.
78+
interleaveRangeDels bool
79+
// interleaving is used when interleaveRangeDels=true to interleave the
80+
// boundaries of range deletions among point keys. When the level iterator
81+
// is used by a merging iterator, this ensures that we don't advance to a
82+
// new file until the range deletions are no longer needed by other levels.
83+
interleaving keyspan.InterleavingIter
7684
// When rangeDelIterSetter != nil, the caller requires that this function
7785
// gets called with a range deletion iterator whenever the current file
78-
// changes. The iterator is relinquished to the caller which is responsible
86+
// changes. The iterator is relinquished to the caller which is responsible
7987
// for closing it.
8088
//
81-
// When rangeDelIterSetter != nil, the levelIter will also interleave the
82-
// boundaries of range deletions among point keys.
89+
// When rangeDelIterSetter != nil, interleaveRangeDels must be true (but the
90+
// inverse is not true).
8391
rangeDelIterSetter rangeDelIterSetter
8492

85-
// interleaving is used when rangeDelIterFn != nil to interleave the
86-
// boundaries of range deletions among point keys. When the leve iterator is
87-
// used by a merging iterator, this ensures that we don't advance to a new
88-
// file until the range deletions are no longer needed by other levels.
89-
interleaving keyspan.InterleavingIter
90-
9193
// internalOpts holds the internal iterator options to pass to the table
9294
// cache when constructing new table iterators.
9395
internalOpts internalIterOpts
@@ -170,11 +172,12 @@ func (l *levelIter) init(
170172

171173
// initRangeDel puts the level iterator into a mode where it interleaves range
172174
// deletion boundaries with point keys and provides a range deletion iterator
173-
// (through rangeDelIterFn) whenever the current file changes.
175+
// (through rangeDelIterSetter) whenever the current file changes.
174176
//
175-
// The range deletion iterator passed to rangeDelIterFn is relinquished to the
176-
// implementor who is responsible for closing it.
177+
// The range deletion iterator passed to rangeDelIterSetter is relinquished to
178+
// the implementor who is responsible for closing it.
177179
func (l *levelIter) initRangeDel(rangeDelSetter rangeDelIterSetter) {
180+
l.interleaveRangeDels = true
178181
l.rangeDelIterSetter = rangeDelSetter
179182
}
180183

@@ -571,7 +574,7 @@ func (l *levelIter) loadFile(file *manifest.TableMetadata, dir int) loadFileRetu
571574
}
572575

573576
iterKinds := iterPointKeys
574-
if l.rangeDelIterSetter != nil {
577+
if l.interleaveRangeDels {
575578
iterKinds |= iterRangeDeletions
576579
}
577580

@@ -584,30 +587,39 @@ func (l *levelIter) loadFile(file *manifest.TableMetadata, dir int) loadFileRetu
584587
return noFileLoaded
585588
}
586589
l.iter = iters.Point()
587-
if l.rangeDelIterSetter != nil && iters.rangeDeletion != nil {
590+
if l.interleaveRangeDels && iters.rangeDeletion != nil {
588591
// If this file has range deletions, interleave the bounds of the
589592
// range deletions among the point keys. When used with a
590593
// mergingIter, this ensures we don't move beyond a file with range
591594
// deletions until its range deletions are no longer relevant.
592-
//
593-
// For now, we open a second range deletion iterator. Future work
594-
// will avoid the need to open a second range deletion iterator, and
595-
// avoid surfacing the file's range deletion iterator via rangeDelIterFn.
596-
itersForBounds, err := l.newIters(l.ctx, l.iterFile, &l.tableOpts, l.internalOpts, iterRangeDeletions)
597-
if err != nil {
598-
l.iter = nil
599-
l.err = errors.CombineErrors(err, iters.CloseAll())
600-
return noFileLoaded
601-
}
602-
l.interleaving.Init(l.comparer, l.iter, itersForBounds.RangeDeletion(), keyspan.InterleavingIterOpts{
595+
l.interleaving.Init(l.comparer, l.iter, iters.rangeDeletion, keyspan.InterleavingIterOpts{
603596
LowerBound: l.tableOpts.LowerBound,
604597
UpperBound: l.tableOpts.UpperBound,
605598
InterleaveEndKeys: true,
606599
})
607600
l.iter = &l.interleaving
608601

609-
// Relinquish iters.rangeDeletion to the caller.
610-
l.rangeDelIterSetter.setRangeDelIter(iters.rangeDeletion)
602+
// Additionally, when interleaving range deletions, optionally the
603+
// caller may request a copy of each range deletion iterator we
604+
// open by providing a rangeDelIterSetter.
605+
//
606+
// The levelIter requires its own range deletion iterator for
607+
// interleaving bounds, so we open a second range deletion iterator
608+
// that's solely owned by the caller and we relinquish it to the
609+
// caller through calling setRangeDelIter.
610+
if l.rangeDelIterSetter != nil {
611+
// TODO(jackson): This should be avoidable by teaching the
612+
// merging iterator to read range deletions from
613+
// levelIter.getTombstone() but requires some delicate
614+
// refactoring. See the unmerged PR #3600.
615+
itersForSetter, err := l.newIters(l.ctx, l.iterFile, &l.tableOpts, l.internalOpts, iterRangeDeletions)
616+
if err != nil {
617+
l.iter = nil
618+
l.err = errors.CombineErrors(err, iters.CloseAll())
619+
return noFileLoaded
620+
}
621+
l.rangeDelIterSetter.setRangeDelIter(itersForSetter.rangeDeletion)
622+
}
611623
}
612624
if treesteps.Enabled && treesteps.IsRecording(l) {
613625
treesteps.NodeUpdated(l, fmt.Sprintf("file %s loaded", l.iterFile.TableNum))
@@ -1027,6 +1039,19 @@ func (l *levelIter) exhaustedBackward() {
10271039
l.exhaustedDir = -1
10281040
}
10291041

1042+
// getTombstone retrieves the range tombstone covering the current iterator
1043+
// position. If there is none, or if the iterator is not configured to
1044+
// interleave range deletions, getTombstone returns nil.
1045+
//
1046+
// The returned Span's memory is guaranteed to be valid until the iterator is
1047+
// moved beyond the Span's interleaved boundary keys.
1048+
func (l *levelIter) getTombstone() *keyspan.Span {
1049+
if l.iter != &l.interleaving {
1050+
return nil
1051+
}
1052+
return l.interleaving.Span()
1053+
}
1054+
10301055
func (l *levelIter) Error() error {
10311056
if l.err != nil || l.iter == nil {
10321057
return l.err

0 commit comments

Comments
 (0)