From a98afefb0d065bf06c4629d8271aabc2e49f6db3 Mon Sep 17 00:00:00 2001 From: Shaun Patterson Date: Mon, 25 May 2026 20:41:37 -0400 Subject: [PATCH 1/2] perf(iterator): elide y.ParseKey in hasPrefix fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `hasPrefix` runs on every iterator.Next() call. Previously it always called `y.ParseKey(iitr.Key())` (nil check + sub + slice) before delegating to bytes.HasPrefix. When the user-supplied Prefix fits entirely within the userKey portion (len(internalKey) >= len(Prefix)+8), bytes.HasPrefix on the full internal key is equivalent to bytes.HasPrefix on ParseKey(internalKey) — the prefix bytes can only land inside the userKey region, never inside the 8-byte ts suffix. The fallback (prefix longer than userKey) still takes the ParseKey path so we don't spuriously match against ts bytes. Direct motivation: dgraph's posting/mvcc.go ReadPostingList walks all versions of a single key on every cache miss and every opRollup batch (16 keys per batch, continuous). hasPrefix is on that hot loop. Tests added: - TestRegressionHasPrefixFastPath: prefix < userKey hits the fast path - TestRegressionHasPrefixShortPrefixFallback: prefix > userKey hits the fallback and correctly returns false Co-Authored-By: Claude Opus 4.7 --- iterator.go | 19 +++++++++-- iterator_hasprefix_test.go | 66 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 iterator_hasprefix_test.go diff --git a/iterator.go b/iterator.go index f57cfa4c9..52542f909 100644 --- a/iterator.go +++ b/iterator.go @@ -717,10 +717,23 @@ func (it *Iterator) fill(item *Item) { func hasPrefix(it *Iterator) bool { // We shouldn't check prefix in case the iterator is going in reverse. Since in reverse we expect // people to append items to the end of prefix. - if !it.opt.Reverse && len(it.opt.Prefix) > 0 { - return bytes.HasPrefix(y.ParseKey(it.iitr.Key()), it.opt.Prefix) + if it.opt.Reverse || len(it.opt.Prefix) == 0 { + return true } - return true + // iitr.Key() is the internal key = userKey + 8-byte ts. When len(Prefix) fits + // entirely within the userKey portion (len(key) >= len(Prefix)+8), then + // bytes.HasPrefix(internalKey, Prefix) is equivalent to + // bytes.HasPrefix(y.ParseKey(internalKey), Prefix) — the prefix can only + // match bytes that lie before the ts suffix. This elides one y.ParseKey + // (nil-check + sub + slice) per iterator Next on the hot dgraph rollup loop. + key := it.iitr.Key() + p := it.opt.Prefix + if len(key) >= len(p)+8 { + return bytes.HasPrefix(key, p) + } + // Prefix is longer than userKey — must reparse so we don't spuriously match + // against ts bytes. + return bytes.HasPrefix(y.ParseKey(key), p) } func (it *Iterator) prefetch() { diff --git a/iterator_hasprefix_test.go b/iterator_hasprefix_test.go new file mode 100644 index 000000000..c2dd9ace7 --- /dev/null +++ b/iterator_hasprefix_test.go @@ -0,0 +1,66 @@ +/* + * SPDX-FileCopyrightText: © Hypermode Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package badger + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/require" +) + +// TestRegressionHasPrefixShortPrefixFallback covers the hasPrefix fallback +// branch where the user-supplied prefix is longer than userKey (len(p) > +// len(key)-8). The short-circuit "len(key) >= len(p)+8" must be false so we +// take the ParseKey path and correctly return false (no spurious match +// against ts bytes). +func TestRegressionHasPrefixShortPrefixFallback(t *testing.T) { + runBadgerTest(t, nil, func(t *testing.T, db *DB) { + // Single short key — userKey is 1 byte. + txnSet(t, db, []byte("a"), []byte("v"), 0) + + require.NoError(t, db.View(func(txn *Txn) error { + opt := DefaultIteratorOptions + // Prefix is longer than the single existing userKey "a". The + // optimized hasPrefix must take the ParseKey fallback (because + // len(key) < len(prefix)+8) and return false — no key matches. + opt.Prefix = []byte("abcdefghij") // 10 bytes, longer than "a" + it := txn.NewIterator(opt) + defer it.Close() + count := 0 + for it.Rewind(); it.Valid(); it.Next() { + count++ + } + require.Equal(t, 0, count, "no key should match an over-long prefix") + return nil + })) + }) +} + +// TestRegressionHasPrefixFastPath covers the optimized hasPrefix path where +// len(prefix) fits within userKey: it must still correctly identify matching +// and non-matching keys, equivalent to the old ParseKey-based check. +func TestRegressionHasPrefixFastPath(t *testing.T) { + runBadgerTest(t, nil, func(t *testing.T, db *DB) { + txnSet(t, db, []byte("alpha/1"), []byte("v"), 0) + txnSet(t, db, []byte("alpha/2"), []byte("v"), 0) + txnSet(t, db, []byte("beta/1"), []byte("v"), 0) + + require.NoError(t, db.View(func(txn *Txn) error { + opt := DefaultIteratorOptions + opt.Prefix = []byte("alpha/") + it := txn.NewIterator(opt) + defer it.Close() + count := 0 + for it.Rewind(); it.Valid(); it.Next() { + require.True(t, bytes.HasPrefix(it.Item().Key(), opt.Prefix)) + count++ + } + require.Equal(t, 2, count, "alpha/ should match both alpha/1 and alpha/2") + return nil + })) + }) +} From df816ab99ff615ebc2651c4eb3b2c55ffb0dbf3b Mon Sep 17 00:00:00 2001 From: Shaun Patterson Date: Mon, 25 May 2026 21:47:05 -0400 Subject: [PATCH 2/2] test(iterator): make hasPrefix fallback test actually trigger fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fixture used userKey="a" with prefix="abcdefghij". Seek lands past EOF (since "a" sorts before "abcdefghij") so hasPrefix is never called and the ParseKey fallback branch stays uncovered. Switch to userKey="z" with prefix="yy" — "z" sorts after "yy" so Seek lands on it, hasPrefix() runs, and the len(key)=9 < len(prefix)+8=10 test forces the fallback path. Confirmed: line 736 now hit in coverage. Co-Authored-By: Claude Opus 4.7 --- iterator_hasprefix_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/iterator_hasprefix_test.go b/iterator_hasprefix_test.go index c2dd9ace7..3c07378be 100644 --- a/iterator_hasprefix_test.go +++ b/iterator_hasprefix_test.go @@ -19,15 +19,15 @@ import ( // against ts bytes). func TestRegressionHasPrefixShortPrefixFallback(t *testing.T) { runBadgerTest(t, nil, func(t *testing.T, db *DB) { - // Single short key — userKey is 1 byte. - txnSet(t, db, []byte("a"), []byte("v"), 0) + // userKey "z" sorts AFTER prefix "yy", so Seek(prefix) lands on it + // and hasPrefix is actually called. The internal key is "z"+8 ts + // bytes (9 bytes), strictly shorter than prefix(2)+8=10, forcing + // the ParseKey fallback branch. + txnSet(t, db, []byte("z"), []byte("v"), 0) require.NoError(t, db.View(func(txn *Txn) error { opt := DefaultIteratorOptions - // Prefix is longer than the single existing userKey "a". The - // optimized hasPrefix must take the ParseKey fallback (because - // len(key) < len(prefix)+8) and return false — no key matches. - opt.Prefix = []byte("abcdefghij") // 10 bytes, longer than "a" + opt.Prefix = []byte("yy") // 2 bytes, longer than userKey "z" it := txn.NewIterator(opt) defer it.Close() count := 0