diff --git a/db.go b/db.go
index 8a1021308..129607a32 100644
--- a/db.go
+++ b/db.go
@@ -52,12 +52,19 @@ type closers struct {
 type lockedKeys struct {
 	sync.RWMutex
 	keys map[uint64]struct{}
+	// hasAny is a fast-path flag: false until the first add(), then true forever.
+	// Hot-path callers (DB.isBanned) check this without taking the lock so the
+	// common case (empty ban set) costs one atomic load instead of an
+	// RLock/RUnlock pair plus a map lookup. There is no remove API, so the
+	// flag is monotonic and never needs to flip back to false.
+	hasAny atomic.Bool
 }
 
 func (lk *lockedKeys) add(key uint64) {
 	lk.Lock()
 	defer lk.Unlock()
 	lk.keys[key] = struct{}{}
+	lk.hasAny.Store(true)
 }
 
 func (lk *lockedKeys) has(key uint64) bool {
@@ -1846,6 +1853,14 @@ func (db *DB) isBanned(key []byte) error {
 	if db.opt.NamespaceOffset < 0 {
 		return nil
 	}
+	// Fast path: no namespaces have ever been banned in this DB lifetime
+	// (the common production case). Skip the slice + lookup + lock entirely.
+	// isBanned is called on every iterator step and every Txn.Get/modify, so
+	// avoiding the RLock here matters when NamespaceOffset is enabled but no
+	// bans are active.
+	if !db.bannedNamespaces.hasAny.Load() {
+		return nil
+	}
 	if len(key) <= db.opt.NamespaceOffset+8 {
 		return nil
 	}
diff --git a/errors.go b/errors.go
index dcf0d12ae..40433dcb6 100644
--- a/errors.go
+++ b/errors.go
@@ -46,6 +46,14 @@ var (
 	// ErrBannedKey is returned if the read/write key belongs to any banned namespace.
 	ErrBannedKey = stderrors.New("Key is using the banned prefix")
 
+	// ErrKeyOnlyMode is returned by Item.Value and Item.ValueCopy when the
+	// containing iterator was created with IteratorOptions.KeyOnly=true.
+	// In that mode the iterator never copies the value bytes into the Item
+	// (the main reason to use KeyOnly is to avoid that per-item copy on
+	// key-only scans), so value access is unavailable on those items.
+	ErrKeyOnlyMode = stderrors.New(
+		"Item value is unavailable in KeyOnly iterator mode")
+
 	// ErrThresholdZero is returned if threshold is set to zero, and value log GC is called.
 	// In such a case, GC can't be run.
 	ErrThresholdZero = stderrors.New(
diff --git a/iterator.go b/iterator.go
index f57cfa4c9..379b5e5dc 100644
--- a/iterator.go
+++ b/iterator.go
@@ -43,6 +43,11 @@ type Item struct {
 	status   prefetchStatus
 	meta     byte // We need to store meta to know about bitValuePointer.
 	userMeta byte
+	// keyOnly is true when the parent iterator was created with
+	// IteratorOptions.KeyOnly. The iterator skips copying value bytes into
+	// this item, so Item.Value/ValueCopy and the size estimators must
+	// short-circuit instead of touching the (nil) vptr.
+	keyOnly bool
 }
 
 // String returns a string representation of Item
@@ -81,6 +86,9 @@ func (item *Item) Version() uint64 {
 // instead, or copy it yourself. Value might change once discard or commit is called.
 // Use ValueCopy if you want to do a Set after Get.
 func (item *Item) Value(fn func(val []byte) error) error {
+	if item.keyOnly {
+		return ErrKeyOnlyMode
+	}
 	item.wg.Wait()
 	if item.status == prefetched {
 		if item.err == nil && fn != nil {
@@ -108,6 +116,9 @@ func (item *Item) Value(fn func(val []byte) error) error {
 // This function is useful in long running iterate/update transactions to avoid a write deadlock.
 // See Github issue: https://github.com/dgraph-io/badger/issues/315
 func (item *Item) ValueCopy(dst []byte) ([]byte, error) {
+	if item.keyOnly {
+		return nil, ErrKeyOnlyMode
+	}
 	item.wg.Wait()
 	if item.status == prefetched {
 		return y.SafeCopy(dst, item.val), item.err
@@ -213,7 +224,14 @@ func (item *Item) prefetchValue() {
 // This can be called while iterating through a store to quickly estimate the
 // size of a range of key-value pairs (without fetching the corresponding
 // values).
+//
+// When the iterator was created with IteratorOptions.KeyOnly=true, the
+// value bytes (and value pointer for vlog entries) are not retained on
+// the item, so this returns the key size only.
 func (item *Item) EstimatedSize() int64 {
+	if item.keyOnly {
+		return int64(len(item.key))
+	}
 	if !item.hasValue() {
 		return 0
 	}
@@ -235,7 +253,13 @@ func (item *Item) KeySize() int64 {
 //
 // This can be called to quickly estimate the size of a value without fetching
 // it.
+//
+// When the iterator was created with IteratorOptions.KeyOnly=true the value
+// length is not retained on the item; this returns 0.
 func (item *Item) ValueSize() int64 {
+	if item.keyOnly {
+		return 0
+	}
 	if !item.hasValue() {
 		return 0
 	}
@@ -312,6 +336,17 @@ type IteratorOptions struct {
 	AllVersions    bool // Fetch all valid versions of the same key.
 	InternalAccess bool // Used to allow internal access to badger keys.
 
+	// KeyOnly tells the iterator that the caller will not access value bytes
+	// from any item. When set, the iterator skips copying value bytes into
+	// the Item, saving a per-item memcpy on key-only forward scans (e.g.
+	// dgraph's has() predicate evaluator and index scans). The trade-off:
+	// Item.Value and Item.ValueCopy return ErrKeyOnlyMode, Item.ValueSize
+	// reports 0, and Item.EstimatedSize reports the key length only. Key,
+	// Version, UserMeta, ExpiresAt and IsDeletedOrExpired work normally.
+	//
+	// PrefetchValues is forced to false when KeyOnly is true.
+	KeyOnly bool
+
 	// The following option is used to narrow down the SSTables that iterator
 	// picks up. If Prefix is specified, only tables which could have this
 	// prefix are picked based on their range of keys.
@@ -433,7 +468,18 @@ type Iterator struct {
 	data  list
 	waste list
 
-	lastKey []byte // Used to skip over multiple versions of the same key.
+	// lastKey stores the user-key (no 8-byte timestamp suffix) of the most
+	// recently considered candidate, used to skip subsequent older versions
+	// of the same user-key on AllVersions=false forward scans. Storing the
+	// user-key only avoids one ParseKey per same-key compare and a per-item
+	// 8-byte memcpy on the update.
+	lastKey []byte
+
+	// canSeeInternalKeys is true when this iterator can possibly surface a
+	// badger-internal key (e.g. "!badger!banned"). When false, parseItem
+	// can skip the per-step bytes.HasPrefix(key, badgerPrefix) check.
+	// Computed once at construction from opt.Prefix.
+	canSeeInternalKeys bool
 
 	closed  bool
 	scanned int // Used to estimate the size of data scanned by iterator.
@@ -464,6 +510,12 @@ func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator {
 		panic(ErrDBClosed)
 	}
 
+	// KeyOnly disables value access, so prefetching values is nonsensical.
+	// Force PrefetchValues off so the prefetch goroutine is never started.
+	if opt.KeyOnly {
+		opt.PrefetchValues = false
+	}
+
 	y.NumIteratorsCreatedAdd(txn.db.opt.MetricsEnabled, 1)
 
 	// Keep track of the number of active iterators.
@@ -482,14 +534,26 @@ func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator {
 	}
 	iters = txn.db.lc.appendIterators(iters, &opt) // This will increment references.
 	res := &Iterator{
-		txn:    txn,
-		iitr:   table.NewMergeIterator(iters, opt.Reverse),
-		opt:    opt,
-		readTs: txn.readTs,
+		txn:                txn,
+		iitr:               table.NewMergeIterator(iters, opt.Reverse),
+		opt:                opt,
+		readTs:             txn.readTs,
+		canSeeInternalKeys: canSeeInternalKeys(opt.Prefix),
 	}
 	return res
 }
 
+// canSeeInternalKeys tells NewIterator whether a scan bounded by prefix
+// could ever land on a badger-internal key. Internal keys all start with
+// badgerPrefix, so a non-empty prefix whose first byte differs from
+// badgerPrefix[0] rules them out; an empty prefix rules nothing out.
+func canSeeInternalKeys(prefix []byte) bool {
+	if len(prefix) > 0 {
+		return prefix[0] == badgerPrefix[0]
+	}
+	return true
+}
+
 // NewKeyIterator is just like NewIterator, but allows the user to iterate over all versions of a
 // single key. Internally, it sets the Prefix option in provided opt, and uses that prefix to
 // additionally run bloom filter lookups before picking tables from the LSM tree.
@@ -616,11 +680,18 @@ func (it *Iterator) parseItem() bool {
 		}
 	}
 
-	isInternalKey := bytes.HasPrefix(key, badgerPrefix)
-	// Skip badger keys.
-	if !it.opt.InternalAccess && isInternalKey {
-		mi.Next()
-		return false
+	// Detect badger-internal keys. When canSeeInternalKeys is false (the
+	// common case for prefix-bounded user scans whose prefix cannot collide
+	// with badgerPrefix), we know the current key cannot be internal and
+	// elide the per-step bytes.HasPrefix(key, badgerPrefix) probe.
+	var isInternalKey bool
+	if it.canSeeInternalKeys {
+		isInternalKey = bytes.HasPrefix(key, badgerPrefix)
+		// Skip badger keys.
+		if !it.opt.InternalAccess && isInternalKey {
+			mi.Next()
+			return false
+		}
 	}
 
 	// Skip any versions which are beyond the readTs.
@@ -640,8 +711,9 @@ func (it *Iterator) parseItem() bool {
 	if it.opt.AllVersions {
 		// Return deleted or expired values also, otherwise user can't figure out
 		// whether the key was deleted.
+		vs := mi.Value()
 		item := it.newItem()
-		it.fill(item)
+		it.fill(item, key, &vs)
 		setItem(item)
 		mi.Next()
 		return true
@@ -650,7 +722,18 @@ func (it *Iterator) parseItem() bool {
 	// If iterating in forward direction, then just checking the last key against current key would
 	// be sufficient.
 	if !it.opt.Reverse {
-		if y.SameKey(it.lastKey, key) {
+		// lastKey holds the user-key only. Compare against the user-key
+		// portion of the current full key (last 8 bytes are the ts).
+		// bytes.Equal already short-circuits on length mismatch, but the
+		// explicit length check lets the compiler hoist the bounds check
+		// out of the user-key slice and keeps the hot path branch-tight.
+		//
+		// len(key) >= 8 is a badger-wide invariant: every key in the LSM is
+		// stored with an 8-byte timestamp suffix via y.KeyWithTs, and
+		// y.ParseTs(key) above already relies on this (it indexes
+		// key[len(key)-8:]). No defensive check is needed here.
+		ukLen := len(key) - 8
+		if ukLen == len(it.lastKey) && bytes.Equal(key[:ukLen], it.lastKey) {
 			mi.Next()
 			return false
 		}
@@ -659,11 +742,16 @@ func (it *Iterator) parseItem() bool {
 		// Consider keys: a 5, b 7 (del), b 5. When iterating, lastKey = a.
 		// Then we see b 7, which is deleted. If we don't store lastKey = b, we'll then return b 5,
 		// which is wrong. Therefore, update lastKey here.
-		it.lastKey = y.SafeCopy(it.lastKey, mi.Key())
+		it.lastKey = y.SafeCopy(it.lastKey, key[:ukLen])
 	}
 
 FILL:
-	// If deleted, advance and return.
+	// Invariant on entry to FILL: `key` is mi.Key() at the *current* iitr
+	// position. The only goto FILL (below, reverse path) refreshes `key`
+	// after mi.Next(); the fall-through entry from above never advances the
+	// iterator between `key := mi.Key()` and reaching FILL. fill() can
+	// therefore safely reuse the caller-supplied key without re-calling
+	// mi.Key().
 	vs := mi.Value()
 	if isDeletedOrExpired(vs.Meta, vs.ExpiresAt) {
 		mi.Next()
@@ -671,7 +759,7 @@ FILL:
 	}
 
 	item := it.newItem()
-	it.fill(item)
+	it.fill(item, key, &vs)
 	// fill item based on current cursor position. All Next calls have returned, so reaching here
 	// means no Next was called.
 
@@ -681,9 +769,11 @@ FILL:
 		return true
 	}
 
-	// Reverse direction.
-	nextTs := y.ParseTs(mi.Key())
-	mik := y.ParseKey(mi.Key())
+	// Reverse direction. Refresh key after the Next() above; the iterator
+	// has advanced, so the previous `key` slice now refers to a later block.
+	key = mi.Key()
+	nextTs := y.ParseTs(key)
+	mik := y.ParseKey(key)
 	if nextTs <= it.readTs && bytes.Equal(mik, item.key) {
 		// This is a valid potential candidate.
 		goto FILL
@@ -693,17 +783,32 @@ FILL:
 	return true
 }
 
-func (it *Iterator) fill(item *Item) {
-	vs := it.iitr.Value()
+// fill populates item from the current iterator position. Callers pass the
+// already-fetched key and value pointer to avoid the per-item cost of
+// calling mi.Key() / mi.Value() (and decoding ValueStruct) a second time
+// on the hot iterator path. vs is passed by pointer to avoid copying the
+// ~40-byte ValueStruct on every kept item.
+func (it *Iterator) fill(item *Item, key []byte, vs *y.ValueStruct) {
 	item.meta = vs.Meta
 	item.userMeta = vs.UserMeta
 	item.expiresAt = vs.ExpiresAt
+	item.keyOnly = it.opt.KeyOnly
 
-	item.version = y.ParseTs(it.iitr.Key())
-	item.key = y.SafeCopy(item.key, y.ParseKey(it.iitr.Key()))
+	item.version = y.ParseTs(key)
+	item.key = y.SafeCopy(item.key, y.ParseKey(key))
 
-	item.vptr = y.SafeCopy(item.vptr, vs.Value)
 	item.val = nil
+	if it.opt.KeyOnly {
+		// Don't copy vs.Value: KeyOnly callers have promised not to read
+		// it, and the SafeCopy is the largest per-item memmove on the
+		// key-only forward-scan hot path. nil out any leftover capacity
+		// from a previous item that was reused via the iterator's
+		// freelist; callers that ignore the contract will at least see a
+		// nil vptr rather than stale bytes.
+		item.vptr = nil
+	} else {
+		item.vptr = y.SafeCopy(item.vptr, vs.Value)
+	}
 	if it.opt.PrefetchValues {
 		item.wg.Add(1)
 		go func() {
diff --git a/iterator_dgraph_bench_test.go b/iterator_dgraph_bench_test.go
new file mode 100644
index 000000000..61bd9d3a2
--- /dev/null
+++ b/iterator_dgraph_bench_test.go
@@ -0,0 +1,340 @@
+/*
+ * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package badger
+
+import (
+	"encoding/binary"
+	"math"
+	"math/rand"
+	"os"
+	"testing"
+
+	"github.com/dgraph-io/badger/v4/y"
+)
+
+// dgraph-shaped iterator micro-benchmarks.
+//
+// These benchmarks model the hot iterator paths in dgraph's posting layer
+// (posting/mvcc.go: ReadPostingList, IterateDisk, sort.go: index bucket scan).
+// They are intentionally distinct from the existing root-package benchmarks
+// because none of those exercise the combination dgraph actually uses:
+//
+//   - NamespaceOffset = 1 (so DB.isBanned runs on every key)
+//   - NumVersionsToKeep = math.MaxInt32 (dgraph keeps all versions)
+//   - DetectConflicts = false (dgraph owns OCC)
+//   - PrefetchValues = false (key-only scans in hot paths)
+//   - AllVersions = true (MVCC-aware reads in ReadPostingList / rollup)
+//   - Prefix-bounded scans over mid-sized structured keys
+//
+// Key layout mirrors dgraph's data keys (see x/keys.go in dgraph):
+//
+//   byte 0      : type prefix (0x00 = data)
+//   bytes 1..8  : namespace, big-endian uint64
+//   bytes 9..10 : 2-byte attr length, big-endian uint16
+//   bytes 11..  : attr name (variable, ~16 bytes typical)
+//   next 1 byte : subtype
+//   last 8 bytes: UID, big-endian
+//
+// Total: 1 + 8 + 2 + len(attr) + 1 + 8 ≈ 36 bytes for a 16-byte attr.
+
+const (
+	dgKeyTypeData = 0x00
+	dgAttrName    = "predicate_namespaced" // 20 bytes -> 40-byte keys
+)
+
+// dgKey encodes a dgraph-shaped data key: type byte, big-endian namespace,
+// big-endian 2-byte attr length, attr bytes, subtype byte, big-endian UID.
+func dgKey(ns uint64, attr string, subtype byte, uid uint64) []byte {
+	// Capacity is exact, so none of the appends below reallocates.
+	out := make([]byte, 0, 1+8+2+len(attr)+1+8)
+	out = append(out, dgKeyTypeData)
+	out = binary.BigEndian.AppendUint64(out, ns)
+	out = binary.BigEndian.AppendUint16(out, uint16(len(attr)))
+	out = append(out, attr...)
+	out = append(out, subtype)
+	return binary.BigEndian.AppendUint64(out, uid)
+}
+
+// dgPrefix builds the leading bytes shared by every key of one predicate in
+// one namespace (type byte, namespace, attr length, attr) for prefix scans.
+func dgPrefix(ns uint64, attr string) []byte {
+	out := make([]byte, 0, 1+8+2+len(attr))
+	out = append(out, dgKeyTypeData)
+	out = binary.BigEndian.AppendUint64(out, ns)
+	out = binary.BigEndian.AppendUint16(out, uint16(len(attr)))
+	out = append(out, attr...)
+	return out
+}
+
+// dgraphTestOptions returns the badger options that match dgraph's
+// production configuration: managed DB, all-versions retention, no conflict
+// detection, namespace offset = 1.
+func dgraphTestOptions(dir string) Options {
+	return DefaultOptions(dir).
+		WithSyncWrites(false).
+		WithLoggingLevel(WARNING).
+		WithNumVersionsToKeep(math.MaxInt32).
+		WithDetectConflicts(false).
+		WithNamespaceOffset(1)
+}
+
+// dgraphLoadDB populates a managed DB with `nKeys` unique keys under a single
+// predicate, each carrying `versionsPerKey` MVCC versions. UIDs are dense
+// (1..nKeys) so prefix scans land contiguous keys, mirroring dgraph's
+// `has()` predicate scans.
+func dgraphLoadDB(b *testing.B, db *DB, ns uint64, attr string, nKeys, versionsPerKey int) {
+	b.Helper()
+	// Small value: realistic for posting-list deltas (a few hundred bytes is
+	// the upper end; we use 64 bytes to keep total disk footprint bounded).
+	val := make([]byte, 64)
+	for i := range val {
+		val[i] = byte(i)
+	}
+
+	const batch = 4000
+	commitTs := uint64(1)
+	for start := 0; start < nKeys; start += batch {
+		end := start + batch
+		if end > nKeys {
+			end = nKeys
+		}
+		// Write `versionsPerKey` versions of each key in this batch.
+		for v := 0; v < versionsPerKey; v++ {
+			txn := db.NewTransactionAt(math.MaxUint64, true)
+			for i := start; i < end; i++ {
+				key := dgKey(ns, attr, 0, uint64(i+1))
+				if err := txn.SetEntry(&Entry{Key: key, Value: val}); err != nil {
+					b.Fatalf("SetEntry: %v", err)
+				}
+			}
+			if err := txn.CommitAt(commitTs, nil); err != nil {
+				b.Fatalf("CommitAt: %v", err)
+			}
+			commitTs++
+		}
+	}
+	// Force everything to disk so iteration reflects the on-disk format
+	// (avoids measuring only the memtable hit).
+	if err := db.Flatten(2); err != nil {
+		b.Fatalf("Flatten: %v", err)
+	}
+}
+
+// openDgraphDB opens a managed DB with the dgraph-shaped options.
+func openDgraphDB(b *testing.B) (*DB, string) {
+	b.Helper()
+	dir, err := os.MkdirTemp(".", "badger-dgraph-bench")
+	y.Check(err)
+	db, err := OpenManaged(dgraphTestOptions(dir))
+	y.Check(err)
+	return db, dir
+}
+
+// BenchmarkDgraphPrefixScanKeyOnly models the IterateDisk / index-scan path:
+// prefix-bounded forward iteration with PrefetchValues=false, one version per
+// key. This is what dgraph's `has()` predicate evaluator and sort.go index
+// scans drive.
+func BenchmarkDgraphPrefixScanKeyOnly(b *testing.B) {
+	const (
+		ns    = uint64(0x0102030405060708)
+		nKeys = 200_000
+	)
+	db, dir := openDgraphDB(b)
+	defer func() { db.Close(); removeDir(dir) }()
+	dgraphLoadDB(b, db, ns, dgAttrName, nKeys, 1)
+
+	prefix := dgPrefix(ns, dgAttrName)
+	b.ResetTimer()
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		txn := db.NewTransactionAt(math.MaxUint64, false)
+		opt := DefaultIteratorOptions
+		opt.Prefix = prefix
+		opt.PrefetchValues = false
+		it := txn.NewIterator(opt)
+		count := 0
+		for it.Rewind(); it.Valid(); it.Next() {
+			count++
+		}
+		it.Close()
+		txn.Discard()
+		if count != nKeys {
+			b.Fatalf("expected %d keys, got %d", nKeys, count)
+		}
+	}
+	b.ReportMetric(float64(nKeys), "keys/op")
+}
+
+// BenchmarkDgraphPrefixScanKeyOnlyOpt is the same workload as
+// BenchmarkDgraphPrefixScanKeyOnly but uses IteratorOptions.KeyOnly=true.
+// This measures the win from skipping the per-item SafeCopy(vptr) on the
+// has()/index-scan hot path; callers in this mode never read item.Value.
+func BenchmarkDgraphPrefixScanKeyOnlyOpt(b *testing.B) {
+	const (
+		ns    = uint64(0x0102030405060708)
+		nKeys = 200_000
+	)
+	db, dir := openDgraphDB(b)
+	defer func() { db.Close(); removeDir(dir) }()
+	dgraphLoadDB(b, db, ns, dgAttrName, nKeys, 1)
+
+	prefix := dgPrefix(ns, dgAttrName)
+	b.ResetTimer()
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		txn := db.NewTransactionAt(math.MaxUint64, false)
+		opt := DefaultIteratorOptions
+		opt.Prefix = prefix
+		opt.KeyOnly = true
+		it := txn.NewIterator(opt)
+		count := 0
+		for it.Rewind(); it.Valid(); it.Next() {
+			count++
+		}
+		it.Close()
+		txn.Discard()
+		if count != nKeys {
+			b.Fatalf("expected %d keys, got %d", nKeys, count)
+		}
+	}
+	b.ReportMetric(float64(nKeys), "keys/op")
+}
+
+// BenchmarkDgraphPrefixScanAllVersions models the rollup path:
+// prefix-bounded forward iteration with PrefetchValues=false and
+// AllVersions=true, where each key has several MVCC versions.
+func BenchmarkDgraphPrefixScanAllVersions(b *testing.B) {
+	const (
+		ns             = uint64(0x0102030405060708)
+		nKeys          = 50_000
+		versionsPerKey = 5
+	)
+	db, dir := openDgraphDB(b)
+	defer func() { db.Close(); removeDir(dir) }()
+	dgraphLoadDB(b, db, ns, dgAttrName, nKeys, versionsPerKey)
+
+	prefix := dgPrefix(ns, dgAttrName)
+	b.ResetTimer()
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		txn := db.NewTransactionAt(math.MaxUint64, false)
+		opt := DefaultIteratorOptions
+		opt.Prefix = prefix
+		opt.PrefetchValues = false
+		opt.AllVersions = true
+		it := txn.NewIterator(opt)
+		count := 0
+		for it.Rewind(); it.Valid(); it.Next() {
+			count++
+		}
+		it.Close()
+		txn.Discard()
+		// Should see nKeys * versionsPerKey items.
+		if count != nKeys*versionsPerKey {
+			b.Fatalf("expected %d items, got %d", nKeys*versionsPerKey, count)
+		}
+	}
+	b.ReportMetric(float64(nKeys*versionsPerKey), "items/op")
+}
+
+// BenchmarkDgraphKeyIteratorAllVersions models readFromDisk:
+// a NewKeyIterator over a single posting key with AllVersions=true. dgraph
+// invokes this on every cache miss in the posting layer.
+func BenchmarkDgraphKeyIteratorAllVersions(b *testing.B) {
+	const (
+		ns             = uint64(0x0102030405060708)
+		nKeys          = 20_000
+		versionsPerKey = 8
+	)
+	db, dir := openDgraphDB(b)
+	defer func() { db.Close(); removeDir(dir) }()
+	dgraphLoadDB(b, db, ns, dgAttrName, nKeys, versionsPerKey)
+
+	// Pre-build the lookup key set so the random selection cost stays out
+	// of the measured loop.
+	keys := make([][]byte, nKeys)
+	for i := 0; i < nKeys; i++ {
+		keys[i] = dgKey(ns, dgAttrName, 0, uint64(i+1))
+	}
+	rng := rand.New(rand.NewSource(1))
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		key := keys[rng.Intn(nKeys)]
+		txn := db.NewTransactionAt(math.MaxUint64, false)
+		opt := DefaultIteratorOptions
+		opt.PrefetchValues = false
+		opt.AllVersions = true
+		it := txn.NewKeyIterator(key, opt)
+		count := 0
+		for it.Seek(key); it.Valid(); it.Next() {
+			count++
+		}
+		it.Close()
+		txn.Discard()
+		if count != versionsPerKey {
+			b.Fatalf("expected %d versions, got %d", versionsPerKey, count)
+		}
+	}
+}
+
+// BenchmarkDgraphPrefixScanNamespaceOffset runs the same prefix scan with
+// NamespaceOffset=-1 (off) and NamespaceOffset=1 (dgraph-style). No namespace
+// is ever banned here, so the gap between the two sub-benchmarks is the
+// per-key cost of DB.isBanned when the ban set is empty.
+func BenchmarkDgraphPrefixScanNamespaceOffset(b *testing.B) {
+	const (
+		ns    = uint64(0x0102030405060708)
+		nKeys = 100_000
+	)
+	cases := []struct {
+		name  string
+		nsOff int // -1 = off, 1 = dgraph-style
+	}{
+		{"off", -1},
+		{"dgraph", 1},
+	}
+
+	for _, c := range cases {
+		b.Run(c.name, func(b *testing.B) {
+			dir, err := os.MkdirTemp(".", "badger-dgraph-nsoff")
+			y.Check(err)
+			defer removeDir(dir)
+			opts := DefaultOptions(dir).
+				WithSyncWrites(false).
+				WithLoggingLevel(WARNING).
+				WithNumVersionsToKeep(math.MaxInt32).
+				WithDetectConflicts(false).
+				WithNamespaceOffset(c.nsOff)
+			db, err := OpenManaged(opts)
+			y.Check(err)
+			defer db.Close()
+			dgraphLoadDB(b, db, ns, dgAttrName, nKeys, 1)
+
+			prefix := dgPrefix(ns, dgAttrName)
+			b.ResetTimer()
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				txn := db.NewTransactionAt(math.MaxUint64, false)
+				opt := DefaultIteratorOptions
+				opt.Prefix = prefix
+				opt.PrefetchValues = false
+				it := txn.NewIterator(opt)
+				count := 0
+				for it.Rewind(); it.Valid(); it.Next() {
+					count++
+				}
+				it.Close()
+				txn.Discard()
+				if count != nKeys {
+					b.Fatalf("expected %d keys, got %d", nKeys, count)
+				}
+			}
+		})
+	}
+}
+
diff --git a/iterator_keyonly_test.go b/iterator_keyonly_test.go
new file mode 100644
index 000000000..8a7d1c601
--- /dev/null
+++ b/iterator_keyonly_test.go
@@ -0,0 +1,399 @@
+/*
+ * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package badger
+
+import (
+	"errors"
+	"fmt"
+	"testing"
+
+	"github.com/dgraph-io/badger/v4/y"
+	"github.com/stretchr/testify/require"
+)
+
+// TestKeyOnlyIterator_ValueReturnsErrKeyOnlyMode checks that Item.Value
+// short-circuits with ErrKeyOnlyMode when the iterator was opened KeyOnly.
+func TestKeyOnlyIterator_ValueReturnsErrKeyOnlyMode(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		txnSet(t, db, []byte("k1"), []byte("v1"), 0)
+
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.KeyOnly = true
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			for it.Rewind(); it.Valid(); it.Next() {
+				item := it.Item()
+				err := item.Value(func(v []byte) error { return nil })
+				require.True(t, errors.Is(err, ErrKeyOnlyMode),
+					"Value() should return ErrKeyOnlyMode, got %v", err)
+			}
+			return nil
+		}))
+	})
+}
+
+// TestKeyOnlyIterator_ValueCopyReturnsErrKeyOnlyMode covers iterator.go:Item.ValueCopy.
+func TestKeyOnlyIterator_ValueCopyReturnsErrKeyOnlyMode(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		txnSet(t, db, []byte("k1"), []byte("v1"), 0)
+
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.KeyOnly = true
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			for it.Rewind(); it.Valid(); it.Next() {
+				item := it.Item()
+				buf, err := item.ValueCopy(nil)
+				require.True(t, errors.Is(err, ErrKeyOnlyMode))
+				require.Nil(t, buf)
+			}
+			return nil
+		}))
+	})
+}
+
+// TestKeyOnlyIterator_EstimatedSizeReturnsKeyLen covers iterator.go:Item.EstimatedSize
+// short-circuit (returns int64(len(item.key)) when keyOnly).
+func TestKeyOnlyIterator_EstimatedSizeReturnsKeyLen(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		txnSet(t, db, []byte("abc"), []byte("lots-of-value-bytes-here"), 0)
+
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.KeyOnly = true
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			it.Rewind()
+			require.True(t, it.Valid())
+			item := it.Item()
+			require.Equal(t, int64(len("abc")), item.EstimatedSize())
+			return nil
+		}))
+	})
+}
+
+// TestKeyOnlyIterator_ValueSizeIsZero covers iterator.go:Item.ValueSize short-circuit.
+func TestKeyOnlyIterator_ValueSizeIsZero(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		txnSet(t, db, []byte("k1"), []byte("hello-world"), 0)
+
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.KeyOnly = true
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			it.Rewind()
+			require.True(t, it.Valid())
+			require.Equal(t, int64(0), it.Item().ValueSize())
+			return nil
+		}))
+	})
+}
+
+// TestKeyOnlyIterator_ForcesPrefetchOff covers iterator.go:NewIterator forcing
+// PrefetchValues=false when KeyOnly=true.
+func TestKeyOnlyIterator_ForcesPrefetchOff(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		txnSet(t, db, []byte("k1"), []byte("v1"), 0)
+
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.KeyOnly = true
+			opt.PrefetchValues = true
+			opt.PrefetchSize = 100
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			require.False(t, it.opt.PrefetchValues,
+				"NewIterator must force PrefetchValues=false when KeyOnly=true")
+			it.Rewind()
+			require.True(t, it.Valid())
+			return nil
+		}))
+	})
+}
+
+// TestKeyOnlyIterator_KeyMetaVersionWork verifies that the metadata methods on Item
+// still work correctly in KeyOnly mode (per the contract documented on KeyOnly).
+// Indirectly covers fill() setting item.keyOnly while still populating meta/version.
+func TestKeyOnlyIterator_KeyMetaVersionWork(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		key := []byte("hello")
+		val := []byte("world")
+		txn := db.NewTransaction(true)
+		require.NoError(t, txn.SetEntry(NewEntry(key, val).WithMeta(0x42)))
+		require.NoError(t, txn.Commit())
+
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.KeyOnly = true
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			it.Rewind()
+			require.True(t, it.Valid())
+			item := it.Item()
+			require.Equal(t, key, item.Key())
+			require.Equal(t, byte(0x42), item.UserMeta())
+			require.NotZero(t, item.Version())
+			require.False(t, item.IsDeletedOrExpired())
+			require.Equal(t, int64(len(key)), item.KeySize())
+			require.False(t, item.DiscardEarlierVersions())
+			return nil
+		}))
+	})
+}
+
+// TestKeyOnlyIterator_PrefetchValuesFalseStillWorks covers fill() KeyOnly branch
+// without prefetch (the actual hot path; PrefetchValues=false bypasses the
+// prefetch goroutine entirely).
+func TestKeyOnlyIterator_PrefetchValuesFalseStillWorks(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		for i := 0; i < 20; i++ {
+			txnSet(t, db, []byte(fmt.Sprintf("k%02d", i)), []byte("vvv"), 0)
+		}
+
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.PrefetchValues = false
+			opt.KeyOnly = true
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			count := 0
+			for it.Rewind(); it.Valid(); it.Next() {
+				item := it.Item()
+				require.NotEmpty(t, item.Key())
+				err := item.Value(func(v []byte) error { return nil })
+				require.True(t, errors.Is(err, ErrKeyOnlyMode))
+				count++
+			}
+			require.Equal(t, 20, count)
+			return nil
+		}))
+	})
+}
+
+// TestCanSeeInternalKeys is a table-driven unit test for canSeeInternalKeys.
+// It pins down which prefix shapes leave badger-internal keys reachable.
+func TestCanSeeInternalKeys(t *testing.T) {
+	cases := []struct {
+		name   string
+		prefix []byte
+		want   bool
+	}{
+		{"empty prefix sees everything", nil, true},
+		{"empty slice sees everything", []byte{}, true},
+		{"prefix starting with '!' (badgerPrefix[0]) may overlap", []byte("!badger"), true},
+		{"prefix starting with 0x21 (== '!') overlap", []byte{0x21, 0xff}, true},
+		{"prefix starting with 0x00 cannot overlap", []byte{0x00, 0x01, 0x02}, false},
+		{"prefix starting with 'a' cannot overlap", []byte("abc"), false},
+		{"prefix starting with high byte cannot overlap", []byte{0xff}, false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := canSeeInternalKeys(tc.prefix)
+			require.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// TestRegressionSameKeyDedup locks in the lastKey dedup behavior: when several
+// versions of the same user-key exist and AllVersions=false, the iterator must
+// surface exactly one item per user-key (the freshest non-expired one). This
+// test exercises the user-key-only comparison path.
+func TestRegressionSameKeyDedup(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		// Write three versions of "k1" and three versions of "k2".
+		for _, k := range []string{"k1", "k2"} {
+			for v := 1; v <= 3; v++ {
+				txnSet(t, db, []byte(k), []byte(fmt.Sprintf("v%d", v)), 0)
+			}
+		}
+
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.PrefetchValues = false
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			var seen []string
+			for it.Rewind(); it.Valid(); it.Next() {
+				seen = append(seen, string(it.Item().Key()))
+			}
+			require.Equal(t, []string{"k1", "k2"}, seen,
+				"non-AllVersions iterator must dedup to one item per user-key")
+			return nil
+		}))
+
+		// AllVersions=true should see all 3 versions per key.
+		require.NoError(t, db.View(func(txn *Txn) error {
+			opt := DefaultIteratorOptions
+			opt.AllVersions = true
+			opt.PrefetchValues = false
+			it := txn.NewIterator(opt)
+			defer it.Close()
+			counts := map[string]int{}
+			for it.Rewind(); it.Valid(); it.Next() {
+				counts[string(it.Item().Key())]++
+			}
+			require.Equal(t, 3, counts["k1"])
+			require.Equal(t, 3, counts["k2"])
+			return nil
+		}))
+	})
+}
+
+// TestRegressionInternalKeysHidden guards the iter3 behavior: an iterator
+// built from DefaultIteratorOptions (no InternalAccess) must never yield a
+// badger-internal key, even where canSeeInternalKeys reports an overlap.
+func TestRegressionInternalKeysHidden(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		txnSet(t, db, []byte("user-key"), []byte("v"), 0)
+
+		viewErr := db.View(func(txn *Txn) error {
+			itr := txn.NewIterator(DefaultIteratorOptions)
+			defer itr.Close()
+			for itr.Rewind(); itr.Valid(); itr.Next() {
+				key := itr.Item().Key()
+				require.False(t, isBadgerInternalKey(key), "default iterator surfaced internal key %q", key)
+			}
+			return nil
+		})
+		require.NoError(t, viewErr)
+	})
+}
+
+// isBadgerInternalKey reports whether k starts with badgerPrefix, the marker
+// carried by badger-internal keys. A key shorter than the prefix never
+// matches.
+func isBadgerInternalKey(k []byte) bool {
+	prefixLen := len(badgerPrefix)
+	if len(k) < prefixLen {
+		return false
+	}
+	// Compare only the leading prefixLen bytes of k against the prefix.
+	return string(k[:prefixLen]) == string(badgerPrefix)
+}
+
+// TestRegressionIsBannedNoBans guards the iter1 fast path: with namespaces
+// enabled (NamespaceOffset=0) but nothing banned, isBanned must return nil for
+// every key. This is the hasAny.Load()==false branch.
+func TestRegressionIsBannedNoBans(t *testing.T) {
+	options := getTestOptions("")
+	options.NamespaceOffset = 0
+	runBadgerTest(t, &options, func(t *testing.T, db *DB) {
+		// Nothing has been banned on this fresh DB, so the flag must be unset.
+		anyBans := db.bannedNamespaces.hasAny.Load()
+		require.False(t, anyBans)
+
+		// A long key, a nil key and a 1-byte key must all be accepted: the
+		// fast path returns before the length check is reached, so key size
+		// makes no difference.
+		longKey := y.KeyWithTs([]byte("hello-world-12345"), 1)
+		for _, probe := range [][]byte{longKey, nil, []byte("x")} {
+			require.NoError(t, db.isBanned(probe))
+		}
+	})
+}
+
+// TestRegressionIsBannedWithBan drives the slow path: after a namespace is
+// banned hasAny is true, and only keys in that namespace are rejected.
+func TestRegressionIsBannedWithBan(t *testing.T) {
+	options := getTestOptions("")
+	options.NamespaceOffset = 0
+	runBadgerTest(t, &options, func(t *testing.T, db *DB) {
+		const bannedNS, otherNS = uint64(0x4242), uint64(0x1111)
+		// nsKey lays a key out as: namespace bytes at offset 0, then
+		// "suffix", with timestamp 1 attached by y.KeyWithTs.
+		nsKey := func(ns uint64) []byte {
+			return y.KeyWithTs(append(y.U64ToBytes(ns), []byte("suffix")...), 1)
+		}
+
+		require.NoError(t, db.BanNamespace(bannedNS))
+		flagged := db.bannedNamespaces.hasAny.Load()
+		require.True(t, flagged, "hasAny must flip to true after add()")
+
+		require.ErrorIs(t, db.isBanned(nsKey(bannedNS)), ErrBannedKey)
+		require.NoError(t, db.isBanned(nsKey(otherNS))) // different namespace
+	})
+}
+
+// TestRegressionIsBannedShortKeyWithBans covers the slow-path early return:
+// hasAny=true, but len(key) <= NamespaceOffset+8, so isBanned returns nil.
+func TestRegressionIsBannedShortKeyWithBans(t *testing.T) {
+	opt := getTestOptions("")
+	opt.NamespaceOffset = 0
+	runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
+		const ns = uint64(0x4242)
+		require.NoError(t, db.BanNamespace(ns))
+		// Pre-existing semantics: keys of at most NamespaceOffset+8 bytes are
+		// never banned, including the banned namespace's own bytes alone.
+		for _, key := range [][]byte{[]byte("short"), nil, y.U64ToBytes(ns)} {
+			require.NoError(t, db.isBanned(key), "key %x must not be banned", key)
+		}
+	})
+}
+
+// TestRegressionIsBannedNegativeOffset covers isBanned's NamespaceOffset<0 early return.
+func TestRegressionIsBannedNegativeOffset(t *testing.T) {
+	options := getTestOptions("")
+	options.NamespaceOffset = -1 // checked first, before the hasAny atomic load
+	runBadgerTest(t, &options, func(t *testing.T, db *DB) {
+		for _, key := range [][]byte{[]byte("anything"), nil} {
+			require.NoError(t, db.isBanned(key))
+		}
+	})
+}
+
+// TestRegressionFillNonKeyOnly drives the non-KeyOnly fill path (iter2's
+// hoist): with KeyOnly left unset, ValueCopy must return the stored bytes.
+func TestRegressionFillNonKeyOnly(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		want := []byte("expected-value-bytes")
+		txnSet(t, db, []byte("k1"), want, 0)
+		viewErr := db.View(func(txn *Txn) error {
+			iopt := DefaultIteratorOptions
+			iopt.PrefetchValues = false // exercise yieldItemValue path
+			itr := txn.NewIterator(iopt)
+			defer itr.Close()
+			itr.Rewind()
+			require.True(t, itr.Valid())
+			val, copyErr := itr.Item().ValueCopy(nil)
+			require.NoError(t, copyErr)
+			require.Equal(t, want, val)
+			return nil
+		})
+		require.NoError(t, viewErr)
+	})
+}
+
+// TestRegressionPrefetchValuesTrue exercises the prefetch path: with
+// PrefetchValues=true every item must still yield a non-empty value.
+func TestRegressionPrefetchValuesTrue(t *testing.T) {
+	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+		const numKeys = 5
+		for i := 0; i < numKeys; i++ {
+			txnSet(t, db, []byte(fmt.Sprintf("k%d", i)), []byte(fmt.Sprintf("v%d", i)), 0)
+		}
+
+		viewErr := db.View(func(txn *Txn) error {
+			iopt := DefaultIteratorOptions
+			iopt.PrefetchValues = true
+			iopt.PrefetchSize = 10
+			itr := txn.NewIterator(iopt)
+			defer itr.Close()
+			var visited int
+			for itr.Rewind(); itr.Valid(); itr.Next() {
+				val, copyErr := itr.Item().ValueCopy(nil)
+				require.NoError(t, copyErr)
+				require.NotEmpty(t, val)
+				visited++
+			}
+			require.Equal(t, numKeys, visited)
+			return nil
+		})
+		require.NoError(t, viewErr)
+	})
+}