Skip to content

Commit d7d4b15

Browse files
committed
Add efficient range iteration: Ranges, UnsetRanges, NextSet, NextClear
Add Bitmap.NextSet and Bitmap.NextClear methods that return (uint32, bool), equivalent to NextValue/NextAbsentValue but designed for use in loops. Add Ranges() iter.Seq2 that yields contiguous [start, end) pairs of present values in O(number of ranges), not O(number of set bits). Uses type-specific container logic: run containers yield stored intervals directly, bitmap containers scan uint64 words with TrailingZeros/TrailingOnes, and array containers walk sorted values linearly. Cross-container ranges are merged. Add UnsetRanges(min, max) as the complement, yielding gaps within [min, max].
1 parent 8923a74 commit d7d4b15

3 files changed

Lines changed: 684 additions & 0 deletions

File tree

iter.go

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,153 @@ func Unset(b *Bitmap, min, max uint32) iter.Seq[uint32] {
4242
}
4343
}
4444
}
45+
46+
// Ranges returns an iterator over contiguous ranges of values present in the
47+
// bitmap. Each pair (start, endExclusive) represents a maximal half-open range
48+
// [start, endExclusive) where every value is present.
49+
// endExclusive is uint64 to correctly represent ranges that include MaxUint32.
50+
//
51+
// This iterates in O(number of contiguous ranges), not O(number of set bits).
52+
// Ranges that span container boundaries are merged automatically.
53+
//
54+
// See also Values for iterating individual values, and UnsetRanges for
55+
// iterating contiguous ranges of absent values.
56+
func Ranges(b *Bitmap) iter.Seq2[uint32, uint64] {
57+
return func(yield func(uint32, uint64) bool) {
58+
ra := &b.highlowcontainer
59+
keys := ra.keys
60+
containers := ra.containers
61+
n := len(keys)
62+
63+
var pendingStart, pendingEnd uint64
64+
hasPending := false
65+
66+
emit := func(rStart, rEnd uint64) bool {
67+
if hasPending && rStart <= pendingEnd {
68+
if rEnd > pendingEnd {
69+
pendingEnd = rEnd
70+
}
71+
return true
72+
}
73+
if hasPending {
74+
if !yield(uint32(pendingStart), pendingEnd) {
75+
return false
76+
}
77+
}
78+
pendingStart = rStart
79+
pendingEnd = rEnd
80+
hasPending = true
81+
return true
82+
}
83+
84+
for idx := 0; idx < n; idx++ {
85+
hs := uint64(keys[idx]) << 16
86+
c := containers[idx]
87+
88+
switch t := c.(type) {
89+
case *runContainer16:
90+
for _, iv := range t.iv {
91+
if !emit(hs+uint64(iv.start), hs+uint64(iv.start)+uint64(iv.length)+1) {
92+
return
93+
}
94+
}
95+
96+
case *bitmapContainer:
97+
bm := t.bitmap
98+
length := uint(len(bm))
99+
pos := uint(0)
100+
101+
for pos < length {
102+
if bm[pos] == 0 {
103+
pos++
104+
continue
105+
}
106+
107+
w := bm[pos]
108+
lo := uint(countTrailingZeros(w))
109+
bitStart := pos*64 + lo
110+
111+
ones := uint(countTrailingOnes(w >> lo))
112+
if lo+ones < 64 {
113+
if !emit(hs|uint64(bitStart), hs|uint64(bitStart+ones)) {
114+
return
115+
}
116+
pos = (bitStart + ones) / 64
117+
} else {
118+
pos++
119+
for pos < length && bm[pos] == 0xFFFFFFFFFFFFFFFF {
120+
pos++
121+
}
122+
var bitEnd uint
123+
if pos < length {
124+
bitEnd = pos*64 + uint(countTrailingOnes(bm[pos]))
125+
} else {
126+
bitEnd = length * 64
127+
}
128+
if !emit(hs|uint64(bitStart), hs|uint64(bitEnd)) {
129+
return
130+
}
131+
}
132+
}
133+
134+
case *arrayContainer:
135+
content := t.content
136+
i := 0
137+
for i < len(content) {
138+
start := uint64(content[i])
139+
end := start + 1
140+
i++
141+
for i < len(content) && uint64(content[i]) == end {
142+
end++
143+
i++
144+
}
145+
if !emit(hs|start, hs|end) {
146+
return
147+
}
148+
}
149+
}
150+
}
151+
152+
if hasPending {
153+
yield(uint32(pendingStart), pendingEnd)
154+
}
155+
}
156+
}
157+
158+
// UnsetRanges returns an iterator over contiguous ranges of values NOT in the
159+
// bitmap, within the closed range [min, max]. Each pair (start, endExclusive)
160+
// represents a maximal half-open range [start, endExclusive) where no value is
161+
// present in the bitmap. endExclusive is uint64 to correctly represent ranges
162+
// that include MaxUint32.
163+
//
164+
// This iterates in O(number of contiguous ranges), not O(number of unset bits).
165+
// It works by iterating the set ranges and yielding the gaps between them.
166+
//
167+
// See also Unset for iterating individual absent values.
168+
func UnsetRanges(b *Bitmap, min, max uint32) iter.Seq2[uint32, uint64] {
169+
limit := uint64(max) + 1
170+
return func(yield func(uint32, uint64) bool) {
171+
pos := uint64(min)
172+
for start, end := range Ranges(b) {
173+
if uint64(start) >= limit {
174+
break
175+
}
176+
if end <= pos {
177+
continue
178+
}
179+
if uint64(start) > pos {
180+
gapEnd := uint64(start)
181+
if gapEnd > limit {
182+
gapEnd = limit
183+
}
184+
if !yield(uint32(pos), gapEnd) {
185+
return
186+
}
187+
}
188+
pos = end
189+
}
190+
if pos < limit {
191+
yield(uint32(pos), limit)
192+
}
193+
}
194+
}

0 commit comments

Comments
 (0)