Skip to content

Commit 2dfca88

Browse files
add find all function
1 parent 31c6929 commit 2dfca88

3 files changed

Lines changed: 97 additions & 0 deletions

File tree

recordio/simd/magic_number_search.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,32 @@ func FindMagicNumber(data []byte, off int) int {
1919
}
2020
return -1
2121
}
22+
23+
// FindAllMagicNumbers finds all occurrences of the magic number pattern in the data,
24+
// starting from the given offset. Returns a slice of all offsets where the pattern was found.
25+
func FindAllMagicNumbers(data []byte, off int) []int {
26+
if len(data) < 3 {
27+
return nil
28+
}
29+
if off >= len(data) || off < 0 {
30+
return nil
31+
}
32+
33+
var results []int
34+
pos := off
35+
36+
for {
37+
next := FindMagicNumber(data, pos)
38+
if next < 0 {
39+
break
40+
}
41+
results = append(results, next)
42+
// Start searching from the next position after this match
43+
pos = next + 1
44+
if pos >= len(data)-2 {
45+
break
46+
}
47+
}
48+
49+
return results
50+
}

recordio/simd/magic_number_search_bench_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,42 @@ func BenchmarkFindMagicNumber(b *testing.B) {
4242
})
4343
}
4444
}
45+
46+
func BenchmarkFindAllMagicNumbers(b *testing.B) {
47+
// Create 1GB buffer with magic numbers every 50KB
48+
const dataSize = 1024 * 1024 * 1024 // 1GB
49+
const markerInterval = 50 * 1024 // 50KB
50+
51+
data := make([]byte, dataSize)
52+
pattern := []byte{145, 141, 76}
53+
54+
// Place magic numbers every 50KB
55+
for i := 0; i < len(data)-3; i += markerInterval {
56+
data[i] = pattern[0]
57+
data[i+1] = pattern[1]
58+
data[i+2] = pattern[2]
59+
}
60+
61+
// Place one at the end to ensure we find something
62+
if len(data) >= 3 {
63+
data[len(data)-3] = pattern[0]
64+
data[len(data)-2] = pattern[1]
65+
data[len(data)-1] = pattern[2]
66+
}
67+
68+
// Calculate expected number of matches
69+
// Markers placed at: 0, 50KB, 100KB, ..., n*50KB where n*50KB < dataSize-3
70+
// Plus one at the end (dataSize-3)
71+
expectedMatches := (dataSize-3)/markerInterval + 2 // floor division + loop markers + end marker
72+
73+
b.ResetTimer()
74+
b.ReportAllocs()
75+
b.SetBytes(dataSize) // Report bandwidth in GB/s
76+
77+
for i := 0; i < b.N; i++ {
78+
results := FindAllMagicNumbers(data, 0)
79+
if len(results) != expectedMatches {
80+
b.Fatalf("expected %d matches, got %d", expectedMatches, len(results))
81+
}
82+
}
83+
}

recordio/simd/magic_number_search_cgo.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,32 @@ func FindMagicNumber(data []byte, off int) int {
9191

9292
return cgo_find_magic_numbers_scalar(data, off)
9393
}
94+
95+
// FindAllMagicNumbers finds all occurrences of the magic number pattern in the data,
96+
// starting from the given offset. Returns a slice of all offsets where the pattern was found.
97+
func FindAllMagicNumbers(data []byte, off int) []int {
98+
if len(data) < 3 {
99+
return nil
100+
}
101+
if off >= len(data) || off < 0 {
102+
return nil
103+
}
104+
105+
var results []int
106+
pos := off
107+
108+
for {
109+
next := FindMagicNumber(data, pos)
110+
if next < 0 {
111+
break
112+
}
113+
results = append(results, next)
114+
// Start searching from the next position after this match
115+
pos = next + 1
116+
if pos >= len(data)-2 {
117+
break
118+
}
119+
}
120+
121+
return results
122+
}

0 commit comments

Comments
 (0)