Skip to content

Commit 11fdbb8

Browse files
authored
Merge pull request #11 from BlackVectorOps/perf/optimize-indexing-loop-9245333493878548011
⚡ Optimize indexing loop by batching signature additions
2 parents 45bb265 + e1fb243 commit 11fdbb8

3 files changed

Lines changed: 116 additions & 8 deletions

File tree

internal/cli/index.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,13 @@ func RunIndexJSON(target string, results []diff.FingerprintResult, name, severit
166166
Created: time.Now().Format("2006-01-02"),
167167
}
168168

169-
scanner.AddSignature(&sig)
170169
indexed = append(indexed, sig)
171170
}
172171

172+
if err := scanner.AddSignatures(indexed); err != nil {
173+
return nil, 0, err
174+
}
175+
173176
if err := scanner.SaveDatabase(dbPath); err != nil {
174177
return nil, 0, err
175178
}

pkg/storage/jsondb/json_store.go

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,18 @@ func (s *Scanner) SaveDatabase(path string) error {
177177
return nil
178178
}
179179

180+
// Generates a secure random ID if the signature doesn't have one.
181+
func (s *Scanner) ensureID(sig *detection.Signature) error {
182+
if sig.ID == "" {
183+
b := make([]byte, 8)
184+
if _, err := rand.Read(b); err != nil {
185+
return fmt.Errorf("failed to generate secure random ID: %w", err)
186+
}
187+
sig.ID = fmt.Sprintf("SFW-AUTO-%s", hex.EncodeToString(b))
188+
}
189+
return nil
190+
}
191+
180192
// Adds a new signature to the database.
181193
// We use crypto/rand for ID generation because math/rand is deterministic
182194
// and we don't want ID collisions if the seed isn't set properly.
@@ -195,20 +207,44 @@ func (s *Scanner) AddSignature(sig *detection.Signature) error {
195207
}
196208
}
197209

198-
// Generate ID if not provided using secure entropy.
199-
if sig.ID == "" {
200-
b := make([]byte, 8)
201-
if _, err := rand.Read(b); err != nil {
202-
return fmt.Errorf("failed to generate secure random ID: %w", err)
203-
}
204-
sig.ID = fmt.Sprintf("SFW-AUTO-%s", hex.EncodeToString(b))
210+
if err := s.ensureID(sig); err != nil {
211+
return err
205212
}
206213

207214
// Append copies the struct value.
208215
s.db.Signatures = append(s.db.Signatures, *sig)
209216
return nil
210217
}
211218

219+
// Adds multiple signatures to the database in a batch.
220+
// This reduces locking overhead when indexing many files.
221+
func (s *Scanner) AddSignatures(sigs []detection.Signature) error {
222+
s.mu.Lock()
223+
defer s.mu.Unlock()
224+
225+
if len(sigs) == 0 {
226+
return nil
227+
}
228+
229+
if s.db == nil {
230+
s.db = &detection.SignatureDatabase{
231+
Version: "1.0",
232+
Description: "Semantic Firewall Malware Signature Database",
233+
}
234+
}
235+
236+
for i := range sigs {
237+
sig := &sigs[i]
238+
239+
if err := s.ensureID(sig); err != nil {
240+
return err
241+
}
242+
243+
s.db.Signatures = append(s.db.Signatures, *sig)
244+
}
245+
return nil
246+
}
247+
212248
// Retrieves a signature by ID.
213249
// Returns a deep copy to prevent the caller from modifying the internal
214250
// database state without a lock. Shared mutable state is the root of all evil.
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package jsondb
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
7+
"github.com/BlackVectorOps/semantic_firewall/v3/pkg/detection"
8+
)
9+
10+
func TestAddSignatures(t *testing.T) {
11+
s := NewScanner()
12+
sigs := []detection.Signature{
13+
{Name: "Sig1"},
14+
{Name: "Sig2"},
15+
}
16+
if err := s.AddSignatures(sigs); err != nil {
17+
t.Fatalf("AddSignatures failed: %v", err)
18+
}
19+
20+
if len(s.db.Signatures) != 2 {
21+
t.Errorf("Expected 2 signatures, got %d", len(s.db.Signatures))
22+
}
23+
if s.db.Signatures[0].Name != "Sig1" {
24+
t.Errorf("Expected Sig1, got %s", s.db.Signatures[0].Name)
25+
}
26+
// Verify IDs were generated
27+
if s.db.Signatures[0].ID == "" {
28+
t.Error("ID should have been generated for Sig1")
29+
}
30+
}
31+
32+
// count is the number of signatures each benchmark below builds and adds to a
// scanner per iteration.
const count = 10000
33+
34+
func BenchmarkAddSignatureLoop(b *testing.B) {
35+
sigs := make([]detection.Signature, count)
36+
for i := 0; i < count; i++ {
37+
sigs[i] = detection.Signature{
38+
ID: fmt.Sprintf("SIG-%d", i),
39+
Name: fmt.Sprintf("Signature %d", i),
40+
}
41+
}
42+
43+
b.ResetTimer()
44+
for i := 0; i < b.N; i++ {
45+
s := NewScanner()
46+
s.db.Signatures = make([]detection.Signature, 0, count)
47+
48+
for j := 0; j < count; j++ {
49+
_ = s.AddSignature(&sigs[j])
50+
}
51+
}
52+
}
53+
54+
func BenchmarkAddSignaturesBatch(b *testing.B) {
55+
sigs := make([]detection.Signature, count)
56+
for i := 0; i < count; i++ {
57+
sigs[i] = detection.Signature{
58+
ID: fmt.Sprintf("SIG-%d", i),
59+
Name: fmt.Sprintf("Signature %d", i),
60+
}
61+
}
62+
63+
b.ResetTimer()
64+
for i := 0; i < b.N; i++ {
65+
s := NewScanner()
66+
s.db.Signatures = make([]detection.Signature, 0, count)
67+
_ = s.AddSignatures(sigs)
68+
}
69+
}

0 commit comments

Comments
 (0)