Skip to content

Commit df24915

Browse files
committed
reduce number of protobuf serializations per program
1 parent 69c88b5 commit df24915

3 files changed

Lines changed: 74 additions & 111 deletions

File tree

Sources/Fuzzilli/Database/DatabaseUtils.swift

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,19 @@ import Crypto
44

55
public class DatabaseUtils {
66

7-
public static func encodeProgramToBase64(program: Program) throws -> String {
8-
let data = try encodeProgramToProtobuf(program: program)
9-
return data.base64EncodedString()
7+
public static func prepareProgram(program: Program) throws -> (hash: String, base64: String) {
8+
if program.code.contains(where: { $0.op is JsInternalOperation }) {
9+
throw DatabaseUtilsError.programContainsInternalOperations
10+
}
11+
12+
var proto = program.asProtobuf()
13+
proto.clearParent()
14+
let data = try proto.serializedData()
15+
16+
let hash = SHA256.hash(data: data).map { String(format: "%02x", $0) }.joined()
17+
let base64 = data.base64EncodedString()
18+
19+
return (hash, base64)
1020
}
1121

1222
public static func decodeProgramFromBase64(base64: String) throws -> Program {
@@ -16,6 +26,7 @@ public class DatabaseUtils {
1626
return try decodeProgramFromProtobuf(data: data)
1727
}
1828

29+
/*
1930
public static func encodeProgramToProtobuf(program: Program) throws -> Data {
2031
// Make sure the program does not contain internal operations
2132
if program.code.contains(where: { $0.op is JsInternalOperation }) {
@@ -29,12 +40,14 @@ public class DatabaseUtils {
2940

3041
return try proto.serializedData()
3142
}
43+
*/
3244

3345
public static func decodeProgramFromProtobuf(data: Data) throws -> Program {
3446
let proto = try Fuzzilli_Protobuf_Program(serializedBytes: data)
3547
return try Program(from: proto)
3648
}
3749

50+
/*
3851
public static func calculateProgramHash(program: Program) throws -> String {
3952
// Make sure the program does not contain internal operations
4053
if program.code.contains(where: { $0.op is JsInternalOperation }) {
@@ -57,6 +70,7 @@ public class DatabaseUtils {
5770
// Convert to hex string (64 characters for SHA-256)
5871
return digest.map { String(format: "%02x", $0) }.joined()
5972
}
73+
*/
6074

6175
public static func mapExecutionOutcome(outcome: ExecutionOutcome) -> Int {
6276
switch outcome {

Sources/Fuzzilli/Database/PostgresSQLStorage.swift

Lines changed: 22 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -67,20 +67,29 @@ public actor PostgresSQLStorage {
6767

6868
public struct ProgramInput {
6969
public let program: Program
70+
public let programHash: String
71+
public let programBase64: String
7072
public let fuzzerId: Int
7173
public let mutatorNames: [String]
7274
public let contributorNames: [String]
75+
public let parentHash: String?
7376

7477
public init(
7578
program: Program,
79+
programHash: String,
80+
programBase64: String,
7681
fuzzerId: Int,
7782
mutatorNames: [String],
78-
contributorNames: [String]
83+
contributorNames: [String],
84+
parentHash: String?
7985
) {
8086
self.program = program
87+
self.programHash = programHash
88+
self.programBase64 = programBase64
8189
self.fuzzerId = fuzzerId
8290
self.mutatorNames = mutatorNames
8391
self.contributorNames = contributorNames
92+
self.parentHash = parentHash
8493
}
8594
}
8695

@@ -369,6 +378,7 @@ public actor PostgresSQLStorage {
369378
}
370379

371380
public func addProgramToBatch(_ programInput: ProgramInput) {
381+
/*
372382
guard let programHash = try? DatabaseUtils.calculateProgramHash(program: programInput.program) else {
373383
if self.enableLogging {
374384
self.logger.warning("Failed to calculate program hash, skipping program")
@@ -385,6 +395,7 @@ public actor PostgresSQLStorage {
385395
}
386396

387397
seenProgramHashes.insert(programHash)
398+
*/
388399
pendingPrograms.append(programInput)
389400
}
390401

@@ -421,6 +432,7 @@ public actor PostgresSQLStorage {
421432

422433
if !programsToStore.isEmpty {
423434
// Group by fuzzerId to use storeProgramsBatch
435+
// TODO Aleksi: Is this grouping by fuzzerId even necessary?
424436
let groupedPrograms = Dictionary(grouping: programsToStore, by: { $0.fuzzerId})
425437
for (fuzzerId, programInputs) in groupedPrograms {
426438
_ = try await storeProgramsBatch(programInputs: programInputs, fuzzerId: fuzzerId)
@@ -441,66 +453,8 @@ public actor PostgresSQLStorage {
441453
}
442454

443455
private func _storeProgramsBatchImpl(programInputs: [ProgramInput], fuzzerId: Int) async throws -> [String] {
444-
// Pre-calculate all hashes and sort to ensure consistent lock ordering
445-
struct PreparedProgram {
446-
let hash: String
447-
let input: ProgramInput
448-
let parentHash: String?
449-
let programData: String
450-
}
451-
452-
var preparedPrograms: [PreparedProgram] = []
453-
454-
for programInput in programInputs {
455-
let program = programInput.program
456-
457-
// Calculate program hash
458-
let programHash: String
459-
do {
460-
programHash = try DatabaseUtils.calculateProgramHash(program: program)
461-
} catch {
462-
if self.enableLogging {
463-
self.logger.warning("Failed to calculate hash for program, skipping: \(error)")
464-
}
465-
continue
466-
}
467-
468-
// Calculate parent hash if exists
469-
let parentHash: String?
470-
if let parentProgram = program.parent {
471-
do {
472-
parentHash = try DatabaseUtils.calculateProgramHash(program: parentProgram)
473-
} catch {
474-
if self.enableLogging {
475-
self.logger.warning("Failed to calculate parent hash, using nil: \(error)")
476-
}
477-
parentHash = nil
478-
}
479-
} else {
480-
parentHash = nil
481-
}
482-
483-
// Encode program (must be done after parent hash calculation)
484-
let programData: String
485-
do {
486-
programData = try DatabaseUtils.encodeProgramToBase64(program: program)
487-
} catch {
488-
if self.enableLogging {
489-
self.logger.warning("Failed to encode program with hash \(programHash), skipping: \(error)")
490-
}
491-
continue
492-
}
493-
494-
preparedPrograms.append(PreparedProgram(
495-
hash: programHash,
496-
input: programInput,
497-
parentHash: parentHash,
498-
programData: programData
499-
))
500-
}
501-
502456
// Sort by hash - ensures all workers acquire locks in the same order
503-
preparedPrograms.sort { $0.hash < $1.hash }
457+
let sortedProgramInputs = programInputs.sorted { $0.programHash < $1.programHash }
504458

505459
return try await databasePool.withConnection { connection in
506460
var programHashes: [String] = []
@@ -511,11 +465,10 @@ public actor PostgresSQLStorage {
511465
var insertedCount = 0
512466
var skippedCount = 0
513467

514-
for prepared in preparedPrograms {
515-
let programHash = prepared.hash
516-
let programInput = prepared.input
517-
let mutatorNames = programInput.mutatorNames
518-
let contributorNames = programInput.contributorNames
468+
for input in sortedProgramInputs {
469+
let programHash = input.programHash
470+
let mutatorNames = input.mutatorNames
471+
let contributorNames = input.contributorNames
519472

520473
// Format as PostgreSQL arrays: ARRAY['name1', 'name2', ...]
521474
let mutatorsArray: String
@@ -539,7 +492,7 @@ public actor PostgresSQLStorage {
539492
// Single unified INSERT into the program table (previously split between fuzzer and program tables)
540493
let programQuery = PostgresQuery(stringLiteral: """
541494
INSERT INTO program (program_hash, fuzzer_id, inserted_at, program_base64, created_at, source_mutators, contributors, parent_program_hash)
542-
VALUES ('\(programHash)', \(fuzzerId), NOW(), '\(prepared.programData)', NOW(), \(mutatorsArray), \(contributorsArray), \(prepared.parentHash != nil ? "'\(prepared.parentHash!)'" : "NULL"))
495+
VALUES ('\(programHash)', \(fuzzerId), NOW(), '\(input.programBase64)', NOW(), \(mutatorsArray), \(contributorsArray), \(input.parentHash != nil ? "'\(input.parentHash!)'" : "NULL"))
543496
ON CONFLICT (program_hash) DO NOTHING
544497
RETURNING program_hash
545498
""")
@@ -683,7 +636,7 @@ public actor PostgresSQLStorage {
683636
}
684637

685638
var allPrograms: [Program] = []
686-
var seenHashes = Set<String>()
639+
//var seenHashes = Set<String>()
687640
let batchSize = 5000 // Fetch 5k programs at a time
688641
var offset = 0
689642
var totalFetched = 0
@@ -696,6 +649,7 @@ public actor PostgresSQLStorage {
696649
}
697650

698651
// Deduplicate and add to results
652+
/*
699653
for program in batch {
700654
do {
701655
let hash = try DatabaseUtils.calculateProgramHash(program: program)
@@ -710,6 +664,7 @@ public actor PostgresSQLStorage {
710664
}
711665
}
712666
}
667+
*/
713668

714669
totalFetched += batch.count
715670

Sources/Fuzzilli/Modules/PostgreSQLSync.swift

Lines changed: 35 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ public class PostgreSQLSync: Module {
111111
self.cleanupCache(&self.executionCache)
112112
}
113113

114-
// Cache all mutator names and contributor names from ProgramGenerated event (before minimization)
114+
// Cache all mutator names and contributor names from ProgramGenerated event (before minimization)
115115
// This works around the fact that contributors don't survive protobuf serialization
116116
fuzzer.registerEventListener(for: fuzzer.events.ProgramGenerated) { program in
117117
let programId = program.id.uuidString
@@ -149,15 +149,7 @@ public class PostgreSQLSync: Module {
149149
return
150150
}
151151

152-
let mutatorNames = self.mutatorCache[programId] ?? []
153-
let contributorNames = self.contributorCache[programId] ?? []
154-
155-
let programInput = PostgresSQLStorage.ProgramInput(
156-
program: program,
157-
fuzzerId: fuzzerId,
158-
mutatorNames: mutatorNames,
159-
contributorNames: contributorNames
160-
)
152+
let programInput = try self.prepareProgramInput(program: program, fuzzerId: fuzzerId, programId: programId)
161153
await self.storage.addProgramToBatch(programInput)
162154

163155
if let execution = execution {
@@ -200,18 +192,8 @@ public class PostgreSQLSync: Module {
200192
feedbackNexusCount = Int(evaluator.getFeedbackNexusCount())
201193
}
202194

203-
let programHash: String
204-
do {
205-
programHash = try DatabaseUtils.calculateProgramHash(program: program)
206-
} catch {
207-
if self.enableLogging {
208-
self.logger.warning("Failed to calculate program hash, skipping execution record: \(error)")
209-
}
210-
return
211-
}
212-
213195
let executionInput = PostgresSQLStorage.ExecutionInput(
214-
programHash: programHash,
196+
programHash: programInput.programHash,
215197
executionOutcomeId: outcomeId,
216198
coverageTotal: coverageTotal,
217199
edgesFound: edgesFound,
@@ -254,15 +236,7 @@ public class PostgreSQLSync: Module {
254236
return
255237
}
256238

257-
let mutatorNames = self.mutatorCache[programId] ?? []
258-
let contributorNames = self.contributorCache[programId] ?? []
259-
260-
let programInput = PostgresSQLStorage.ProgramInput(
261-
program: program,
262-
fuzzerId: fuzzerId,
263-
mutatorNames: mutatorNames,
264-
contributorNames: contributorNames
265-
)
239+
let programInput = try self.prepareProgramInput(program: program, fuzzerId: fuzzerId, programId: programId)
266240
await self.storage.addProgramToBatch(programInput)
267241

268242
// Get coverage metrics if available (crashes may still have coverage)
@@ -289,19 +263,9 @@ public class PostgreSQLSync: Module {
289263
feedbackNexusCount = Int(evaluator.getFeedbackNexusCount())
290264
}
291265

292-
let programHash: String
293-
do {
294-
programHash = try DatabaseUtils.calculateProgramHash(program: program)
295-
} catch {
296-
if self.enableLogging {
297-
self.logger.warning("Failed to calculate program hash for crash, skipping execution record: \(error)")
298-
}
299-
return
300-
}
301-
302266
// Create execution record with outcome_id = 1 (Crashed)
303267
let executionInput = PostgresSQLStorage.ExecutionInput(
304-
programHash: programHash,
268+
programHash: programInput.programHash,
305269
executionOutcomeId: 1, // Crashed
306270
coverageTotal: coverageTotal,
307271
edgesFound: edgesFound,
@@ -473,4 +437,34 @@ public class PostgreSQLSync: Module {
473437
}
474438
}
475439
}
440+
441+
private func prepareProgramInput(program: Program, fuzzerId: Int, programId: String) throws -> PostgresSQLStorage.ProgramInput {
442+
let mutatorNames = self.mutatorCache[programId] ?? []
443+
self.mutatorCache.removeValue(forKey: programId)
444+
445+
let contributorNames = self.contributorCache[programId] ?? []
446+
self.contributorCache.removeValue(forKey: programId)
447+
448+
let (programHash, programBase64) = try DatabaseUtils.prepareProgram(program: program)
449+
let parentHash: String?
450+
if let parentProgram = program.parent {
451+
parentHash = try? DatabaseUtils.prepareProgram(program: parentProgram).hash
452+
} else {
453+
parentHash = nil
454+
}
455+
456+
return PostgresSQLStorage.ProgramInput(
457+
program: program,
458+
programHash: programHash,
459+
programBase64: programBase64,
460+
fuzzerId: fuzzerId,
461+
mutatorNames: mutatorNames,
462+
contributorNames: contributorNames,
463+
parentHash: parentHash
464+
)
465+
}
466+
467+
private func prepareExecutionInput() {
468+
469+
}
476470
}

0 commit comments

Comments
 (0)