Skip to content

Commit 6528e03

Browse files
authored
Merge pull request #86 from sonukapoor/feature/issue-78-sync-performance
[Enhancement] Speed up advisory sync ingestion for growing OSV datasets
2 parents 7f698db + d9ab6b6 commit 6528e03

2 files changed

Lines changed: 93 additions & 70 deletions

File tree

src/advisory/local-db.ts

Lines changed: 87 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,13 @@ const SCHEMA_SQL = `
5050

5151
export class LocalAdvisoryDatabase {
5252
private readonly db: Database.Database;
53+
private readonly upsertAdvisoryStatement: Database.Statement;
54+
private readonly deleteRangesStatement: Database.Statement;
55+
private readonly insertRangeStatement: Database.Statement;
56+
private readonly setMetadataStatement: Database.Statement;
57+
private readonly getMetadataStatement: Database.Statement;
58+
private readonly getVulnerabilityStatement: Database.Statement;
59+
private readonly findMatchingIdsStatement: Database.Statement;
5360

5461
constructor(
5562
private readonly dbPath: string,
@@ -64,44 +71,7 @@ export class LocalAdvisoryDatabase {
6471
this.db = new Database(dbPath, readonly ? { readonly: true, fileMustExist: true } : undefined);
6572
this.db.pragma("foreign_keys = ON");
6673
this.db.exec(SCHEMA_SQL);
67-
}
68-
69-
close(): void {
70-
this.db.close();
71-
}
72-
73-
setMetadata(metadata: AdvisoryDbMetadata): void {
74-
this.db.prepare(`
75-
INSERT INTO advisory_db_metadata (id, last_sync_at, source_url)
76-
VALUES (1, @last_sync_at, @source_url)
77-
ON CONFLICT(id) DO UPDATE SET
78-
last_sync_at = excluded.last_sync_at,
79-
source_url = excluded.source_url
80-
`).run({
81-
last_sync_at: metadata.lastSyncAt,
82-
source_url: metadata.sourceUrl,
83-
});
84-
}
85-
86-
getMetadata(): AdvisoryDbMetadata {
87-
const row = this.db.prepare(`
88-
SELECT last_sync_at, source_url
89-
FROM advisory_db_metadata
90-
WHERE id = 1
91-
`).get() as { last_sync_at: string | null; source_url: string | null } | undefined;
92-
93-
return {
94-
lastSyncAt: row?.last_sync_at ?? null,
95-
sourceUrl: row?.source_url ?? null,
96-
};
97-
}
98-
99-
upsertVulnerability(vuln: OsvVuln): void {
100-
const advisoryRows = deriveAdvisoryPackageRows(vuln);
101-
const advisoryJson = JSON.stringify(vuln);
102-
const aliasesJson = JSON.stringify(vuln.aliases ?? []);
103-
104-
const upsertAdvisory = this.db.prepare(`
74+
this.upsertAdvisoryStatement = this.db.prepare(`
10575
INSERT INTO advisories (id, modified_at, aliases_json, summary, osv_json)
10676
VALUES (@id, @modified_at, @aliases_json, @summary, @osv_json)
10777
ON CONFLICT(id) DO UPDATE SET
@@ -110,11 +80,11 @@ export class LocalAdvisoryDatabase {
11080
summary = excluded.summary,
11181
osv_json = excluded.osv_json
11282
`);
113-
const deleteRanges = this.db.prepare(`
83+
this.deleteRangesStatement = this.db.prepare(`
11484
DELETE FROM advisory_packages
11585
WHERE advisory_id = ?
11686
`);
117-
const insertRange = this.db.prepare(`
87+
this.insertRangeStatement = this.db.prepare(`
11888
INSERT INTO advisory_packages (
11989
advisory_id,
12090
ecosystem,
@@ -131,31 +101,89 @@ export class LocalAdvisoryDatabase {
131101
@last_affected
132102
)
133103
`);
104+
this.setMetadataStatement = this.db.prepare(`
105+
INSERT INTO advisory_db_metadata (id, last_sync_at, source_url)
106+
VALUES (1, @last_sync_at, @source_url)
107+
ON CONFLICT(id) DO UPDATE SET
108+
last_sync_at = excluded.last_sync_at,
109+
source_url = excluded.source_url
110+
`);
111+
this.getMetadataStatement = this.db.prepare(`
112+
SELECT last_sync_at, source_url
113+
FROM advisory_db_metadata
114+
WHERE id = 1
115+
`);
116+
this.getVulnerabilityStatement = this.db.prepare(`
117+
SELECT osv_json
118+
FROM advisories
119+
WHERE id = ?
120+
`);
121+
this.findMatchingIdsStatement = this.db.prepare(`
122+
SELECT advisory_id, introduced, fixed, last_affected
123+
FROM advisory_packages
124+
WHERE ecosystem = ? AND package_name = ?
125+
`);
126+
}
127+
128+
close(): void {
129+
this.db.close();
130+
}
134131

135-
const transaction = this.db.transaction(() => {
136-
upsertAdvisory.run({
137-
id: vuln.id,
138-
modified_at: null,
139-
aliases_json: aliasesJson,
140-
summary: vuln.summary ?? null,
141-
osv_json: advisoryJson,
142-
});
143-
144-
deleteRanges.run(vuln.id);
145-
for (const row of advisoryRows) {
146-
insertRange.run(row);
132+
setMetadata(metadata: AdvisoryDbMetadata): void {
133+
this.setMetadataStatement.run({
134+
last_sync_at: metadata.lastSyncAt,
135+
source_url: metadata.sourceUrl,
136+
});
137+
}
138+
139+
getMetadata(): AdvisoryDbMetadata {
140+
const row = this.getMetadataStatement.get() as { last_sync_at: string | null; source_url: string | null } | undefined;
141+
142+
return {
143+
lastSyncAt: row?.last_sync_at ?? null,
144+
sourceUrl: row?.source_url ?? null,
145+
};
146+
}
147+
148+
upsertVulnerability(vuln: OsvVuln): void {
149+
const transaction = this.db.transaction((item: OsvVuln) => {
150+
this.upsertVulnerabilityInternal(item);
151+
});
152+
153+
transaction(vuln);
154+
}
155+
156+
bulkUpsertVulnerabilities(vulns: Iterable<OsvVuln>): void {
157+
const transaction = this.db.transaction((items: OsvVuln[]) => {
158+
for (const item of items) {
159+
this.upsertVulnerabilityInternal(item);
147160
}
148161
});
149162

150-
transaction();
163+
transaction([...vulns]);
164+
}
165+
166+
private upsertVulnerabilityInternal(vuln: OsvVuln): void {
167+
const advisoryRows = deriveAdvisoryPackageRows(vuln);
168+
const advisoryJson = JSON.stringify(vuln);
169+
const aliasesJson = JSON.stringify(vuln.aliases ?? []);
170+
171+
this.upsertAdvisoryStatement.run({
172+
id: vuln.id,
173+
modified_at: null,
174+
aliases_json: aliasesJson,
175+
summary: vuln.summary ?? null,
176+
osv_json: advisoryJson,
177+
});
178+
179+
this.deleteRangesStatement.run(vuln.id);
180+
for (const row of advisoryRows) {
181+
this.insertRangeStatement.run(row);
182+
}
151183
}
152184

153185
getVulnerability(id: string): OsvVuln | null {
154-
const row = this.db.prepare(`
155-
SELECT osv_json
156-
FROM advisories
157-
WHERE id = ?
158-
`).get(id) as { osv_json: string } | undefined;
186+
const row = this.getVulnerabilityStatement.get(id) as { osv_json: string } | undefined;
159187

160188
if (!row) return null;
161189
return JSON.parse(row.osv_json) as OsvVuln;
@@ -166,11 +194,7 @@ export class LocalAdvisoryDatabase {
166194
return [];
167195
}
168196

169-
const rows = this.db.prepare(`
170-
SELECT advisory_id, introduced, fixed, last_affected
171-
FROM advisory_packages
172-
WHERE ecosystem = ? AND package_name = ?
173-
`).all(pkg.ecosystem, pkg.name) as AdvisoryRangeRow[];
197+
const rows = this.findMatchingIdsStatement.all(pkg.ecosystem, pkg.name) as AdvisoryRangeRow[];
174198

175199
const ids = new Set<string>();
176200
for (const row of rows) {

src/advisory/osv-sync.ts

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ export async function syncOsvAdvisories(
123123
await yieldToEventLoop();
124124

125125
const archiveEntries = unzipSync(zippedBytes);
126-
const advisoryEntries = Object.keys(archiveEntries).filter(entryName => entryName.endsWith(".json"));
126+
const advisoryEntries = Object.entries(archiveEntries).filter(([entryName]) => entryName.endsWith(".json"));
127127
onProgress?.({
128128
phase: "extract",
129129
totalEntries: Object.keys(archiveEntries).length,
@@ -134,21 +134,18 @@ export async function syncOsvAdvisories(
134134
const db = new LocalAdvisoryDatabase(dbPath);
135135

136136
try {
137+
const parsedVulns: OsvVuln[] = [];
137138
let advisoryCount = 0;
138139
const progressInterval = 250;
139140

140-
for (const [entryName, bytes] of Object.entries(archiveEntries)) {
141-
if (!entryName.endsWith(".json")) {
142-
continue;
143-
}
144-
141+
for (const [, bytes] of advisoryEntries) {
145142
const text = Buffer.from(bytes).toString("utf8");
146143
const vuln = JSON.parse(text) as OsvVuln;
147144
if (!vuln.id) {
148145
continue;
149146
}
150147

151-
db.upsertVulnerability(vuln);
148+
parsedVulns.push(vuln);
152149
advisoryCount += 1;
153150

154151
if (
@@ -166,6 +163,8 @@ export async function syncOsvAdvisories(
166163
}
167164
}
168165

166+
db.bulkUpsertVulnerabilities(parsedVulns);
167+
169168
db.setMetadata({
170169
lastSyncAt: new Date().toISOString(),
171170
sourceUrl,

0 commit comments

Comments
 (0)