Skip to content

Commit cc3baec

Browse files
fix(pipeline): make parallel-processing CI and schema validation pass
- replace invalid progress logger API calls that crash under Node 24
- emit modules.stage.5.json in the expected object shape with a modules array
- merge worker results into stage-2 modules and keep only schema-allowed fields
1 parent 999af08 commit cc3baec

1 file changed

Lines changed: 63 additions & 7 deletions

File tree

scripts/parallel-processing.js

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
* Reads modules.stage.2.json and outputs modules.stage.5.json
77
*/
88

9-
import { createLogger, createStageProgressLogger } from "../scripts/shared/logger.js";
109
import { readFile, writeFile } from "node:fs/promises";
1110
import { WorkerPool } from "../pipeline/workers/worker-pool.js";
1211
import { cpus } from "node:os";
12+
import { createLogger } from "../scripts/shared/logger.js";
1313
import process from "node:process";
1414
import { resolve } from "node:path";
1515
import { stringifyDeterministic } from "../scripts/shared/deterministic-output.js";
@@ -47,6 +47,43 @@ function getBatchSize() {
4747
return 50; // Default batch size
4848
}
4949

50+
// Fields permitted by the stage-5 output schema; any other key on a merged
// stage-2/worker module entry is dropped before serialization.
// Frozen so the shared allow-list cannot be mutated accidentally.
const STAGE5_ALLOWED_KEYS = Object.freeze([
  "name",
  "category",
  "url",
  "id",
  "maintainer",
  "maintainerURL",
  "description",
  "outdated",
  "issues",
  "stars",
  "license",
  "hasGithubIssues",
  "isArchived",
  "lastCommit",
  "keywords",
  "tags",
  "image",
  "packageJson"
]);

/**
 * Project a merged module entry down to the shape allowed by the
 * stage-5 schema.
 *
 * Copies only the keys in STAGE5_ALLOWED_KEYS that are own, defined
 * properties of `module` (so `undefined` values never leak into the
 * serialized JSON), and guarantees `issues` is always an array.
 *
 * @param {object} module - Merged stage-2/worker module entry.
 * @returns {object} Schema-conforming stage-5 module entry.
 */
function toStage5Module(module) {
  const entry = {};

  for (const key of STAGE5_ALLOWED_KEYS) {
    if (Object.hasOwn(module, key) && typeof module[key] !== "undefined") {
      entry[key] = module[key];
    }
  }

  // The schema requires `issues`; normalize missing or non-array values
  // to an empty array rather than emitting an invalid shape.
  if (!Array.isArray(entry.issues)) {
    entry.issues = [];
  }

  return entry;
}
86+
5087
async function main() {
5188
const startTime = Date.now();
5289

@@ -72,7 +109,6 @@ async function main() {
72109
});
73110

74111
// Set up progress tracking
75-
const progressLogger = createStageProgressLogger("parallel-processing", modules.length);
76112
let processedCount = 0;
77113

78114
pool.onProgress((event) => {
@@ -87,9 +123,7 @@ async function main() {
87123
}
88124
const cacheInfo = event.fromCache ? " (cached)" : "";
89125

90-
progressLogger.update(processedCount, {
91-
current: `${status} ${event.moduleId}${cacheInfo}`
92-
});
126+
logger.info(`[${processedCount}/${modules.length}] ${status} ${event.moduleId}${cacheInfo}`);
93127
}
94128
});
95129

@@ -112,11 +146,33 @@ async function main() {
112146
// Process all modules
113147
const results = await pool.processModules(modules, moduleConfig);
114148

115-
progressLogger.complete();
149+
// Stage 5 schema expects an object with a `modules` array.
150+
// Merge worker results back into stage-2 module entries to preserve
151+
// required base fields like category and maintainerURL.
152+
const resultsById = new Map(results.map(result => [result.id, result]));
153+
const mergedModules = modules.map((module) => {
154+
const result = resultsById.get(module.id);
155+
if (!result) {
156+
return {
157+
...module,
158+
issues: [...module.issues || []],
159+
status: "failed",
160+
failurePhase: "pipeline",
161+
error: "No worker result available for module"
162+
};
163+
}
164+
165+
return {
166+
...module,
167+
...result,
168+
issues: [...result.issues || module.issues || []]
169+
};
170+
});
171+
const stage5Modules = mergedModules.map(toStage5Module);
116172

117173
// Write results to stage 5 output
118174
const stage5Path = resolve(PROJECT_ROOT, "website/data/modules.stage.5.json");
119-
const stage5Data = stringifyDeterministic(results);
175+
const stage5Data = stringifyDeterministic({ modules: stage5Modules });
120176
await writeFile(stage5Path, stage5Data, "utf-8");
121177

122178
const duration = Date.now() - startTime;

0 commit comments

Comments
 (0)