From 1ad4bb13837f610d1b20a29d038d5dad58d97737 Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:30:28 -0500 Subject: [PATCH] fix(ingest): tolerate missing artifacts and reuse-bundle collisions --- .github/workflows/ingest-results.yml | 29 ++++++++++++------- .../db/src/etl/reused-ingest-metadata.test.ts | 14 +++++++-- packages/db/src/etl/reused-ingest-metadata.ts | 6 +++- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ingest-results.yml b/.github/workflows/ingest-results.yml index 1b4f7d9f..ca0589ca 100644 --- a/.github/workflows/ingest-results.yml +++ b/.github/workflows/ingest-results.yml @@ -56,23 +56,31 @@ jobs: sleep "$attempt" done if [ "$ok" = false ]; then - echo "::error::Failed to download artifact after 3 attempts: ${name} — skipping" + echo "::warning::Failed to download artifact after 3 attempts: ${name} — skipping" rm -f artifact.zip - echo 1 >> "$ARTIFACTS_PATH/.failures" + echo "$name" >> "$ARTIFACTS_PATH/.failures" continue fi mkdir -p "${ARTIFACTS_PATH}/${name}" - unzip -o artifact.zip -d "${ARTIFACTS_PATH}/${name}" - rm artifact.zip + if ! unzip -o artifact.zip -d "${ARTIFACTS_PATH}/${name}"; then + echo "::warning::Failed to extract artifact: ${name} — skipping" + rm -rf "${ARTIFACTS_PATH:?}/${name}" + echo "$name" >> "$ARTIFACTS_PATH/.failures" + fi + rm -f artifact.zip done - echo "Downloaded artifacts:" - ls "$ARTIFACTS_PATH/" - if [ -f "$ARTIFACTS_PATH/.failures" ]; then count=$(wc -l < "$ARTIFACTS_PATH/.failures") rm "$ARTIFACTS_PATH/.failures" - echo "::error::${count} artifact(s) failed to download" + echo "::warning::${count} artifact(s) failed to download; ingesting what's available" + fi + + echo "Downloaded artifacts:" + ls "$ARTIFACTS_PATH/" + + if [ -z "$(ls -A "$ARTIFACTS_PATH")" ]; then + echo "::error::No artifacts could be downloaded from run ${RUN_ID}" exit 1 fi @@ -92,8 +100,9 @@ jobs: name=$(basename "$child") dest="$ARTIFACTS_PATH/$name" if [ -e "$dest" ]; then - echo "::error::Cannot flatten reused artifact '$name'; destination already exists" - exit 1 + echo "::warning::Skipping reused artifact '$name'; the run has a fresher copy" + rm -rf "$child" + continue fi mv "$child" "$dest" echo " $name" diff --git a/packages/db/src/etl/reused-ingest-metadata.test.ts b/packages/db/src/etl/reused-ingest-metadata.test.ts index 271de25b..140fa438 100644 --- a/packages/db/src/etl/reused-ingest-metadata.test.ts +++ b/packages/db/src/etl/reused-ingest-metadata.test.ts @@ -113,13 +113,23 @@ describe('flattenReusedIngestArtifactBundle', () => { expect(readReusedIngestMetadata(root)?.sourceRunId).toBe('25763435778'); }); - it('rejects flattening when it would overwrite an existing artifact', () => { + it('keeps the run-local artifact when a reused artifact collides', () => { const root = tempDir(); fs.mkdirSync(path.join(root, 'results_bmk')); + fs.writeFileSync(path.join(root, 'results_bmk', 'fresh.json'), '[]'); fs.mkdirSync(path.join(root, 'reused-ingest-artifacts', 'results_bmk'), { recursive: true, }); + fs.writeFileSync( + path.join(root, 'reused-ingest-artifacts', 'results_bmk', 'reused.json'), + '[]', + ); + fs.mkdirSync(path.join(root, 'reused-ingest-artifacts', 'run-stats'), { recursive: true }); - expect(() => flattenReusedIngestArtifactBundle(root)).toThrow(/destination already exists/u); + expect(flattenReusedIngestArtifactBundle(root)).toEqual(['run-stats']); + expect(fs.existsSync(path.join(root, 'reused-ingest-artifacts'))).toBe(false); + expect(fs.existsSync(path.join(root, 'run-stats'))).toBe(true); + expect(fs.existsSync(path.join(root, 'results_bmk', 'fresh.json'))).toBe(true); + expect(fs.existsSync(path.join(root, 'results_bmk', 'reused.json'))).toBe(false); }); }); diff --git a/packages/db/src/etl/reused-ingest-metadata.ts b/packages/db/src/etl/reused-ingest-metadata.ts index 9396495d..715c720b 100644 --- a/packages/db/src/etl/reused-ingest-metadata.ts +++ b/packages/db/src/etl/reused-ingest-metadata.ts @@ -19,7 +19,11 @@ export function flattenReusedIngestArtifactBundle(rootDir: string): string[] { const source = path.join(bundleDir, name); const dest = path.join(rootDir, name); if (fs.existsSync(dest)) { - throw new Error(`Cannot flatten reused artifact '${name}'; destination already exists`); + // The run re-produced this artifact itself; the fresh copy wins over + // the one reused from the source run. + console.warn(` [WARN] Skipping reused artifact '${name}'; the run has a fresher copy`); + fs.rmSync(source, { recursive: true, force: true }); + continue; } fs.renameSync(source, dest); moved.push(name);