Skip to content

Commit 3814e33

Browse files
tofikwestclaude
andcommitted
fix(evidence-export): address review feedback on streaming export
Four issues from the PR review, all legitimate: - Attachment streamer treated every S3 failure as a missing object. Now only NoSuchKey / HTTP 404 produce a `_MISSING_*.txt` placeholder. Network, permission, and throttling errors rethrow so the archive aborts and the user sees a real failure instead of a silently-incomplete bundle. - `streamOrganizationEvidenceZip` was throwing NotFoundException from the async populate task, which fired after headers were already sent — the client got a broken stream instead of a 404. Hoisted the empty-content check to pre-flight so it becomes a proper HTTP 404. - Controllers now listen for client disconnect (`req.on('close')`) and abort the archive so cancelled downloads stop consuming backend resources (S3 fetches, background populate task). - Org populate no longer buffers all task summaries into an array before writing. Each task is streamed into the archive as it's processed, and only a lightweight manifest (id / title / counts) is accumulated. Manifest is written last. Tests: 31 passing (was 29) — added AccessDenied rethrow, client-disconnect abort. Pre-flight 404 test now asserts the throw is synchronous and no archive is created. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1bc86d8 commit 3814e33

5 files changed

Lines changed: 242 additions & 95 deletions

File tree

apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,12 @@ export function createFilenameTracker(): (rawName: string) => string {
7171

7272
/**
7373
* Append a single attachment to the archive by streaming its S3 body.
74-
* If the object is missing (deleted from S3 but DB row still exists), writes a
75-
* plaintext placeholder so the bundle remains auditable instead of failing.
74+
*
75+
* - Genuine missing-object errors (`NoSuchKey` / HTTP 404) → write a
76+
* `_MISSING_<name>.txt` placeholder so the bundle stays auditable.
77+
* - All other failures (network, permissions, throttling, empty body) → rethrow
78+
* so the archive aborts and the user sees a real failure instead of silently
79+
* receiving an incomplete export.
7680
*/
7781
export async function appendAttachmentToArchive(params: {
7882
archive: Archiver;
@@ -83,14 +87,11 @@ export async function appendAttachmentToArchive(params: {
8387
const { archive, attachment, folderPath, uniqueName } = params;
8488

8589
if (!s3Client || !BUCKET_NAME) {
86-
logger.warn(
87-
`S3 client unavailable — attachment ${attachment.id} skipped with placeholder`,
88-
);
89-
archive.append(
90-
buildMissingPlaceholder(attachment, 'S3 client not configured'),
91-
{ name: `${folderPath}/_MISSING_${uniqueName(attachment.name)}.txt` },
90+
// Misconfiguration at process level — fail the whole export, don't silently
91+
// produce placeholders for every attachment.
92+
throw new Error(
93+
'S3 client or bucket not configured; cannot stream attachments',
9294
);
93-
return;
9495
}
9596

9697
try {
@@ -114,6 +115,15 @@ export async function appendAttachmentToArchive(params: {
114115
name: `${folderPath}/${uniqueName(attachment.name)}`,
115116
});
116117
} catch (error) {
118+
if (!isS3MissingObjectError(error)) {
119+
logger.error(
120+
`Failed to fetch attachment ${attachment.id} (key=${attachment.url}): ${
121+
error instanceof Error ? error.message : String(error)
122+
}`,
123+
);
124+
throw error;
125+
}
126+
117127
const message = error instanceof Error ? error.message : String(error);
118128
logger.warn(
119129
`Missing S3 object for attachment ${attachment.id} (key=${attachment.url}): ${message}`,
@@ -124,6 +134,24 @@ export async function appendAttachmentToArchive(params: {
124134
}
125135
}
126136

137+
/**
138+
* True only for "the object does not exist" — NoSuchKey or HTTP 404.
139+
* Everything else (AccessDenied, SlowDown, NetworkError, timeouts) is treated
140+
* as a real failure that should surface, not a silent skip.
141+
*/
142+
function isS3MissingObjectError(error: unknown): boolean {
143+
if (!error || typeof error !== 'object') return false;
144+
const err = error as {
145+
name?: string;
146+
Code?: string;
147+
$metadata?: { httpStatusCode?: number };
148+
};
149+
if (err.name === 'NoSuchKey' || err.name === 'NotFound') return true;
150+
if (err.Code === 'NoSuchKey' || err.Code === 'NotFound') return true;
151+
if (err.$metadata?.httpStatusCode === 404) return true;
152+
return false;
153+
}
154+
127155
function buildMissingPlaceholder(
128156
attachment: TaskAttachment,
129157
reason: string,

apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,23 @@ function makeFakeArchive() {
5050
}
5151

5252
function makeFakeResponse() {
53-
const res: {
54-
setHeader: jest.Mock;
55-
status: jest.Mock;
56-
end: jest.Mock;
57-
headersSent: boolean;
58-
} = {
53+
const emitter = new EventEmitter();
54+
const res = Object.assign(emitter, {
5955
setHeader: jest.fn(),
60-
status: jest.fn(() => res),
56+
status: jest.fn(function (this: unknown) {
57+
return res;
58+
}),
6159
end: jest.fn(),
6260
headersSent: false,
63-
};
61+
writableEnded: false,
62+
});
6463
return res;
6564
}
6665

66+
function makeFakeRequest() {
67+
return new EventEmitter();
68+
}
69+
6770
describe('EvidenceExportController', () => {
6871
let controller: EvidenceExportController;
6972
let service: jest.Mocked<
@@ -106,12 +109,14 @@ describe('EvidenceExportController', () => {
106109
archive: archive as unknown as import('archiver').Archiver,
107110
filename: 'acme_mytask_evidence_2026-04-22.zip',
108111
});
112+
const req = makeFakeRequest();
109113
const res = makeFakeResponse();
110114

111115
await controller.exportTaskEvidenceZip(
112116
'org_1',
113117
'tsk_1',
114118
'true',
119+
req as unknown as import('express').Request,
115120
res as unknown as import('express').Response,
116121
);
117122

@@ -138,12 +143,14 @@ describe('EvidenceExportController', () => {
138143
archive: archive as unknown as import('archiver').Archiver,
139144
filename: 'f.zip',
140145
});
146+
const req = makeFakeRequest();
141147
const res = makeFakeResponse();
142148

143149
await controller.exportTaskEvidenceZip(
144150
'org_1',
145151
'tsk_1',
146152
undefined as unknown as string,
153+
req as unknown as import('express').Request,
147154
res as unknown as import('express').Response,
148155
);
149156

@@ -153,6 +160,31 @@ describe('EvidenceExportController', () => {
153160
{ includeRawJson: false },
154161
);
155162
});
163+
164+
it('aborts the archive when the client disconnects mid-stream', async () => {
165+
const archive = makeFakeArchive();
166+
service.streamTaskEvidenceZip.mockResolvedValue({
167+
archive: archive as unknown as import('archiver').Archiver,
168+
filename: 'f.zip',
169+
});
170+
const req = makeFakeRequest();
171+
const res = makeFakeResponse();
172+
173+
await controller.exportTaskEvidenceZip(
174+
'org_1',
175+
'tsk_1',
176+
'false',
177+
req as unknown as import('express').Request,
178+
res as unknown as import('express').Response,
179+
);
180+
181+
expect(archive.abort).not.toHaveBeenCalled();
182+
183+
// Simulate client closing the connection before the stream finished.
184+
req.emit('close');
185+
186+
expect(archive.abort).toHaveBeenCalledTimes(1);
187+
});
156188
});
157189

158190
describe('AuditorEvidenceExportController', () => {
@@ -185,11 +217,13 @@ describe('AuditorEvidenceExportController', () => {
185217
archive: archive as unknown as import('archiver').Archiver,
186218
filename: 'acme_all-evidence_2026-04-22.zip',
187219
});
220+
const req = makeFakeRequest();
188221
const res = makeFakeResponse();
189222

190223
await controller.exportAllEvidence(
191224
'org_1',
192225
'true',
226+
req as unknown as import('express').Request,
193227
res as unknown as import('express').Response,
194228
);
195229

apps/api/src/tasks/evidence-export/evidence-export.controller.ts

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
Get,
44
Param,
55
Query,
6+
Req,
67
Res,
78
UseGuards,
89
Logger,
@@ -15,7 +16,8 @@ import {
1516
ApiSecurity,
1617
ApiTags,
1718
} from '@nestjs/swagger';
18-
import type { Response } from 'express';
19+
import type { Request, Response } from 'express';
20+
import type { Archiver } from 'archiver';
1921
import { AuditRead } from '../../audit/skip-audit-log.decorator';
2022
import { OrganizationId } from '../../auth/auth-context.decorator';
2123
import { HybridAuthGuard } from '../../auth/hybrid-auth.guard';
@@ -164,6 +166,7 @@ export class EvidenceExportController {
164166
@OrganizationId() organizationId: string,
165167
@Param('taskId') taskId: string,
166168
@Query('includeJson') includeJson: string,
169+
@Req() req: Request,
167170
@Res() res: Response,
168171
) {
169172
this.logger.log('Export task evidence ZIP', {
@@ -186,16 +189,13 @@ export class EvidenceExportController {
186189
`attachment; filename="${filename}"`,
187190
);
188191

189-
archive.on('error', (err) => {
190-
this.logger.error(`Archive stream error for task ${taskId}: ${err.message}`);
191-
if (!res.headersSent) {
192-
res.status(500).end();
193-
} else {
194-
res.end();
195-
}
192+
pipeArchiveToResponse({
193+
archive,
194+
req,
195+
res,
196+
logger: this.logger,
197+
tag: `task ${taskId}`,
196198
});
197-
198-
archive.pipe(res);
199199
}
200200
}
201201

@@ -242,6 +242,7 @@ export class AuditorEvidenceExportController {
242242
async exportAllEvidence(
243243
@OrganizationId() organizationId: string,
244244
@Query('includeJson') includeJson: string,
245+
@Req() req: Request,
245246
@Res() res: Response,
246247
) {
247248
this.logger.log('Auditor exporting all evidence', {
@@ -261,17 +262,51 @@ export class AuditorEvidenceExportController {
261262
`attachment; filename="${filename}"`,
262263
);
263264

264-
archive.on('error', (err) => {
265-
this.logger.error(
266-
`Archive stream error for org ${organizationId}: ${err.message}`,
267-
);
268-
if (!res.headersSent) {
269-
res.status(500).end();
270-
} else {
271-
res.end();
272-
}
265+
pipeArchiveToResponse({
266+
archive,
267+
req,
268+
res,
269+
logger: this.logger,
270+
tag: `org ${organizationId}`,
273271
});
274-
275-
archive.pipe(res);
276272
}
277273
}
274+
275+
/**
276+
* Wire an archive to the HTTP response with two concerns:
277+
* 1. Archive errors → log and end the response (500 if headers not yet sent).
278+
* 2. Client disconnect → abort the archive so S3 fetches stop and the
279+
* background populate task doesn't keep running for a closed socket.
280+
*/
281+
function pipeArchiveToResponse(params: {
282+
archive: Archiver;
283+
req: Request;
284+
res: Response;
285+
logger: Logger;
286+
tag: string;
287+
}): void {
288+
const { archive, req, res, logger, tag } = params;
289+
let aborted = false;
290+
291+
const abortIfIncomplete = () => {
292+
if (aborted) return;
293+
if (res.writableEnded) return;
294+
aborted = true;
295+
logger.warn(`Client disconnected during export (${tag}); aborting archive`);
296+
archive.abort();
297+
};
298+
299+
req.once('close', abortIfIncomplete);
300+
res.once('close', abortIfIncomplete);
301+
302+
archive.on('error', (err) => {
303+
logger.error(`Archive stream error (${tag}): ${err.message}`);
304+
if (!res.headersSent) {
305+
res.status(500).end();
306+
} else if (!res.writableEnded) {
307+
res.end();
308+
}
309+
});
310+
311+
archive.pipe(res);
312+
}

apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ describe('EvidenceExportService — streaming ZIPs', () => {
199199
);
200200
});
201201

202-
it('writes a placeholder when S3 object is missing', async () => {
202+
it('writes a placeholder when S3 object is truly missing (NoSuchKey)', async () => {
203203
const attachments = [
204204
{
205205
id: 'att_missing',
@@ -210,9 +210,11 @@ describe('EvidenceExportService — streaming ZIPs', () => {
210210
},
211211
];
212212

213-
(s3Client!.send as jest.Mock).mockRejectedValue(
214-
new Error('NoSuchKey: The specified key does not exist.'),
215-
);
213+
const noSuchKeyError = Object.assign(new Error('NoSuchKey'), {
214+
name: 'NoSuchKey',
215+
$metadata: { httpStatusCode: 404 },
216+
});
217+
(s3Client!.send as jest.Mock).mockRejectedValue(noSuchKeyError);
216218
primeTaskQueries({ attachments });
217219

218220
const { archive } = await service.streamTaskEvidenceZip(
@@ -230,6 +232,41 @@ describe('EvidenceExportService — streaming ZIPs', () => {
230232
expect(String(placeholder!.source)).toContain('att_missing');
231233
});
232234

235+
it('aborts the archive on transient S3 failures (not a placeholder)', async () => {
236+
const attachments = [
237+
{
238+
id: 'att_err',
239+
name: 'file.pdf',
240+
url: 'org_1/attachments/task/tsk_123/file.pdf',
241+
type: 'document',
242+
createdAt: new Date(),
243+
},
244+
];
245+
246+
// AccessDenied — NOT a missing-object error; must surface as a failure.
247+
const accessDeniedError = Object.assign(new Error('Access Denied'), {
248+
name: 'AccessDenied',
249+
$metadata: { httpStatusCode: 403 },
250+
});
251+
(s3Client!.send as jest.Mock).mockRejectedValue(accessDeniedError);
252+
primeTaskQueries({ attachments });
253+
254+
const { archive } = await service.streamTaskEvidenceZip(
255+
'org_1',
256+
'tsk_123',
257+
);
258+
const mock = archive as unknown as MockArchive;
259+
260+
await expect(mock.finalized).rejects.toThrow('aborted');
261+
expect(mock.abort).toHaveBeenCalled();
262+
263+
// No placeholder text file written for a non-missing error
264+
const placeholder = mock.appendCalls.find((c) =>
265+
c.options.name.includes('_MISSING_'),
266+
);
267+
expect(placeholder).toBeUndefined();
268+
});
269+
233270
it('disambiguates duplicate filenames within attachments folder', async () => {
234271
const attachments = [
235272
{
@@ -296,18 +333,20 @@ describe('EvidenceExportService — streaming ZIPs', () => {
296333
).rejects.toBeInstanceOf(NotFoundException);
297334
});
298335

299-
it('aborts archive with NotFoundException when no tasks have content', async () => {
336+
it('throws NotFoundException synchronously (pre-flight) when no tasks have content', async () => {
337+
// Org exists but no tasks with automations and no attachments.
300338
mockDb.organization.findUnique.mockResolvedValue({ name: 'Acme' });
301339
mockDb.task.findMany.mockResolvedValue([]);
302340
mockDb.attachment.findMany.mockResolvedValue([]);
303341

304-
const { archive } = await service.streamOrganizationEvidenceZip(
305-
'org_1',
306-
);
307-
const mock = archive as unknown as MockArchive;
342+
// Must reject synchronously — before an archive is returned — so the
343+
// controller can produce a real HTTP 404 instead of a broken streamed ZIP.
344+
await expect(
345+
service.streamOrganizationEvidenceZip('org_1'),
346+
).rejects.toBeInstanceOf(NotFoundException);
308347

309-
await expect(mock.finalized).rejects.toThrow('aborted');
310-
expect(mock.abort).toHaveBeenCalled();
348+
// No archive should have been created at all.
349+
expect(archiveInstances).toHaveLength(0);
311350
});
312351

313352
it('includes a task that has attachments but no automations', async () => {

0 commit comments

Comments
 (0)