Skip to content

Commit a943f3d

Browse files
authored
Merge pull request #2639 from trycompai/worktree-evidence-export-attachments
feat(evidence-export): include task attachments and stream large ZIPs
2 parents 3cb397a + 317d407 commit a943f3d

7 files changed

Lines changed: 1286 additions & 214 deletions
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/**
2+
* Evidence Attachment Streamer
3+
* Fetches task attachments and streams them from S3 directly into a ZIP archive.
4+
*/
5+
6+
import { GetObjectCommand } from '@aws-sdk/client-s3';
7+
import { Logger } from '@nestjs/common';
8+
import type { Archiver } from 'archiver';
9+
import { Readable } from 'node:stream';
10+
import { db } from '@db';
11+
import { AttachmentEntityType, type Attachment } from '@db';
12+
import { BUCKET_NAME, s3Client } from '../../app/s3';
13+
14+
// Module-level logger shared by the functions in this file; constructed with
// the label 'EvidenceAttachmentStreamer'. Used below to report failed or
// missing S3 objects while attachments are appended to an archive.
const logger = new Logger('EvidenceAttachmentStreamer');
15+
16+
/** Names of the `Attachment` fields that the evidence export reads. */
type TaskAttachmentField = 'id' | 'name' | 'url' | 'type' | 'createdAt';

/**
 * Narrow view of an `Attachment` carrying only the fields selected by
 * `getTaskAttachments` and consumed when writing archive entries.
 */
export type TaskAttachment = Pick<Attachment, TaskAttachmentField>;
20+
21+
/**
 * Load the attachments that were uploaded directly against a task.
 *
 * Only rows whose entity type is `AttachmentEntityType.task` are returned.
 * Task-items hold vendor/risk attachments (per `TaskItemEntityType`) and are
 * deliberately left out: this covers the task-evidence scope only.
 *
 * @param organizationId - Organization the attachments must belong to.
 * @param taskId - Task whose attachments are requested (matched on `entityId`).
 * @returns The matching attachments, oldest first (`createdAt` ascending),
 *   reduced to the `TaskAttachment` fields.
 */
export async function getTaskAttachments(
  organizationId: string,
  taskId: string,
): Promise<TaskAttachment[]> {
  const attachments = await db.attachment.findMany({
    orderBy: { createdAt: 'asc' },
    select: {
      id: true,
      name: true,
      url: true,
      type: true,
      createdAt: true,
    },
    where: {
      organizationId,
      entityType: AttachmentEntityType.task,
      entityId: taskId,
    },
  });

  return attachments;
}
46+
47+
/**
 * Create a case-insensitive filename tracker that disambiguates collisions by
 * inserting a numeric suffix before the extension. Scoped per directory: each
 * call returns an independent tracker with its own set of used names.
 *
 * Sanitization applied to every name before the collision check:
 * - `/` and `\` become `_`, so a name can never introduce extra path segments;
 * - runs of whitespace collapse to a single space and the result is trimmed;
 * - a name that ends up empty, or is exactly `.` or `..`, falls back to
 *   `file`. Those would otherwise produce an empty entry name (a directory
 *   entry) or a relative-path entry inside the archive.
 *
 * @returns A function mapping a raw attachment name to a name that is unique
 *   (ignoring case) among all names previously returned by the same tracker.
 */
export function createFilenameTracker(): (rawName: string) => string {
  const used = new Set<string>();

  return (rawName: string) => {
    const cleaned = (rawName || 'file')
      .replace(/[\\/]/g, '_')
      .replace(/\s+/g, ' ')
      .trim();

    // Whitespace-only input trims down to ''; '.' and '..' are directory
    // references rather than usable file names.
    const sanitized =
      cleaned === '' || cleaned === '.' || cleaned === '..' ? 'file' : cleaned;

    // A leading dot (index 0) marks a dotfile such as ".env", not an
    // extension, so only a dot at index > 0 splits base from extension.
    const dot = sanitized.lastIndexOf('.');
    const base = dot > 0 ? sanitized.slice(0, dot) : sanitized;
    const ext = dot > 0 ? sanitized.slice(dot) : '';

    let candidate = `${base}${ext}`;
    let i = 1;
    while (used.has(candidate.toLowerCase())) {
      candidate = `${base} (${i})${ext}`;
      i += 1;
    }

    used.add(candidate.toLowerCase());
    return candidate;
  };
}
71+
72+
/**
 * Stream one attachment's S3 object into the archive as a new entry.
 *
 * Outcomes:
 * - Object fetched: its body is appended under
 *   `<folderPath>/<unique attachment name>`.
 * - Object genuinely missing (as decided by `isS3MissingObjectError`): a
 *   warning is logged and a `_MISSING_<name>.txt` placeholder describing the
 *   attachment is appended instead, so the bundle stays auditable.
 * - Any other failure (network, permissions, throttling, empty body): the
 *   error is logged and rethrown so the export fails visibly rather than
 *   silently delivering an incomplete archive.
 *
 * @param params.archive - Archive receiving the entry.
 * @param params.attachment - Attachment to export; `url` is used as the S3 key.
 * @param params.folderPath - Directory inside the archive for the entry.
 * @param params.uniqueName - Maps the attachment name to a collision-free
 *   file name for this folder.
 * @throws Error when the S3 client or bucket is not configured, when S3
 *   returns no body, or when fetching fails for any reason other than a
 *   missing object.
 */
export async function appendAttachmentToArchive(params: {
  archive: Archiver;
  attachment: TaskAttachment;
  folderPath: string;
  uniqueName: (rawName: string) => string;
}): Promise<void> {
  const { archive, attachment, folderPath, uniqueName } = params;

  // A missing client/bucket is a process-level misconfiguration: abort the
  // whole export instead of emitting a placeholder for every attachment.
  if (!s3Client || !BUCKET_NAME) {
    throw new Error(
      'S3 client or bucket not configured; cannot stream attachments',
    );
  }

  try {
    const command = new GetObjectCommand({
      Bucket: BUCKET_NAME,
      Key: attachment.url,
    });
    const { Body: body } = await s3Client.send(command);

    if (!body) {
      throw new Error('S3 returned no body');
    }

    // Use the body directly when it is already a Node stream; otherwise wrap
    // it so it can be appended as one.
    let source: Readable;
    if (body instanceof Readable) {
      source = body;
    } else {
      source = Readable.from(body as AsyncIterable<Uint8Array>);
    }

    const entryName = `${folderPath}/${uniqueName(attachment.name)}`;
    archive.append(source, { name: entryName });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);

    if (isS3MissingObjectError(error)) {
      logger.warn(
        `Missing S3 object for attachment ${attachment.id} (key=${attachment.url}): ${reason}`,
      );
      const placeholderName = `${folderPath}/_MISSING_${uniqueName(attachment.name)}.txt`;
      archive.append(buildMissingPlaceholder(attachment, reason), {
        name: placeholderName,
      });
      return;
    }

    logger.error(
      `Failed to fetch attachment ${attachment.id} (key=${attachment.url}): ${reason}`,
    );
    throw error;
  }
}
136+
137+
/**
 * True only for "the object does not exist" — specifically `NoSuchKey` (or
 * `NotFound` for HeadObject semantics). Anything else — including the other
 * 404s like `NoSuchBucket`, or 403s like `AccessDenied` — is a real failure
 * that must surface, not a silent per-attachment skip. A misconfigured bucket
 * returning NoSuchBucket would otherwise produce an export full of placeholders
 * that looks "successful" but contains none of the customer's evidence.
 *
 * Both the `name` and the `Code` field are inspected. Every `Error` instance
 * has a `name` (at least `'Error'`), so treating `Code` purely as a fallback
 * for an absent `name` would never consult it for real errors.
 *
 * @param error - Value caught from an S3 request; may be of any type.
 * @returns `true` when either field identifies a missing object; `false` for
 *   every other error and for non-object values.
 */
function isS3MissingObjectError(error: unknown): boolean {
  if (!error || typeof error !== 'object') return false;

  const { name, Code } = error as { name?: unknown; Code?: unknown };
  return [name, Code].some(
    (code) => code === 'NoSuchKey' || code === 'NotFound',
  );
}
151+
152+
/**
 * Build the text written in place of an attachment whose S3 object is missing.
 *
 * The result is newline-separated: a fixed headline followed by the
 * attachment id, its original name, its S3 key (the attachment `url`) and the
 * supplied reason, one per line.
 *
 * @param attachment - Attachment whose object could not be found.
 * @param reason - Explanation to record, e.g. the S3 error message.
 * @returns The placeholder file contents.
 */
function buildMissingPlaceholder(
  attachment: TaskAttachment,
  reason: string,
): string {
  const { id, name, url } = attachment;

  const headline = `Attachment missing from storage.`;
  const details = [
    `attachmentId: ${id}`,
    `originalName: ${name}`,
    `s3Key: ${url}`,
    `reason: ${reason}`,
  ];

  return [headline, ...details].join('\n');
}

0 commit comments

Comments
 (0)