Skip to content

Commit fc47952

Browse files
authored
Merge pull request #2640 from trycompai/main
[comp] Production Deploy
2 parents 2fe1402 + 0ac9c80 commit fc47952

7 files changed

Lines changed: 1628 additions & 214 deletions
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
/**
2+
* Evidence Attachment Streamer
3+
* Fetches task attachments and streams them from S3 directly into a ZIP archive.
4+
*/
5+
6+
import { GetObjectCommand } from '@aws-sdk/client-s3';
7+
import { Logger } from '@nestjs/common';
8+
import type { Archiver } from 'archiver';
9+
import { Readable } from 'node:stream';
10+
import { db } from '@db';
11+
import { AttachmentEntityType, type Attachment } from '@db';
12+
import { BUCKET_NAME, s3Client } from '../../app/s3';
13+
14+
// Module-level logger; 'EvidenceAttachmentStreamer' is the label passed to the
// Logger constructor. Used below to report fetch failures and missing objects.
const logger = new Logger('EvidenceAttachmentStreamer');
15+
16+
/** The `Attachment` fields that the evidence export reads. */
type TaskAttachmentField = 'id' | 'name' | 'url' | 'type' | 'createdAt';

/**
 * Narrow projection of `Attachment` returned by `getTaskAttachments` and
 * consumed when writing archive entries.
 */
export type TaskAttachment = Pick<Attachment, TaskAttachmentField>;
20+
21+
/**
 * Load the attachments that were uploaded directly onto a task, oldest first.
 *
 * Only rows whose entity type is `AttachmentEntityType.task` are returned.
 * Task-items hold vendor/risk attachments (per `TaskItemEntityType`) and are
 * deliberately left out: this covers the task-evidence scope only.
 *
 * @param organizationId - Organization the attachments must belong to.
 * @param taskId - Task whose attachments are requested.
 * @returns The matching attachments, ordered by ascending `createdAt`.
 */
export async function getTaskAttachments(
  organizationId: string,
  taskId: string,
): Promise<TaskAttachment[]> {
  const attachments = await db.attachment.findMany({
    where: {
      organizationId,
      entityType: AttachmentEntityType.task,
      entityId: taskId,
    },
    // Project only the columns that make up `TaskAttachment`.
    select: {
      id: true,
      name: true,
      url: true,
      type: true,
      createdAt: true,
    },
    orderBy: { createdAt: 'asc' },
  });
  return attachments;
}
46+
47+
/**
 * Create a case-insensitive filename tracker that disambiguates collisions by
 * inserting a numeric suffix (` (1)`, ` (2)`, …) before the extension.
 * Use one tracker per archive directory.
 *
 * Sanitization applied to every name before the collision check:
 * - path separators (`/` and `\`) become `_`, so a name cannot add folders;
 * - runs of whitespace collapse to a single space and the ends are trimmed;
 * - a name that ends up empty, or that consists only of dots (`.`, `..`),
 *   falls back to `file`. An empty name would make the caller emit a
 *   `folder/` entry, and dot-only names are relative path segments.
 *
 * @returns A function mapping a raw attachment name to an entry name that is
 *   unique (ignoring case) among all names this tracker has handed out.
 */
export function createFilenameTracker(): (rawName: string) => string {
  const used = new Set<string>();

  return (rawName: string) => {
    const cleaned = (rawName || 'file')
      .replace(/[\\/]/g, '_')
      .replace(/\s+/g, ' ')
      .trim();

    // Whitespace-only input survives the `|| 'file'` default above but trims
    // down to '', so the fallback has to be re-applied after sanitizing.
    const sanitized =
      cleaned === '' || /^\.+$/.test(cleaned) ? 'file' : cleaned;

    // A leading dot (index 0) marks a dotfile, not an extension, hence `> 0`.
    const dot = sanitized.lastIndexOf('.');
    const base = dot > 0 ? sanitized.slice(0, dot) : sanitized;
    const ext = dot > 0 ? sanitized.slice(dot) : '';

    let candidate = `${base}${ext}`;
    for (let i = 1; used.has(candidate.toLowerCase()); i += 1) {
      candidate = `${base} (${i})${ext}`;
    }

    used.add(candidate.toLowerCase());
    return candidate;
  };
}
71+
72+
/**
 * Queue one attachment on the ZIP archive, using its S3 object body as the
 * entry's source stream.
 *
 * Failure policy:
 * - Object genuinely absent (as decided by `isS3MissingObjectError`) → a
 *   `_MISSING_<name>.txt` placeholder entry is written instead, so the bundle
 *   stays auditable.
 * - Anything else (network, permissions, throttling, empty body, append
 *   failure) → logged and rethrown, so the export fails visibly rather than
 *   silently shipping an incomplete archive.
 *
 * Both the real entry name and the placeholder name go through `uniqueName`
 * in their *final* form. A real file uploaded as `_MISSING_foo.txt` therefore
 * cannot collide with the placeholder generated for a missing file named `foo`.
 *
 * NOTE(review): the promise resolves once the entry has been handed to
 * `archive.append`; this function does not wait for the body to be consumed.
 *
 * @param params.archive - Archive receiving the entry.
 * @param params.attachment - Attachment to add; its `url` is used as the S3 key.
 * @param params.folderPath - Folder prefix for the entry inside the archive.
 * @param params.uniqueName - Collision tracker for that folder.
 * @throws Error when the S3 client or bucket is not configured, and any
 *   non-missing-object error raised while fetching or appending.
 */
export async function appendAttachmentToArchive(params: {
  archive: Archiver;
  attachment: TaskAttachment;
  folderPath: string;
  uniqueName: (rawName: string) => string;
}): Promise<void> {
  const { archive, attachment, folderPath, uniqueName } = params;

  // Process-level misconfiguration: abort the export outright instead of
  // emitting a placeholder for every single attachment.
  if (!s3Client || !BUCKET_NAME) {
    throw new Error(
      'S3 client or bucket not configured; cannot stream attachments',
    );
  }

  try {
    const getObject = new GetObjectCommand({
      Bucket: BUCKET_NAME,
      Key: attachment.url,
    });
    const { Body: body } = await s3Client.send(getObject);

    if (!body) {
      throw new Error('S3 returned no body');
    }

    // Pass a Node Readable through untouched; wrap any other body via
    // Readable.from so it can be appended as a stream.
    const source =
      body instanceof Readable
        ? body
        : Readable.from(body as AsyncIterable<Uint8Array>);

    const entryName = `${folderPath}/${uniqueName(attachment.name)}`;
    archive.append(source, { name: entryName });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);

    if (isS3MissingObjectError(error)) {
      logger.warn(
        `Missing S3 object for attachment ${attachment.id} (key=${attachment.url}): ${message}`,
      );
      // Reserve the complete placeholder name (prefix and `.txt` included) in
      // the same tracker the success path uses.
      const placeholderName = uniqueName(`_MISSING_${attachment.name}.txt`);
      archive.append(buildMissingPlaceholder(attachment, message), {
        name: `${folderPath}/${placeholderName}`,
      });
      return;
    }

    logger.error(
      `Failed to fetch attachment ${attachment.id} (key=${attachment.url}): ${message}`,
    );
    throw error;
  }
}
146+
147+
/**
 * True only for "the object does not exist": an error identified as
 * `NoSuchKey` (or `NotFound`, for HeadObject semantics).
 *
 * Anything else, including other 404s such as `NoSuchBucket` and 403s such as
 * `AccessDenied`, is a real failure that must surface rather than become a
 * silent per-attachment skip. A misconfigured bucket returning `NoSuchBucket`
 * would otherwise produce an export full of placeholders that looks
 * "successful" but contains none of the customer's evidence.
 *
 * `name` and `Code` are checked independently. Every `Error` instance carries
 * a `name` (at minimum `'Error'`), so a `name ?? Code` fallback would never
 * consult `Code` for an error whose code is only exposed there.
 *
 * @param error - Value caught from the S3 call; may be anything.
 * @returns Whether the error denotes a missing object.
 */
function isS3MissingObjectError(error: unknown): boolean {
  if (!error || typeof error !== 'object') return false;

  const { name, Code } = error as { name?: unknown; Code?: unknown };
  return [name, Code].some(
    (code) => code === 'NoSuchKey' || code === 'NotFound',
  );
}
161+
162+
/**
 * Build the text written in place of an attachment whose object could not be
 * found in storage.
 *
 * The result is a fixed header line followed by `key: value` lines for the
 * attachment id, its original name, its S3 key and the failure reason. Lines
 * are separated by `\n`, with no trailing newline.
 *
 * @param attachment - Attachment whose object is missing.
 * @param reason - Error message explaining the lookup failure.
 * @returns The placeholder file contents.
 */
function buildMissingPlaceholder(
  attachment: TaskAttachment,
  reason: string,
): string {
  let text = `Attachment missing from storage.`;
  text += `\nattachmentId: ${attachment.id}`;
  text += `\noriginalName: ${attachment.name}`;
  text += `\ns3Key: ${attachment.url}`;
  text += `\nreason: ${reason}`;
  return text;
}

0 commit comments

Comments
 (0)