Skip to content

Commit a06516a

Browse files
ascorbic and claude authored
fix(pds): detect content-type from magic bytes for blobs with invalid type (#116)
* fix(pds): detect content-type from magic bytes for blobs with invalid type

  When blobs are uploaded with `Content-Type: */*` (e.g., from video.bsky.app), the getBlob endpoint now detects the actual content type from magic bytes.

  Supported formats: MP4, MOV, M4V, JPEG, PNG, GIF, WebP, WebM.

  This fixes video playback issues where videos would stop working after the CDN cache expires, because video.bsky.app rejects the `*/*` content type.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
* Fingerprint on the way in and out
* changeset

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 2f224a9 commit a06516a

5 files changed

Lines changed: 275 additions & 9 deletions

File tree

.changeset/long-hairs-walk.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@getcirrus/pds": patch
3+
---
4+
5+
Detect the content type of blobs from their magic bytes when the uploaded Content-Type is missing or `*/*`

packages/pds/src/format.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
 * Detect content type from file magic bytes.
 *
 * Only the leading 12 bytes are ever inspected, so callers may pass either a
 * whole file or just its header. Recognised formats: MP4, M4V, QuickTime (MOV),
 * JPEG, PNG, GIF, WebP and WebM.
 *
 * @param bytes - The start of the file's content.
 * @returns The detected MIME type, or `null` if no known signature matches.
 */
export function detectContentType(bytes: Uint8Array): string | null {
	// True when `signature` appears in `bytes` starting at `offset`.
	// Reads past the end yield `undefined`, which never equals a byte value,
	// so short inputs simply fail to match.
	const hasSignature = (signature: readonly number[], offset = 0): boolean =>
		signature.every((value, i) => bytes[offset + i] === value);

	// Decodes the four bytes starting at `offset` as an ASCII tag.
	const fourCC = (offset: number): string =>
		String.fromCharCode(...bytes.subarray(offset, offset + 4));

	// MP4/M4V/MOV: a 4-byte box size, the box type "ftyp", then the 4-character
	// major brand.
	if (bytes.length >= 12 && fourCC(4) === "ftyp") {
		switch (fourCC(8)) {
			case "M4V ":
			case "M4VH":
			case "M4VP":
				return "video/x-m4v";
			// The QuickTime brand is "qt" padded with two spaces to four
			// characters. It is spelled with escapes so the padding cannot be
			// lost to whitespace collapsing; a 3-character literal would never
			// match the 4-character brand.
			case "qt\x20\x20":
				return "video/quicktime";
			// "isom", "iso2", "mp41", "mp42", "avc1" and any other ftyp brand
			// are reported as MP4.
			default:
				return "video/mp4";
		}
	}

	// JPEG: FF D8 FF
	if (hasSignature([0xff, 0xd8, 0xff])) {
		return "image/jpeg";
	}

	// PNG: 0x89 "PNG"
	if (hasSignature([0x89, 0x50, 0x4e, 0x47])) {
		return "image/png";
	}

	// GIF: "GIF"
	if (hasSignature([0x47, 0x49, 0x46])) {
		return "image/gif";
	}

	// WebP: "RIFF" at offset 0 and "WEBP" at offset 8 (bytes 4-7 are not checked).
	if (
		hasSignature([0x52, 0x49, 0x46, 0x46]) &&
		hasSignature([0x57, 0x45, 0x42, 0x50], 8)
	) {
		return "image/webp";
	}

	// WebM: 1A 45 DF A3
	if (hasSignature([0x1a, 0x45, 0xdf, 0xa3])) {
		return "video/webm";
	}

	return null;
}

packages/pds/src/xrpc/repo.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import type { Context } from "hono";
22
import { isDid } from "@atcute/lexicons/syntax";
3-
import { AccountDurableObject } from "../account-do";
4-
import type { AppEnv, AuthedAppEnv } from "../types";
5-
import { validator } from "../validation";
3+
import { AccountDurableObject } from "../account-do.js";
4+
import type { AppEnv, AuthedAppEnv } from "../types.js";
5+
import { validator } from "../validation.js";
6+
import { detectContentType } from "../format.js";
67

78
function invalidRecordError(
89
c: Context<AuthedAppEnv>,
@@ -423,9 +424,12 @@ export async function uploadBlob(
423424
c: Context<AuthedAppEnv>,
424425
accountDO: DurableObjectStub<AccountDurableObject>,
425426
): Promise<Response> {
426-
const contentType =
427-
c.req.header("Content-Type") || "application/octet-stream";
427+
let contentType = c.req.header("Content-Type");
428+
428429
const bytes = new Uint8Array(await c.req.arrayBuffer());
430+
if (!contentType || contentType === "*/*") {
431+
contentType = detectContentType(bytes) || "application/octet-stream";
432+
}
429433

430434
// Size limit check (60MB)
431435
const MAX_BLOB_SIZE = 60 * 1024 * 1024;

packages/pds/src/xrpc/sync.ts

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import type { Context } from "hono";
22
import { isDid, isNsid, isRecordKey } from "@atcute/lexicons/syntax";
33
import type { AccountDurableObject } from "../account-do.js";
4-
import type { AppEnv } from "../types";
4+
import type { AppEnv } from "../types.js";
5+
import { detectContentType } from "../format.js";
56

67
export async function getRepo(
78
c: Context<AppEnv>,
@@ -289,11 +290,37 @@ export async function getBlob(
289290
);
290291
}
291292

293+
// Determine content type, with fallback for missing or invalid values
294+
let contentType = blob.httpMetadata?.contentType;
295+
296+
// If no content type or invalid wildcard, try to detect from content
297+
if (!contentType || contentType === "*/*") {
298+
// Read first few bytes to detect content type
299+
const [headerStream, bodyStream] = blob.body.tee();
300+
const reader = headerStream.getReader();
301+
const { value: headerBytes } = await reader.read();
302+
reader.releaseLock();
303+
304+
if (headerBytes && headerBytes.length >= 12) {
305+
contentType =
306+
detectContentType(headerBytes) || "application/octet-stream";
307+
} else {
308+
contentType = "application/octet-stream";
309+
}
310+
311+
return new Response(bodyStream, {
312+
status: 200,
313+
headers: {
314+
"Content-Type": contentType,
315+
"Content-Length": blob.size.toString(),
316+
},
317+
});
318+
}
319+
292320
return new Response(blob.body, {
293321
status: 200,
294322
headers: {
295-
"Content-Type":
296-
blob.httpMetadata?.contentType || "application/octet-stream",
323+
"Content-Type": contentType,
297324
"Content-Length": blob.size.toString(),
298325
},
299326
});
@@ -348,7 +375,10 @@ export async function getRecord(
348375
// Validate collection is an NSID
349376
if (!isNsid(collection)) {
350377
return c.json(
351-
{ error: "InvalidRequest", message: "Invalid collection format (must be NSID)" },
378+
{
379+
error: "InvalidRequest",
380+
message: "Invalid collection format (must be NSID)",
381+
},
352382
400,
353383
);
354384
}

packages/pds/test/blobs.test.ts

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,146 @@ describe("Blob Storage", () => {
225225
});
226226
});
227227

228+
describe("Content-Type Detection", () => {
	// Uploads `body` with a wildcard Content-Type header, then fetches the
	// stored blob back through com.atproto.sync.getBlob and returns that
	// response so each test can assert on the served Content-Type.
	async function roundTripWildcardBlob(body: Uint8Array) {
		const uploadResponse = await worker.fetch(
			new Request("http://pds.test/xrpc/com.atproto.repo.uploadBlob", {
				method: "POST",
				headers: {
					"Content-Type": "*/*",
					Authorization: `Bearer ${env.AUTH_TOKEN}`,
				},
				body,
			}),
			env,
		);

		// Checked for every case so a failed upload is reported as a status
		// mismatch instead of a property access on an undefined `blob`.
		expect(uploadResponse.status).toBe(200);

		const uploadData = (await uploadResponse.json()) as {
			blob: { ref: { $link: string } };
		};
		const cid = uploadData.blob.ref.$link;

		return worker.fetch(
			new Request(
				`http://pds.test/xrpc/com.atproto.sync.getBlob?did=${env.DID}&cid=${cid}`,
			),
			env,
		);
	}

	it("should detect video/mp4 from magic bytes when stored with */*", async () => {
		// A minimal MP4 header: one 20-byte ftyp box with the "isom" brand.
		const mp4Header = new Uint8Array([
			0x00, 0x00, 0x00, 0x14, // box size (20 bytes)
			0x66, 0x74, 0x79, 0x70, // "ftyp"
			0x69, 0x73, 0x6f, 0x6d, // "isom" brand
			0x00, 0x00, 0x00, 0x01, // minor version
			0x69, 0x73, 0x6f, 0x6d, // compatible brand
		]);

		const getResponse = await roundTripWildcardBlob(mp4Header);

		expect(getResponse.status).toBe(200);
		expect(getResponse.headers.get("Content-Type")).toBe("video/mp4");
	});

	it("should detect image/jpeg from magic bytes when stored with */*", async () => {
		// JPEG magic bytes (FF D8 FF) followed by the start of a JFIF segment.
		const jpegData = new Uint8Array([
			0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
		]);

		const getResponse = await roundTripWildcardBlob(jpegData);

		expect(getResponse.status).toBe(200);
		expect(getResponse.headers.get("Content-Type")).toBe("image/jpeg");
	});

	it("should detect image/png from magic bytes", async () => {
		// The 8-byte PNG signature followed by four more bytes.
		const pngData = new Uint8Array([
			0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d,
		]);

		const getResponse = await roundTripWildcardBlob(pngData);

		expect(getResponse.status).toBe(200);
		expect(getResponse.headers.get("Content-Type")).toBe("image/png");
	});

	it("should fallback to application/octet-stream for unknown content", async () => {
		// Bytes that don't match any known format.
		const unknownData = new Uint8Array([
			0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
		]);

		const getResponse = await roundTripWildcardBlob(unknownData);

		expect(getResponse.status).toBe(200);
		expect(getResponse.headers.get("Content-Type")).toBe(
			"application/octet-stream",
		);
	});
});
367+
228368
describe("Integration", () => {
229369
it("should handle upload and retrieval flow", async () => {
230370
// Create test data

0 commit comments

Comments
 (0)