Skip to content

Commit 34ddea3

Browse files
openrijalclaude
andcommitted
feat: opt-in ETag caching, maxBytes guardrail, richer storage errors
Builds on the Blobs API fallback to address three operational gaps that surfaced while diagnosing a production 500: 1. **Opt-in ETag read cache (disabled by default).** An in-process LRU (cap 64) keyed by `owner/repo@ref:path` stores parsed JSON alongside the response ETag. When enabled, subsequent reads send `If-None-Match`; GitHub returns 304 without spending rate-limit budget. Successful and conflicted writes always invalidate the cached entry, so the cache code is safe to leave in even when `cacheReads` is `false` (the gate ensures it's never populated). Enable globally via `runtimeConfig.autoadmin.github.cacheReads = true` or per-resource via `storage.cacheReads = true`. Defaults to `false` at every layer because module-scoped state is undesirable in multi-tenant isolates and a stale read could hide a manual repo edit. 2. **`maxBytes` / `warnAtBytes` size guardrail.** New per-resource `storage.maxBytes` throws a 413 with the actual byte count when a read or write exceeds it; `warnAtBytes` logs `console.warn` once per path. Enforced against the Contents API's `body.size` on reads and `Buffer.byteLength(payload.content, 'base64')` on writes. 3. **Locator-prefixed error messages.** Every `createError` now embeds `owner/repo:path[@ref]` and, where relevant, the file size or blob sha. This matters in serverless logs where the original request context is otherwise lost. Also replaces the post-fallback `base64Content!` non-null assertion with an explicit narrowing check, and surfaces empty-file decode as a clear 422 instead of the previous misleading "not valid JSON". Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ca97d60 commit 34ddea3

3 files changed

Lines changed: 212 additions & 13 deletions

File tree

server/utils/githubContents.ts

Lines changed: 177 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,64 @@ export interface GetGithubFileResult<T = unknown> {
1111
encoding: string
1212
}
1313

14+
export interface GithubReadOptions {
15+
/** Throw 413 when the file exceeds this size in bytes. */
16+
maxBytes?: number
17+
/** Emit a console.warn (once per path) when the file exceeds this size. */
18+
warnAtBytes?: number
19+
/**
20+
* Opt-in: cache responses by ETag in-process and send `If-None-Match` on
21+
* subsequent reads. GitHub returns 304 without spending rate-limit budget when
22+
* unchanged. Disabled by default because the cache lives at module scope,
23+
* which is undesirable for some deployment topologies (e.g. multi-tenant
24+
* shared isolates, or anywhere a stale read could hide a manual repo edit).
25+
*/
26+
cacheReads?: boolean
27+
}
28+
29+
interface CacheEntry {
30+
etag: string
31+
sha: string
32+
encoding: string
33+
// Parsed JSON is `unknown` in the cache; callers re-assert their generic.
34+
parsed: unknown
35+
}
36+
37+
// Module-level LRU cache for read responses. ETag-conditional requests against
38+
// GitHub return 304 without counting toward the rate limit, so caching the
39+
// parsed value alongside the etag is a meaningful latency + quota win on
40+
// repeated reads of the same resource (e.g. admin list re-renders).
41+
const READ_CACHE_MAX = 64
42+
const readCache = new Map<string, CacheEntry>()
43+
const sizeWarned = new Set<string>()
44+
45+
function cacheKey(owner: string, repo: string, path: string, ref?: string): string {
46+
return `${owner}/${repo}@${ref ?? '*'}:${path}`
47+
}
48+
49+
function cacheTouch(key: string, entry: CacheEntry): void {
50+
if (readCache.has(key)) {
51+
readCache.delete(key)
52+
}
53+
else if (readCache.size >= READ_CACHE_MAX) {
54+
const oldest = readCache.keys().next().value
55+
if (oldest !== undefined) {
56+
readCache.delete(oldest)
57+
}
58+
}
59+
readCache.set(key, entry)
60+
}
61+
62+
function cacheGet(key: string): CacheEntry | undefined {
63+
const entry = readCache.get(key)
64+
if (entry) {
65+
// Refresh LRU position.
66+
readCache.delete(key)
67+
readCache.set(key, entry)
68+
}
69+
return entry
70+
}
71+
1472
function authHeaders(token: string): HeadersInit {
1573
return {
1674
'Accept': 'application/vnd.github+json',
@@ -20,18 +78,62 @@ function authHeaders(token: string): HeadersInit {
2078
}
2179
}
2280

81+
function locator(owner: string, repo: string, path: string, ref?: string): string {
82+
return `${owner}/${repo}:${path}${ref ? `@${ref}` : ''}`
83+
}
84+
85+
function checkSize(
86+
size: number,
87+
owner: string,
88+
repo: string,
89+
path: string,
90+
ref: string | undefined,
91+
opts: GithubReadOptions | undefined,
92+
): void {
93+
const where = locator(owner, repo, path, ref)
94+
if (opts?.maxBytes !== undefined && size > opts.maxBytes) {
95+
throw createError({
96+
statusCode: 413,
97+
statusMessage: `File ${where} is ${size} bytes, exceeds configured maxBytes=${opts.maxBytes}.`,
98+
})
99+
}
100+
if (opts?.warnAtBytes !== undefined && size > opts.warnAtBytes && !sizeWarned.has(where)) {
101+
sizeWarned.add(where)
102+
console.warn(
103+
`[autoadmin] ${where} is ${size} bytes (warnAtBytes=${opts.warnAtBytes}). `
104+
+ `GitHub Contents API inlines content only under 1 MB; files between 1 MB and 100 MB `
105+
+ `take an extra Blobs API round-trip on every read. See docs/storage-limits.md.`,
106+
)
107+
}
108+
}
109+
23110
export async function getGithubJsonFile<T = unknown>(
24111
token: string,
25112
owner: string,
26113
repo: string,
27114
path: string,
28115
ref?: string,
116+
opts?: GithubReadOptions,
29117
): Promise<GetGithubFileResult<T>> {
30118
const url = new URL(`https://api.github.com/repos/${owner}/${repo}/contents/${path.replace(/^\//, '')}`)
31119
if (ref) {
32120
url.searchParams.set('ref', ref)
33121
}
34-
const res = await fetch(url, { headers: authHeaders(token) })
122+
const where = locator(owner, repo, path, ref)
123+
const cacheEnabled = opts?.cacheReads === true
124+
const key = cacheEnabled ? cacheKey(owner, repo, path, ref) : ''
125+
const cached = cacheEnabled ? cacheGet(key) : undefined
126+
const headers: Record<string, string> = { ...(authHeaders(token) as Record<string, string>) }
127+
if (cached) {
128+
headers['If-None-Match'] = cached.etag
129+
}
130+
const res = await fetch(url, { headers })
131+
132+
// 304 Not Modified: return cached parsed value without spending rate-limit budget.
133+
if (cacheEnabled && res.status === 304 && cached) {
134+
return { parsed: cached.parsed as T, sha: cached.sha, encoding: cached.encoding }
135+
}
136+
35137
const text = await res.text()
36138
let body: any
37139
try {
@@ -43,18 +145,25 @@ export async function getGithubJsonFile<T = unknown>(
43145
if (!res.ok) {
44146
throw createError({
45147
statusCode: res.status === 404 ? 404 : res.status >= 500 ? 502 : 400,
46-
statusMessage: body?.message || `GitHub API error (${res.status})`,
148+
statusMessage: body?.message
149+
? `${body.message} (${where})`
150+
: `GitHub API error (${res.status}) for ${where}`,
47151
})
48152
}
49153
if (body.type !== 'file' || !body.sha) {
50154
throw createError({
51155
statusCode: 500,
52-
statusMessage: 'GitHub response is not a single file with content.',
156+
statusMessage: `GitHub response for ${where} is not a regular file (type=${body?.type ?? 'unknown'}).`,
53157
})
54158
}
55-
// The Contents API omits `content` for files >1MB (returns encoding "none").
56-
// Fall back to the Git Blobs API, which streams base64 content up to 100MB.
57-
let base64Content: string | undefined = body.content
159+
160+
if (typeof body.size === 'number') {
161+
checkSize(body.size, owner, repo, path, ref, opts)
162+
}
163+
164+
// The Contents API omits `content` for files >1 MB (returns encoding "none").
165+
// Fall back to the Git Blobs API, which streams base64 content up to 100 MB.
166+
let base64Content: string | undefined = typeof body.content === 'string' && body.content.length > 0 ? body.content : undefined
58167
if (!base64Content || body.encoding === 'none') {
59168
const blobUrl = `https://api.github.com/repos/${owner}/${repo}/git/blobs/${body.sha}`
60169
const blobRes = await fetch(blobUrl, { headers: authHeaders(token) })
@@ -69,38 +178,77 @@ export async function getGithubJsonFile<T = unknown>(
69178
if (!blobRes.ok) {
70179
throw createError({
71180
statusCode: blobRes.status >= 500 ? 502 : 400,
72-
statusMessage: blobBody?.message || `GitHub Blobs API error (${blobRes.status})`,
181+
statusMessage: blobBody?.message
182+
? `${blobBody.message} (${where}, blob ${body.sha.slice(0, 8)})`
183+
: `GitHub Blobs API error (${blobRes.status}) for ${where}`,
73184
})
74185
}
75186
if (blobBody.encoding !== 'base64' || typeof blobBody.content !== 'string') {
76187
throw createError({
77188
statusCode: 500,
78-
statusMessage: 'GitHub Blobs API response is missing base64 content.',
189+
statusMessage: `GitHub Blobs API response for ${where} is missing base64 content (encoding=${blobBody?.encoding ?? 'unknown'}).`,
79190
})
80191
}
81192
base64Content = blobBody.content
82193
}
83-
const decoded = Buffer.from(base64Content!.replace(/\n/g, ''), 'base64').toString('utf8')
194+
195+
// Explicit narrowing after the if-block: TypeScript can't follow the cross-branch
196+
// dataflow proving `base64Content` is now a string.
197+
if (typeof base64Content !== 'string') {
198+
throw createError({
199+
statusCode: 500,
200+
statusMessage: `Internal error: no base64 content resolved for ${where}.`,
201+
})
202+
}
203+
const decoded = Buffer.from(base64Content.replace(/\n/g, ''), 'base64').toString('utf8')
204+
if (!decoded) {
205+
throw createError({
206+
statusCode: 422,
207+
statusMessage: `File ${where} is empty.`,
208+
})
209+
}
84210
let parsed: T
85211
try {
86212
parsed = JSON.parse(decoded) as T
87213
}
88-
catch {
214+
catch (e: any) {
89215
throw createError({
90216
statusCode: 422,
91-
statusMessage: 'File is not valid JSON.',
217+
statusMessage: `File ${where} is not valid JSON: ${e?.message ?? 'parse error'}.`,
92218
})
93219
}
220+
221+
if (cacheEnabled) {
222+
const etag = res.headers.get('etag')
223+
if (etag) {
224+
cacheTouch(key, { etag, sha: body.sha, encoding: body.encoding, parsed })
225+
}
226+
}
227+
94228
return { parsed, sha: body.sha, encoding: body.encoding }
95229
}
96230

231+
export interface GithubWriteOptions {
232+
/** Throw 413 when the new file content exceeds this size in bytes. */
233+
maxBytes?: number
234+
/** Emit a console.warn (once per path) when the new content exceeds this size. */
235+
warnAtBytes?: number
236+
}
237+
97238
export async function putGithubJsonFile(
98239
token: string,
99240
owner: string,
100241
repo: string,
101242
path: string,
102243
payload: GithubFilePayload,
244+
opts?: GithubWriteOptions,
103245
): Promise<{ commitSha?: string }> {
246+
const where = locator(owner, repo, path, payload.branch)
247+
if (opts?.maxBytes !== undefined || opts?.warnAtBytes !== undefined) {
248+
// payload.content is base64; check the raw byte size that will land in the repo.
249+
const rawSize = Buffer.byteLength(payload.content, 'base64')
250+
checkSize(rawSize, owner, repo, path, payload.branch, opts)
251+
}
104252
const url = `https://api.github.com/repos/${owner}/${repo}/contents/${path.replace(/^\//, '')}`
105253
const res = await fetch(url, {
106254
method: 'PUT',
@@ -118,17 +266,33 @@ export async function putGithubJsonFile(
118266
catch {
119267
body = { message: text }
120268
}
269+
// Defensive: drop any cached read for this path on every PUT (success or
270+
// conflict). Cache invalidation is cheap; stale caches are not. This is a
271+
// no-op when caching is disabled (the cache is then always empty).
272+
const cKey = cacheKey(owner, repo, path, payload.branch)
121273
if (res.status === 409) {
274+
readCache.delete(cKey)
122275
throw createError({
123276
statusCode: 409,
124-
statusMessage: body?.message || 'GitHub file changed on the server (sha conflict). Refresh and try again.',
277+
statusMessage: body?.message
278+
? `${body.message} (${where})`
279+
: `GitHub file ${where} changed on the server (sha conflict). Refresh and try again.`,
125280
})
126281
}
127282
if (!res.ok) {
128283
throw createError({
129284
statusCode: res.status >= 500 ? 502 : 400,
130-
statusMessage: body?.message || `GitHub API error (${res.status})`,
285+
statusMessage: body?.message
286+
? `${body.message} (${where})`
287+
: `GitHub API error (${res.status}) for ${where}`,
131288
})
132289
}
290+
readCache.delete(cKey)
133291
return { commitSha: body?.commit?.sha }
134292
}
293+
294+
/** Test/diagnostic helper: clear the in-memory ETag cache. */
295+
export function clearGithubReadCache(): void {
296+
readCache.clear()
297+
sizeWarned.clear()
298+
}

server/utils/jsonStorage/factory.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,25 @@ export type JsonStorageConfig
1919
* never commit tokens or pass them from untrusted clients.
2020
*/
2121
token?: string
22+
/**
23+
* Throw 413 when the file size exceeds this many bytes (read or write).
24+
* Use as a hard ceiling to prevent runaway growth. See docs/storage-limits.md.
25+
*/
26+
maxBytes?: number
27+
/**
28+
* Emit a console.warn once per path when content crosses this many bytes.
29+
* Useful as an early-warning threshold (e.g. 1 MB to flag the Blobs-API fallback boundary).
30+
*/
31+
warnAtBytes?: number
32+
/**
33+
* Opt-in: cache reads in-process by ETag and send `If-None-Match` on subsequent
34+
* requests. **Disabled by default.** When `undefined`, falls back to the global
35+
* `runtimeConfig.autoadmin.github.cacheReads` flag, which also defaults to
36+
* `false`. Enable only when you control writes (so the cache is reliably
37+
* invalidated) and your runtime keeps the module in memory long enough for
38+
* the cache to pay for itself.
39+
*/
40+
cacheReads?: boolean
2241
}
2342
| {
2443
kind: 'local'
@@ -45,6 +64,7 @@ export function getAutoadminGithubRuntime() {
4564
owner: trimString(g.owner),
4665
repo: trimString(g.repo),
4766
ref: trimString(g.ref),
67+
cacheReads: g.cacheReads === true,
4868
}
4969
}
5070

@@ -98,6 +118,10 @@ export function createJsonStorageRepository(
98118
path: storage.path,
99119
ref: storage.ref,
100120
defaultIfMissing: defaultParsedForKind(resourceKind),
121+
maxBytes: storage.maxBytes,
122+
warnAtBytes: storage.warnAtBytes,
123+
// Per-resource override beats the global runtimeConfig default.
124+
cacheReads: storage.cacheReads ?? getAutoadminGithubRuntime().cacheReads,
101125
})
102126
}
103127

server/utils/jsonStorage/githubJsonRepository.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@ export interface GithubJsonRepositoryOptions {
1111
ref?: string
1212
/** When the path has no file yet (404), `read` returns this as `parsed` and revision `'0'` (same as local). */
1313
defaultIfMissing: unknown
14+
/** Throw 413 when read or written content exceeds this many bytes. */
15+
maxBytes?: number
16+
/** Emit a console.warn once when content exceeds this many bytes (soft limit). */
17+
warnAtBytes?: number
18+
/**
19+
* Opt-in: enable in-process ETag caching for reads. Disabled by default.
20+
* See `GithubReadOptions.cacheReads` in `../githubContents.ts`.
21+
*/
22+
cacheReads?: boolean
1423
}
1524

1625
export class GithubJsonRepository implements JsonStorageRepository {
@@ -26,6 +35,7 @@ export class GithubJsonRepository implements JsonStorageRepository {
2635
this.opts.repo,
2736
this.opts.path,
2837
this.opts.ref,
38+
{ maxBytes: this.opts.maxBytes, warnAtBytes: this.opts.warnAtBytes, cacheReads: this.opts.cacheReads },
2939
)
3040
return { parsed, revision: sha }
3141
}
@@ -58,6 +68,7 @@ export class GithubJsonRepository implements JsonStorageRepository {
5868
this.opts.repo,
5969
this.opts.path,
6070
payload,
71+
{ maxBytes: this.opts.maxBytes, warnAtBytes: this.opts.warnAtBytes },
6172
)
6273
}
6374
}

0 commit comments

Comments
 (0)