1010 * Set CACHE_SPREADSHEET_ID to a valid Google Sheet ID (must be owned by
1111 * the same account). When enabled, public GET requests are stored in the
1212 * sheet and served from there on repeat visits, reducing UrlFetchApp
13- * quota consumption. The cache is Vary-aware (Accept-Encoding and
14- * Accept-Language are hashed into the compound cache key). Leave
15- * CACHE_SPREADSHEET_ID as-is to disable caching entirely — zero overhead.
13+ * quota consumption. Bodies are gzipped before base64 storage so larger
14+ * responses fit under the per-cell character limit, and persistent
15+ * 4xx (404/410/451) sent without Cache-Control get a short negative-cache
16+ * TTL so buggy clients that hammer dead URLs stop burning quota; 5xx is
17+ * never cached so a flapping upstream cannot poison a 24h slot with a transient outage.
18+ * The cache is Vary-aware (Accept-Encoding and Accept-Language are
19+ * hashed into the compound cache key). Leave CACHE_SPREADSHEET_ID as-is
20+ * to disable caching entirely — zero overhead.
1621 *
1722 * DEPLOYMENT:
1823 * 1. Go to https://script.google.com → New project
@@ -54,6 +59,22 @@ const CACHE_MAX_ROWS = 5000; // circular buffer capacity
5459const CACHE_MAX_BODY_BYTES = 35000 ; // skip responses larger than ~35 KB
5560const CACHE_DEFAULT_TTL_SECONDS = 86400 ; // 24-hour fallback when no Cache-Control
5661
62+ // ── Negative Caching ────────────────────────────────────────
63+ // Persistent 4xx errors get a short TTL when the upstream is silent on
64+ // Cache-Control. Buggy clients hammer dead URLs (favicons, telemetry
65+ // pixels, dev-tools probes); a 5-minute floor absorbs the storm at
66+ // zero quota cost while letting transient 404s self-heal quickly.
67+ // 5xx is never cached — see _fetchAndCache.
68+ const NEGATIVE_CACHE_STATUSES = { 404 : 1 , 410 : 1 , 451 : 1 } ;
69+ const NEGATIVE_CACHE_TTL_SECONDS = 300 ;
70+
71+ // ── Body Compression ────────────────────────────────────────
72+ // Bodies are gzipped before base64 storage when worthwhile. Gzip has
73+ // ~20 bytes of header overhead, so very small payloads can bloat;
74+ // skip below this threshold. Already-encoded responses (gzip/br/etc.)
75+ // are stored as-is to avoid double-compression.
76+ const GZIP_MIN_BYTES = 256 ;
77+
5778// ── Vary-Aware Cache Key ────────────────────────────────────
5879// These request headers are hashed into the compound cache key
5980// alongside the URL so that responses with different encodings
@@ -320,9 +341,12 @@ function _initCacheSheet() {
320341 var sheet = ss . getSheetByName ( CACHE_SHEET_NAME ) ;
321342 if ( ! sheet ) {
322343 sheet = ss . insertSheet ( CACHE_SHEET_NAME ) ;
323- // Schema: URL_Hash | URL | Status | Headers | Body | Timestamp | Expires_At
324- sheet . getRange ( 1 , 1 , 1 , 7 ) . setValues ( [ [
325- "URL_Hash" , "URL" , "Status" , "Headers" , "Body" , "Timestamp" , "Expires_At"
344+ // Schema: URL_Hash | URL | Status | Headers | Body | Timestamp | Expires_At | Z
345+ // Z is 1 when Body is base64(gzip(rawBytes)), 0/empty when base64(rawBytes).
346+ // Legacy 7-column rows from older deployments read back with an empty Z
347+ // cell (getValues() yields ""), which the cache hit path treats as "not gzipped" — fully compatible.
348+ sheet . getRange ( 1 , 1 , 1 , 8 ) . setValues ( [ [
349+ "URL_Hash" , "URL" , "Status" , "Headers" , "Body" , "Timestamp" , "Expires_At" , "Z"
326350 ] ] ) ;
327351 }
328352 return sheet ;
@@ -539,27 +563,46 @@ function _getFromCache(url, reqHeaders) {
539563 var found = finder . findNext ( ) ;
540564
541565 if ( found ) {
542- var row = sheet . getRange ( found . getRow ( ) , 1 , 1 , 7 ) . getValues ( ) [ 0 ] ;
566+ // 8-column read. Legacy 7-column rows return "" (empty cell) for the Z slot,
567+ // which is falsy and falls through the not-gzipped branch below — fully
568+ // compatible with caches written before the gzip-storage change.
569+ var row = sheet . getRange ( found . getRow ( ) , 1 , 1 , 8 ) . getValues ( ) [ 0 ] ;
543570
544571 var expiresAt = row [ 6 ] ;
545572 if ( expiresAt && expiresAt instanceof Date && expiresAt < new Date ( ) ) {
546573 return null ;
547574 }
548575
576+ var storedBody = row [ 4 ] ;
577+ var body ;
578+ if ( row [ 7 ] ) {
579+ // Stored as base64(gzip(rawBytes)). The relay protocol's `b` field
580+ // is base64(rawBytes), so decompress and re-encode for the wire.
581+ var gzipped = Utilities . base64Decode ( storedBody ) ;
582+ var raw = Utilities
583+ . ungzip ( Utilities . newBlob ( gzipped , "application/x-gzip" ) )
584+ . getBytes ( ) ;
585+ body = Utilities . base64Encode ( raw ) ;
586+ } else {
587+ body = storedBody ;
588+ }
589+
549590 return {
550591 status : row [ 2 ] ,
551592 headers : _refreshCachedHeaders ( row [ 3 ] , row [ 5 ] ) ,
552- body : row [ 4 ] ,
593+ body : body ,
553594 } ;
554595 }
555596 return null ;
556597}
557598
558599/**
559600 * Fetch a URL and store the response in the spreadsheet cache
560- * using a circular buffer (O(1) writes). Skips storage when the
561- * encoded body exceeds CACHE_MAX_BODY_BYTES or when Cache-Control
562- * forbids caching. Returns the fetch result regardless.
601+ * using a circular buffer (O(1) writes). Skips storage on 5xx
602+ * (transient outages must not poison a 24h slot), when Cache-Control
603+ * forbids caching, or when the post-compression body exceeds
604+ * CACHE_MAX_BODY_BYTES. Returns the live fetch result whenever the cache
605+ * write is skipped or succeeds; returns null if the catch block is reached.
563606 */
564607function _fetchAndCache ( url , reqHeaders ) {
565608 var sheet = _initCacheSheet ( ) ;
@@ -569,22 +612,52 @@ function _fetchAndCache(url, reqHeaders) {
569612 var response = UrlFetchApp . fetch ( url , { muteHttpExceptions : true } ) ;
570613 var status = response . getResponseCode ( ) ;
571614 var headers = _respHeaders ( response ) ;
572- var body = Utilities . base64Encode ( response . getContent ( ) ) ;
615+ var bodyBytes = response . getContent ( ) ;
616+ var rawB64 = Utilities . base64Encode ( bodyBytes ) ;
617+ var headersJson = JSON . stringify ( headers ) ;
618+ var liveResult = { status : status , headers : headersJson , body : rawB64 } ;
573619
574- // Cell-size safety gate
575- if ( body . length > CACHE_MAX_BODY_BYTES ) {
576- return { status : status , headers : JSON . stringify ( headers ) , body : body } ;
577- }
620+ // 5xx never enters the cache. A flapping upstream returning 503 once
621+ // would otherwise pin that response for 24h and break the URL for
622+ // every subsequent client until expiry.
623+ if ( status >= 500 ) return liveResult ;
578624
579- // TTL extraction
580625 var cacheControl =
581626 headers [ "Cache-Control" ] || headers [ "cache-control" ] || null ;
582627 var ttlSeconds = _parseMaxAge ( cacheControl ) ;
583628
584- if ( ttlSeconds === 0 ) {
585- return { status : status , headers : JSON . stringify ( headers ) , body : body } ;
629+ if ( ttlSeconds === 0 ) return liveResult ;
630+
631+ // Negative caching: cap TTL on persistent 4xx when upstream is silent.
632+ // If they explicitly stated a max-age for the 404, we honor it instead
633+ // — the origin knows best when it spoke up.
634+ if ( NEGATIVE_CACHE_STATUSES [ status ] && ! cacheControl ) {
635+ ttlSeconds = NEGATIVE_CACHE_TTL_SECONDS ;
586636 }
587637
638+ // Decide whether to gzip-store. Skip when upstream is already encoded
639+ // (avoids double-compressing gzip/br/zstd payloads) and when the body
640+ // is too small to overcome gzip's header overhead.
641+ var contentEncoding = String (
642+ headers [ "Content-Encoding" ] || headers [ "content-encoding" ] || ""
643+ ) . toLowerCase ( ) ;
644+ var alreadyEncoded = contentEncoding && contentEncoding !== "identity" ;
645+ var storedBody ;
646+ var storedZ ;
647+ if ( alreadyEncoded || bodyBytes . length < GZIP_MIN_BYTES ) {
648+ storedBody = rawB64 ;
649+ storedZ = 0 ;
650+ } else {
651+ storedBody = Utilities . base64Encode (
652+ Utilities . gzip ( Utilities . newBlob ( bodyBytes ) ) . getBytes ( )
653+ ) ;
654+ storedZ = 1 ;
655+ }
656+
657+ // Cell-size safety gate, applied after compression so that a 100 KB
658+ // text body that gzips to ~15 KB now fits where it previously bailed.
659+ if ( storedBody . length > CACHE_MAX_BODY_BYTES ) return liveResult ;
660+
588661 var hash = _getCacheKey ( url , reqHeaders ) ;
589662 var timestamp = new Date ( ) ;
590663 var expiresAt = new Date ( timestamp . getTime ( ) + ttlSeconds * 1000 ) ;
@@ -598,25 +671,26 @@ function _fetchAndCache(url, reqHeaders) {
598671 hash ,
599672 url ,
600673 status ,
601- JSON . stringify ( headers ) ,
602- body ,
674+ headersJson ,
675+ storedBody ,
603676 timestamp . toISOString ( ) ,
604677 expiresAt ,
678+ storedZ ,
605679 ] ;
606680
607681 // Circular buffer write (O(1))
608682 var metaSheet = _getMetaSheet ( ) ;
609683 if ( metaSheet ) {
610684 _ensureRowsAllocated ( sheet ) ;
611685 var writeRow = _getNextCursor ( sheet , metaSheet ) ;
612- sheet . getRange ( writeRow , 1 , 1 , 7 ) . setValues ( [ rowData ] ) ;
686+ sheet . getRange ( writeRow , 1 , 1 , 8 ) . setValues ( [ rowData ] ) ;
613687 _advanceCursor ( metaSheet , writeRow ) ;
614688 } else {
615689 // Fallback: simple append if meta sheet is unavailable
616690 sheet . appendRow ( rowData ) ;
617691 }
618692
619- return { status : status , headers : JSON . stringify ( headers ) , body : body } ;
693+ return liveResult ;
620694 } catch ( e ) {
621695 return null ;
622696 }
@@ -684,7 +758,7 @@ function clearExpiredCache() {
684758 }
685759
686760 for ( var j = 0 ; j < rowsToClear . length ; j ++ ) {
687- sheet . getRange ( rowsToClear [ j ] , 1 , 1 , 7 ) . clearContent ( ) ;
761+ sheet . getRange ( rowsToClear [ j ] , 1 , 1 , 8 ) . clearContent ( ) ;
688762 }
689763
690764 console . log ( "Cleared " + rowsToClear . length + " expired entries (" +
@@ -696,7 +770,7 @@ function clearEntireCache() {
696770 if ( sheet ) {
697771 var totalRows = sheet . getDataRange ( ) . getNumRows ( ) ;
698772 if ( totalRows > 1 ) {
699- sheet . getRange ( 2 , 1 , totalRows - 1 , 7 ) . clearContent ( ) ;
773+ sheet . getRange ( 2 , 1 , totalRows - 1 , 8 ) . clearContent ( ) ;
700774 }
701775 }
702776
0 commit comments