-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBuildKitCacheForensics.js
More file actions
34 lines (29 loc) · 37.2 KB
/
Copy pathBuildKitCacheForensics.js
File metadata and controls
34 lines (29 loc) · 37.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
(function (root, factory) {
const api = factory();
if (typeof module === "object" && module.exports) {
module.exports = api;
if (typeof require === "function" && require.main === module) {
api.runCli(process.argv.slice(2)).catch((error) => {
const message = error && error.stack ? error.stack : String(error);
process.stderr.write(message + "\n");
process.exitCode = 1;
});
}
} else {
root.BuildKitCacheForensics = api.BuildKitCacheForensics;
root.BuildKitCacheForensicsApi = api;
}
})(typeof globalThis !== "undefined" ? globalThis : this, function () {
"use strict";
const VERSION = "1.0.0";
const DEFAULTS = { slowStepMs: 15000, contextWarnBytes: 50 * 1024 * 1024, hotspotLimit: 10 };
const LOCKFILE_PATTERNS = [/^package(-lock)?\.json$/i,/^pnpm-lock\.yaml$/i,/^yarn\.lock$/i,/^bun\.lockb?$/i,/^npm-shrinkwrap\.json$/i,/^requirements(-dev)?\.txt$/i,/^poetry\.lock$/i,/^pyproject\.toml$/i,/^pipfile(\.lock)?$/i,/^go\.(mod|sum)$/i,/^cargo\.(toml|lock)$/i,/^gemfile(\.lock)?$/i,/^composer\.(json|lock)$/i,/^mix\.(exs|lock)$/i,/^pom\.xml$/i,/^build\.gradle(\.kts)?$/i,/^gradle\.properties$/i,/^settings\.gradle(\.kts)?$/i,/^project\.clj$/i,/^deps\.edn$/i];
const PACKAGE_INSTALL_MATCHERS = [/\bnpm\s+(ci|install)\b/i,/\bpnpm\s+(install|fetch)\b/i,/\byarn\s+(install|workspaces)\b/i,/\bbun\s+install\b/i,/\bpip(3)?\s+install\b/i,/\bpoetry\s+install\b/i,/\buv\s+(sync|pip install)\b/i,/\bgo\s+mod\s+download\b/i,/\bcargo\s+(build|fetch|chef\s+cook)\b/i,/\bcomposer\s+install\b/i,/\bbundle\s+install\b/i,/\bmvn\b.*\b(resolve|dependency:go-offline|package)\b/i,/\bgradle\b.*\b(build|dependencies|assemble)\b/i];
function isObject(value){return value!==null&&typeof value==="object"&&!Array.isArray(value);} function splitLines(text){return String(text||"").replace(/\r\n/g,"\n").replace(/\r/g,"\n").split("\n");} function safeJsonParse(text){try{return JSON.parse(text);}catch(_){return null;}} function coerceTimestamp(value){if(value===null||value===undefined||value==="")return null;if(typeof value==="number"&&Number.isFinite(value))return value>1e12?value:value*1000;const numeric=Number(value);if(Number.isFinite(numeric)&&String(value).trim()!=="")return numeric>1e12?numeric:numeric*1000;const parsed=Date.parse(String(value));return Number.isFinite(parsed)?parsed:null;} function deriveDurationMs(startedAt,completedAt,fallbackMs){if(Number.isFinite(fallbackMs)&&fallbackMs>=0)return fallbackMs;if(startedAt!==null&&completedAt!==null&&completedAt>=startedAt)return completedAt-startedAt;return null;} function parseDurationToMs(input){if(input===null||input===undefined)return null;if(typeof input==="number"&&Number.isFinite(input))return input;const text=String(input).trim();if(!text)return null;if(/^\d+(\.\d+)?$/.test(text))return Number(text);let total=0;const regex=/(\d+(?:\.\d+)?)(ms|s|m|h)/g;let match;let consumed=0;while((match=regex.exec(text))!==null){const amount=Number(match[1]);const unit=match[2];consumed+=match[0].length;if(unit==="ms")total+=amount;else if(unit==="s")total+=amount*1000;else if(unit==="m")total+=amount*60000;else if(unit==="h")total+=amount*3600000;}return consumed>0?Math.round(total):null;} function parseByteSize(input){if(input===null||input===undefined)return null;if(typeof input==="number"&&Number.isFinite(input))return input;const text=String(input).trim();if(!text)return null;const match=text.match(/([0-9]+(?:\.[0-9]+)?)\s*([kmgt]?i?b)/i);if(!match){const numeric=Number(text);return Number.isFinite(numeric)?numeric:null;}const amount=Number(match[1]);const unit=match[2].toLowerCase();const powerMap={b:0,kb:1,kib:1,mb:2,mib:2,gb:3,gib:3,tb:4,tib:4};const power=powerMap[unit];if(power===undefined)return null;const base=unit.includes("i")?1024:1000;return Math.round(amount*Math.pow(base,power));} function formatBytes(bytes){if(!Number.isFinite(bytes))return"n/a";const units=["B","KiB","MiB","GiB","TiB"];let value=bytes;let index=0;while(value>=1024&&index<units.length-1){value/=1024;index+=1;}return `${value.toFixed(value>=10||index===0?0:1)} ${units[index]}`;} function formatDuration(ms){if(!Number.isFinite(ms))return"n/a";if(ms<1000)return`${Math.round(ms)}ms`;const hours=Math.floor(ms/3600000);const minutes=Math.floor((ms%3600000)/60000);const seconds=(ms%60000)/1000;const parts=[];if(hours)parts.push(`${hours}h`);if(minutes)parts.push(`${minutes}m`);if(!hours&&!minutes)parts.push(`${seconds.toFixed(seconds>=10?0:1)}s`);else if(seconds)parts.push(`${seconds.toFixed(0)}s`);return parts.join(" ");} function uniq(values){return Array.from(new Set(values.filter((v)=>v!==null&&v!==undefined&&v!=="")));} function round(value,decimals){if(!Number.isFinite(value))return null;const power=Math.pow(10,decimals||0);return Math.round(value*power)/power;} function normalizeWhitespace(text){return String(text||"").replace(/\s+/g," ").trim();} function tokenizeLikeShell(text){const tokens=[];let current="";let quote=null;let escaped=false;const source=String(text||"");for(let i=0;i<source.length;i+=1){const char=source[i];if(escaped){current+=char;escaped=false;continue;}if(char==="\\"){escaped=true;continue;}if(quote){if(char===quote)quote=null;else current+=char;continue;}if(char==='"'||char==="'"){quote=char;continue;}if(/\s/.test(char)){if(current){tokens.push(current);current="";}continue;}current+=char;}if(current)tokens.push(current);return tokens;} function parseVertexDescriptor(name){const value=normalizeWhitespace(name);const match=value.match(/^\[(.+?)\]\s*(.+)$/);const descriptor={original:value,stage:null,stepIndex:null,totalSteps:null,instruction:value,instructionKind:"OTHER",commandSignature:normalizeCommandSignature(value),platform:null,internal:/^\[internal\]/i.test(value)||/\[internal\]/i.test(value)};if(!match){descriptor.instructionKind=inferInstructionKind(value);return descriptor;}const prefix=match[1];const instruction=match[2];descriptor.instruction=instruction;const stepMatch=prefix.match(/^(?:(.+?)\s+)?(\d+)\/(\d+)$/);if(stepMatch){descriptor.stage=stepMatch[1]?stepMatch[1].trim():null;descriptor.stepIndex=Number(stepMatch[2]);descriptor.totalSteps=Number(stepMatch[3]);}else if(/internal/i.test(prefix)){descriptor.stage="internal";descriptor.internal=true;}else{descriptor.stage=prefix.trim();}const platformMatch=`${prefix} ${instruction}`.match(/linux\/(amd64|arm64|arm\/v7|arm\/v6|386|s390x|ppc64le)/i);if(platformMatch)descriptor.platform=`linux/${platformMatch[1].toLowerCase()}`;descriptor.instructionKind=inferInstructionKind(instruction);descriptor.commandSignature=normalizeCommandSignature(instruction);return descriptor;} function inferInstructionKind(text){const value=normalizeWhitespace(text).toUpperCase();if(value.startsWith("RUN "))return"RUN";if(value.startsWith("COPY "))return"COPY";if(value.startsWith("ADD "))return"ADD";if(value.startsWith("FROM "))return"FROM";if(value.startsWith("WORKDIR "))return"WORKDIR";if(/load build context/i.test(value)||/transferring context/i.test(value))return"LOAD_CONTEXT";if(/load metadata/i.test(value))return"LOAD_METADATA";return"OTHER";} function normalizeCommandSignature(text){return normalizeWhitespace(text).replace(/^\[(.+?)\]\s*/g,"").replace(/\bsha256:[a-f0-9]{16,}\b/gi,"sha256:<digest>").replace(/\b[0-9]+\/[0-9]+\b/g,"<step>").trim();} function makeVertexKey(vertex){const baseKey=vertex.digest||vertex.id||`${vertex.name}:${vertex.startedAt||"na"}`;return vertex.traceSource?`${baseKey}@@${vertex.traceSource}`:baseKey;} function mergeVertex(existing,incoming){if(!existing)return{...incoming,logs:Array.isArray(incoming.logs)?incoming.logs.slice():[]};const merged={...existing};for(const [key,value] of Object.entries(incoming)){if(value===null||value===undefined||value==="")continue;if(key==="logs"){merged.logs=(merged.logs||[]).concat(value||[]);continue;}if(key==="inputs"){merged.inputs=uniq((merged.inputs||[]).concat(value||[]));continue;}merged[key]=value;}merged.durationMs=deriveDurationMs(merged.startedAt??null,merged.completedAt??null,merged.durationMs??null);return merged;} function finalizeVertex(vertex){const descriptor=parseVertexDescriptor(vertex.name||"");return{...vertex,stage:vertex.stage||descriptor.stage,stepIndex:vertex.stepIndex||descriptor.stepIndex,totalSteps:vertex.totalSteps||descriptor.totalSteps,instructionKind:vertex.instructionKind||descriptor.instructionKind,commandSignature:vertex.commandSignature||descriptor.commandSignature,platform:vertex.platform||descriptor.platform,internal:vertex.internal===undefined?descriptor.internal:vertex.internal,durationMs:deriveDurationMs(vertex.startedAt??null,vertex.completedAt??null,vertex.durationMs??null),logs:(vertex.logs||[]).slice(-250)};} function buildTraceFromState(state,sourceType){const vertices=Array.from(state.vertices.values()).map(finalizeVertex);return{sourceType,vertices,contextTransfers:state.contextTransfers.slice(),platforms:uniq(vertices.map((v)=>v.platform).concat(Array.from(state.platforms))),rawLineCount:state.rawLineCount};} function createState(){return{vertices:new Map(),contextTransfers:[],platforms:new Set(),rawLineCount:0,plainKeys:new Map()};} function trackContextTransfer(state,partial){state.contextTransfers.push({bytes:Number.isFinite(partial.bytes)?partial.bytes:null,durationMs:Number.isFinite(partial.durationMs)?partial.durationMs:null,label:partial.label||"build context",vertex:partial.vertex||null,lineNumber:partial.lineNumber||null});} function parseRawJsonText(text){const state=createState();const lines=splitLines(text).filter((line)=>line.trim());state.rawLineCount=lines.length;for(let i=0;i<lines.length;i+=1){const parsed=safeJsonParse(lines[i]);if(!parsed)throw new Error(`Invalid BuildKit rawjson line ${i+1}`);ingestRawJsonChunk(state,parsed,i+1);}return buildTraceFromState(state,"rawjson");} function ingestRawJsonChunk(state,chunk,lineNumber){if(Array.isArray(chunk)){for(const item of chunk)ingestRawJsonChunk(state,item,lineNumber);return;}if(!isObject(chunk))return;for(const item of Array.isArray(chunk.vertexes)?chunk.vertexes:[]){const descriptor=parseVertexDescriptor(item.name||"");const vertex={id:item.digest||item.id||item.name,digest:item.digest||null,name:item.name||item.digest||"unknown vertex",startedAt:coerceTimestamp(item.started||item.startedAt||item.startTime),completedAt:coerceTimestamp(item.completed||item.completedAt||item.endTime),cached:item.cached===true,error:item.error||null,inputs:(item.inputs||[]).map((input)=>input&&(input.digest||input.id||input)).filter(Boolean),stage:descriptor.stage,stepIndex:descriptor.stepIndex,totalSteps:descriptor.totalSteps,instructionKind:descriptor.instructionKind,commandSignature:descriptor.commandSignature,platform:descriptor.platform,internal:descriptor.internal,logs:[]};if(vertex.platform)state.platforms.add(vertex.platform);const key=makeVertexKey(vertex);state.vertices.set(key,mergeVertex(state.vertices.get(key),vertex));}for(const item of Array.isArray(chunk.statuses)?chunk.statuses:[]){const label=`${item.id||""} ${item.name||""}`.trim();if(/context/i.test(label))trackContextTransfer(state,{bytes:parseByteSize(item.total||item.current||item.name),durationMs:deriveDurationMs(coerceTimestamp(item.started),coerceTimestamp(item.completed),null),label,vertex:item.vertex||null,lineNumber});}for(const item of Array.isArray(chunk.logs)?chunk.logs:[]){const vertexId=item.vertex||item.digest||null;const key=vertexId&&Array.from(state.vertices.keys()).find((candidate)=>candidate===vertexId||candidate.startsWith(`${vertexId}:`)||candidate.startsWith(`${vertexId}@@`));const text=typeof item.data==="string"?item.data:item.data?String(item.data):"";if(key&&state.vertices.has(key)){const current=state.vertices.get(key);current.logs=current.logs||[];current.logs.push(normalizeWhitespace(text));state.vertices.set(key,current);}}} function ensurePlainVertex(state,id,name,lineNumber){if(state.plainKeys.has(id)){const existingKey=state.plainKeys.get(id);const existing=state.vertices.get(existingKey);if(existing){const descriptor=parseVertexDescriptor(name||existing.name||`#${id}`);state.vertices.set(existingKey,mergeVertex(existing,{name:name||existing.name,stage:existing.stage||descriptor.stage,stepIndex:existing.stepIndex||descriptor.stepIndex,totalSteps:existing.totalSteps||descriptor.totalSteps,instructionKind:existing.instructionKind||descriptor.instructionKind,commandSignature:existing.commandSignature||descriptor.commandSignature,platform:existing.platform||descriptor.platform,internal:existing.internal===undefined?descriptor.internal:existing.internal,sourceLineStart:existing.sourceLineStart||lineNumber}));return existingKey;}}const key=`plain:${id}:${normalizeCommandSignature(name||`#${id}`)}`;state.plainKeys.set(id,key);const descriptor=parseVertexDescriptor(name||"");const candidate={id:key,digest:null,name:name||`#${id}`,startedAt:null,completedAt:null,cached:false,error:null,inputs:[],stage:descriptor.stage,stepIndex:descriptor.stepIndex,totalSteps:descriptor.totalSteps,instructionKind:descriptor.instructionKind,commandSignature:descriptor.commandSignature,platform:descriptor.platform,internal:descriptor.internal,sourceLineStart:lineNumber,logs:[]};if(descriptor.platform)state.platforms.add(descriptor.platform);state.vertices.set(key,mergeVertex(state.vertices.get(key),candidate));return key;} function parsePlainProgressText(text){const state=createState();const lines=splitLines(text);state.rawLineCount=lines.length;let activeContextVertex=null;for(let i=0;i<lines.length;i+=1){const trimmed=lines[i].trim();if(!trimmed)continue;const descriptorMatch=trimmed.match(/^#(\d+)\s+\[(.+?)\]\s*(.+)?$/);if(descriptorMatch){const vertexId=descriptorMatch[1];const body=`[${descriptorMatch[2]}] ${descriptorMatch[3]||""}`.trim();const key=ensurePlainVertex(state,vertexId,body,i+1);const current=state.vertices.get(key);if(current.startedAt===null)current.startedAt=i*1000;state.vertices.set(key,current);if(/load build context/i.test(body))activeContextVertex=key;continue;}const cachedMatch=trimmed.match(/^#(\d+)\s+CACHED$/i);if(cachedMatch){const key=ensurePlainVertex(state,cachedMatch[1],null,i+1);const current=state.vertices.get(key);current.cached=true;current.completedAt=current.completedAt||i*1000;current.durationMs=current.durationMs??0;state.vertices.set(key,current);continue;}const doneMatch=trimmed.match(/^#(\d+)\s+DONE\s+(.+)$/i);if(doneMatch){const key=ensurePlainVertex(state,doneMatch[1],null,i+1);const current=state.vertices.get(key);current.completedAt=current.completedAt||i*1000;current.durationMs=parseDurationToMs(doneMatch[2]);state.vertices.set(key,current);continue;}const errorMatch=trimmed.match(/^#(\d+)\s+ERROR\s+(.+)$/i);if(errorMatch){const key=ensurePlainVertex(state,errorMatch[1],null,i+1);const current=state.vertices.get(key);current.error=errorMatch[2].trim();current.completedAt=current.completedAt||i*1000;state.vertices.set(key,current);continue;}const logMatch=trimmed.match(/^#(\d+)\s+(.+)$/);if(logMatch){const key=ensurePlainVertex(state,logMatch[1],null,i+1);const current=state.vertices.get(key);current.logs=current.logs||[];current.logs.push(logMatch[2]);if(/transferring context:/i.test(logMatch[2]))trackContextTransfer(state,{bytes:parseByteSize(logMatch[2]),durationMs:parseDurationToMs(logMatch[2]),label:logMatch[2],vertex:activeContextVertex||key,lineNumber:i+1});state.vertices.set(key,current);}}return buildTraceFromState(state,"plain");} function parseBuildKitTrace(input){if(input&&isObject(input)&&Array.isArray(input.vertices)){const state=createState();for(const vertex of input.vertices)state.vertices.set(makeVertexKey(vertex),mergeVertex(state.vertices.get(makeVertexKey(vertex)),vertex));for(const transfer of input.contextTransfers||[])trackContextTransfer(state,transfer);return buildTraceFromState(state,input.sourceType||"object");}const text=typeof input==="string"?input:String(input||"");const trimmed=text.trim();if(!trimmed)return{sourceType:"empty",vertices:[],contextTransfers:[],platforms:[],rawLineCount:0};const asSingleJson=safeJsonParse(trimmed);if(isObject(asSingleJson)&&(Array.isArray(asSingleJson.vertexes)||Array.isArray(asSingleJson.logs)||Array.isArray(asSingleJson.statuses))){const state=createState();ingestRawJsonChunk(state,asSingleJson,1);return buildTraceFromState(state,"rawjson");}if(trimmed.startsWith("{")&&splitLines(trimmed).every((line)=>!line.trim()||line.trim().startsWith("{")))return parseRawJsonText(trimmed);return parsePlainProgressText(trimmed);} function parseDockerfile(text){const lines=splitLines(text);const instructions=[];let buffer="";let bufferStart=0;let stageIndex=-1;let currentStage=null;for(let i=0;i<lines.length;i+=1){const raw=lines[i];const trimmed=raw.trim();if(!buffer&&(!trimmed||trimmed.startsWith("#")))continue;if(!buffer)bufferStart=i+1;const continued=/\\\s*$/.test(raw);buffer+=`${raw.replace(/\\\s*$/,"")} `;if(continued)continue;const statement=normalizeWhitespace(buffer);buffer="";const match=statement.match(/^([A-Za-z]+)\s+(.*)$/);if(!match)continue;const keyword=match[1].toUpperCase();const value=match[2];if(keyword==="FROM"){stageIndex+=1;const aliasMatch=value.match(/\s+AS\s+([^\s]+)$/i);currentStage=aliasMatch?aliasMatch[1]:`stage${stageIndex}`;}instructions.push({lineNumber:bufferStart,keyword,value,raw:statement,stage:currentStage||"stage0",stageIndex:Math.max(stageIndex,0)});}return instructions;} function extractCopySources(value){const trimmed=String(value||"").trim();if(!trimmed)return[];if(trimmed.startsWith("[")){const parsed=safeJsonParse(trimmed);if(Array.isArray(parsed)&&parsed.length>=2)return parsed.slice(0,-1).map(String);return[];}const tokens=tokenizeLikeShell(trimmed).filter((token)=>!token.startsWith("--"));if(tokens.length<2)return[];return tokens.slice(0,-1);} function isLockfilePath(pathValue){const fileName=String(pathValue||"").split("/").pop();return LOCKFILE_PATTERNS.some((pattern)=>pattern.test(fileName));} function isLikelyLockfileCopy(instruction){if(!instruction||!["COPY","ADD"].includes(instruction.keyword))return false;const sources=extractCopySources(instruction.value);return sources.length>0&&sources.every((source)=>isLockfilePath(source));} function isBroadContextCopy(instruction){if(!instruction||!["COPY","ADD"].includes(instruction.keyword))return false;const sources=extractCopySources(instruction.value);if(!sources.length)return false;return sources.some((source)=>{const normalized=source.replace(/^\.\//,"");return normalized==="."||normalized==="*"||normalized.endsWith("/")||normalized.includes("**")||normalized.includes("src")||(!isLockfilePath(normalized)&&normalized.includes("."))||(!isLockfilePath(normalized)&&!normalized.includes("/")&&normalized.length>3);});} function isPackageInstallCommand(text){return PACKAGE_INSTALL_MATCHERS.some((pattern)=>pattern.test(text||""));} function usesCacheMount(text){return /--mount=type=cache/i.test(text||"");} function isAptUpdateCommand(text){return /\bapt(-get)?\s+update\b/i.test(text||"")||/\bapk\s+add\b/i.test(text||"");} function suggestCacheMount(command){const text=String(command||"");if(/pnpm/i.test(text))return"RUN --mount=type=cache,target=/root/.local/share/pnpm/store pnpm install --frozen-lockfile";if(/npm/i.test(text))return"RUN --mount=type=cache,target=/root/.npm npm ci";if(/yarn/i.test(text))return"RUN --mount=type=cache,target=/usr/local/share/.cache/yarn yarn install --frozen-lockfile";if(/bun/i.test(text))return"RUN --mount=type=cache,target=/root/.bun/install/cache bun install --frozen-lockfile";if(/pip|uv|poetry/i.test(text))return"RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt";if(/go\s+mod|go\s+build/i.test(text))return"RUN --mount=type=cache,target=/go/pkg/mod go mod download";if(/cargo/i.test(text))return"RUN --mount=type=cache,target=/usr/local/cargo/registry --mount=type=cache,target=/usr/local/cargo/git/db cargo build --locked";if(/apt|apk/i.test(text))return"RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt/lists apt-get update && apt-get install -y <packages>";return"RUN --mount=type=cache,target=/tmp/build-cache <command>";} function analyzeDockerfileInstructions(instructions){const findings=[];const byStage=new Map();for(const instruction of instructions){if(!byStage.has(instruction.stage))byStage.set(instruction.stage,[]);byStage.get(instruction.stage).push(instruction);}for(const [stage,stageInstructions] of byStage.entries()){let lastBroadCopy=null;let sawLockfileCopy=false;for(const instruction of stageInstructions){if(isLikelyLockfileCopy(instruction))sawLockfileCopy=true;if(isBroadContextCopy(instruction))lastBroadCopy=instruction;if(instruction.keyword==="RUN"&&isPackageInstallCommand(instruction.value)){if(lastBroadCopy&&!sawLockfileCopy)findings.push({type:"install-after-broad-copy",stage,lineNumber:instruction.lineNumber,command:instruction.value,broadCopyLine:lastBroadCopy.lineNumber,broadCopy:lastBroadCopy.raw});if(!usesCacheMount(instruction.value))findings.push({type:"install-without-cache-mount",stage,lineNumber:instruction.lineNumber,command:instruction.value,suggestion:suggestCacheMount(instruction.value)});}if(instruction.keyword==="RUN"&&isAptUpdateCommand(instruction.value)&&!usesCacheMount(instruction.value))findings.push({type:"system-package-without-cache-mount",stage,lineNumber:instruction.lineNumber,command:instruction.value,suggestion:suggestCacheMount(instruction.value)});}}return findings;} function buildGraph(vertices){const byKey=new Map();const children=new Map();const parents=new Map();for(const vertex of vertices)byKey.set(makeVertexKey(vertex),vertex);for(const vertex of vertices){const key=makeVertexKey(vertex);const inputKeys=uniq((vertex.inputs||[]).map((input)=>{if(byKey.has(input))return input;const match=Array.from(byKey.entries()).find(([candidate,candidateVertex])=>(candidate===input||candidate.startsWith(`${input}:`)||candidate.startsWith(`${input}@@`))&&candidateVertex.traceSource===vertex.traceSource);return match?match[0]:null;}));parents.set(key,inputKeys);for(const inputKey of inputKeys){if(!children.has(inputKey))children.set(inputKey,[]);children.get(inputKey).push(key);}}return{byKey,children,parents};} function inferLikelyCause(vertex){const name=String(vertex.name||"");if(/COPY\s+\.\s+/i.test(name)||/ADD\s+\.\s+/i.test(name))return"A broad source copy changed and forced downstream rebuilds.";if(/load build context|transferring context/i.test(name))return"The build context changed enough to invalidate later layers.";if(isPackageInstallCommand(name))return"A dependency restore layer lost cache reuse.";if(/RUN/i.test(name))return"A compute-heavy RUN step became the first uncached edge.";return"This was the first uncached vertex in its rebuild cluster.";} function computeMissRoots(vertices,graph){const uncachedKeys=new Set(vertices.filter((vertex)=>!vertex.cached&&!vertex.error).map(makeVertexKey));const results=[];for(const vertex of vertices){const key=makeVertexKey(vertex);if(!uncachedKeys.has(key))continue;const parentKeys=graph.parents.get(key)||[];const hasUncachedParent=parentKeys.some((parentKey)=>uncachedKeys.has(parentKey));if(hasUncachedParent)continue;let impactCount=0;let impactDurationMs=0;const queue=[key];const seen=new Set();while(queue.length){const currentKey=queue.shift();if(seen.has(currentKey))continue;seen.add(currentKey);if(!uncachedKeys.has(currentKey))continue;const current=graph.byKey.get(currentKey);if(current){impactCount+=1;impactDurationMs+=current.durationMs||0;}for(const childKey of graph.children.get(currentKey)||[])queue.push(childKey);}results.push({key,name:vertex.name,stage:vertex.stage,stepIndex:vertex.stepIndex,instructionKind:vertex.instructionKind,durationMs:vertex.durationMs,impactCount,impactDurationMs,likelyCause:inferLikelyCause(vertex)});}return results.sort((a,b)=>(b.impactDurationMs||0)-(a.impactDurationMs||0)||(b.impactCount||0)-(a.impactCount||0));} function computeSummary(vertices,contextTransfers){const totalVertices=vertices.length;const cachedVertices=vertices.filter((vertex)=>vertex.cached).length;const erroredVertices=vertices.filter((vertex)=>vertex.error).length;const executedVertices=vertices.filter((vertex)=>!vertex.cached).length;const durationValues=vertices.map((vertex)=>vertex.durationMs||0);const totalDurationMs=durationValues.reduce((sum,value)=>sum+value,0);const longestStepMs=durationValues.reduce((max,value)=>Math.max(max,value),0);const contextBytes=contextTransfers.reduce((sum,transfer)=>sum+(transfer.bytes||0),0);const contextDurationMs=contextTransfers.reduce((sum,transfer)=>sum+(transfer.durationMs||0),0);const platforms=uniq(vertices.map((vertex)=>vertex.platform));return{totalVertices,cachedVertices,executedVertices,erroredVertices,cacheHitRate:totalVertices?cachedVertices/totalVertices:0,totalDurationMs,longestStepMs,contextBytes,contextTransfers:contextTransfers.length,contextDurationMs,platforms};} function computeSlowSteps(vertices,limit){return vertices.filter((vertex)=>Number.isFinite(vertex.durationMs)&&vertex.durationMs>0).sort((a,b)=>(b.durationMs||0)-(a.durationMs||0)).slice(0,limit).map((vertex)=>({name:vertex.name,stage:vertex.stage,stepIndex:vertex.stepIndex,durationMs:vertex.durationMs,cached:Boolean(vertex.cached),error:vertex.error||null}));} function computeRepeatedCommands(vertices){const groups=new Map();for(const vertex of vertices){const signature=vertex.commandSignature;if(!signature||signature.length<8)continue;if(!groups.has(signature))groups.set(signature,[]);groups.get(signature).push(vertex);}const results=[];for(const [signature,group] of groups.entries()){if(group.length<2)continue;const uncached=group.filter((vertex)=>!vertex.cached);if(uncached.length<2)continue;const totalDurationMs=uncached.reduce((sum,vertex)=>sum+(vertex.durationMs||0),0);results.push({signature,occurrences:group.length,uncachedOccurrences:uncached.length,totalDurationMs,platforms:uniq(group.map((vertex)=>vertex.platform))});}return results.sort((a,b)=>(b.totalDurationMs||0)-(a.totalDurationMs||0));} function buildRecommendations(context){const recommendations=[];const {summary,missRoots,dockerfileFindings,repeatedCommands,options,vertices}=context;const push=(rec)=>recommendations.push({severity:rec.severity||"medium",title:rec.title,why:rec.why,evidence:rec.evidence||[],actions:rec.actions||[]});if(summary.contextBytes>=options.contextWarnBytes)push({severity:summary.contextBytes>=options.contextWarnBytes*2?"high":"medium",title:"Trim the build context before BuildKit sees it",why:`The build uploaded ${formatBytes(summary.contextBytes)} of context data, which is large enough to invalidate cache cheaply but expensively.`,evidence:[`Context transfers: ${summary.contextTransfers}`,`Context upload time: ${formatDuration(summary.contextDurationMs)}`],actions:["Tighten .dockerignore for dist, coverage, local caches, model artifacts, and VCS noise.","Copy only the directories needed by each stage instead of relying on COPY . . everywhere.","Move generated assets and test outputs outside the Docker build context when they are not required in the image."]});for(const finding of dockerfileFindings.filter((f)=>f.type==="install-after-broad-copy").slice(0,2))push({severity:"high",title:"Move dependency restore before the broad source copy",why:`Stage ${finding.stage} runs a dependency install on line ${finding.lineNumber} after a broad copy on line ${finding.broadCopyLine}, so any source churn busts the restore layer.`,evidence:[finding.broadCopy,finding.command],actions:["Copy lockfiles and manifest files first.","Run the package restore step immediately after that narrow copy.","Copy the rest of the source tree only after the dependency layer is sealed."]});for(const finding of dockerfileFindings.filter((f)=>f.type==="install-without-cache-mount"||f.type==="system-package-without-cache-mount").slice(0,2))push({severity:finding.type==="system-package-without-cache-mount"?"medium":"high",title:"Add a cache mount to the slow restore layer",why:`Line ${finding.lineNumber} in stage ${finding.stage} runs a reusable download step without a BuildKit cache mount, so every cold rebuild pays the full network and extraction cost again.`,evidence:[finding.command],actions:[finding.suggestion]});const largestRoot=missRoots[0];if(largestRoot&&largestRoot.impactCount>=3)push({severity:largestRoot.impactCount>=5?"high":"medium",title:"Attack the first uncached root, not just the slowest leaf",why:`${largestRoot.name} was the first uncached root for ${largestRoot.impactCount} downstream steps consuming ${formatDuration(largestRoot.impactDurationMs)} of work.`,evidence:[largestRoot.likelyCause],actions:["Stabilize the inputs that feed this root vertex.","If it is a COPY or ADD step, narrow the files it receives.","If it is a RUN step, split deterministic setup from frequently changing source-dependent work."]});const duplicated=repeatedCommands.find((item)=>item.uncachedOccurrences>=2&&item.platforms.filter(Boolean).length>=2);if(duplicated)push({severity:"medium",title:"Deduplicate repeated work across platforms",why:`The command \"${duplicated.signature}\" re-ran uncached ${duplicated.uncachedOccurrences} times across ${duplicated.platforms.join(", ")}, which usually means the remote cache is not being shared effectively.`,evidence:[`Combined runtime: ${formatDuration(duplicated.totalDurationMs)}`],actions:["Export a registry-backed cache with buildx --cache-to and import it with --cache-from in CI.","Keep architecture-neutral dependency resolution in an earlier stage when possible.","Delay platform-specific downloads or native compilation until the last responsible stage."]});if(summary.cacheHitRate<0.35&&vertices.length>=6)push({severity:"medium",title:"Persist cache state across CI runs",why:`Only ${Math.round(summary.cacheHitRate*100)}% of vertices were cache hits, which is low for a steady branch build unless the cache is ephemeral or the Dockerfile is overly broad.`,evidence:[`Vertices: ${summary.totalVertices}`,`Cache hits: ${summary.cachedVertices}`],actions:["Verify that your CI builder reuses the same buildx cache backend across runs and branches.","Prefer registry or object-storage backed cache exports over runner-local disk when runners are ephemeral.","Track cache hit rate over time and alert when it drops after Dockerfile or monorepo changes."]});if(!recommendations.length)push({severity:"low",title:"No obvious structural cache bug detected",why:"The trace did not show a large context upload, a broad-copy dependency restore pattern, or repeated uncached download steps.",evidence:[`Cache hit rate: ${Math.round(summary.cacheHitRate*100)}%`,`Longest step: ${formatDuration(summary.longestStepMs)}`],actions:["Compare two traces from a warm build and a cold build to isolate environment-specific misses.","Capture rawjson from buildx so vertex dependencies can be traced more precisely than plain logs allow."]});return recommendations;} function analyzeBuildKitTrace(input,options){const tool=new BuildKitCacheForensics(options);return tool.ingest(input).analyze();} class BuildKitCacheForensics{constructor(options){this.options={...DEFAULTS,...(options||{})};this.vertices=new Map();this.contextTransfers=[];this.platforms=new Set();this.sourceTypes=new Set();this.dockerfileText=null;this.traceCounter=0;} ingest(input,sourceTag){const trace=parseBuildKitTrace(input);const traceSource=sourceTag||`trace${++this.traceCounter}`;this.sourceTypes.add(trace.sourceType);for(const vertex of trace.vertices){const namespaced={...vertex,traceSource};const key=makeVertexKey(namespaced);this.vertices.set(key,mergeVertex(this.vertices.get(key),namespaced));}for(const transfer of trace.contextTransfers||[])this.contextTransfers.push({...transfer,traceSource});for(const platform of trace.platforms||[])this.platforms.add(platform);return this;} setDockerfile(text){this.dockerfileText=text;return this;} analyze(dockerfileText){const effectiveDockerfile=dockerfileText!==undefined?dockerfileText:this.dockerfileText;const vertices=Array.from(this.vertices.values()).map(finalizeVertex);const graph=buildGraph(vertices);const summary=computeSummary(vertices,this.contextTransfers);const missRoots=computeMissRoots(vertices,graph);const slowSteps=computeSlowSteps(vertices,this.options.hotspotLimit);const repeatedCommands=computeRepeatedCommands(vertices);const dockerfileInstructions=effectiveDockerfile?parseDockerfile(effectiveDockerfile):[];const dockerfileFindings=dockerfileInstructions.length?analyzeDockerfileInstructions(dockerfileInstructions):[];const recommendations=buildRecommendations({summary,missRoots,dockerfileFindings,repeatedCommands,options:this.options,vertices});return{version:VERSION,analyzedAt:new Date().toISOString(),sourceTypes:Array.from(this.sourceTypes),summary:{...summary,cacheHitRate:round(summary.cacheHitRate,4)},missRoots,slowSteps,repeatedCommands,dockerfileFindings,recommendations};}} function formatTextReport(report){const lines=[];lines.push(`BuildKit Cache Forensics v${report.version}`);lines.push(`Sources: ${report.sourceTypes.join(", ")||"unknown"}`);lines.push(`Summary: ${report.summary.totalVertices} vertices, ${report.summary.cachedVertices} cache hits (${Math.round((report.summary.cacheHitRate||0)*100)}%), ${formatBytes(report.summary.contextBytes)} context, longest step ${formatDuration(report.summary.longestStepMs)}`);if(report.missRoots.length){lines.push("");lines.push("Top miss roots:");for(const root of report.missRoots.slice(0,5))lines.push(`- ${root.name} | impact ${root.impactCount} steps / ${formatDuration(root.impactDurationMs)} | ${root.likelyCause}`);}if(report.slowSteps.length){lines.push("");lines.push("Slow steps:");for(const step of report.slowSteps.slice(0,5))lines.push(`- ${formatDuration(step.durationMs)} | ${step.name}${step.cached?" | cached":""}`);}if(report.recommendations.length){lines.push("");lines.push("Recommendations:");for(const rec of report.recommendations){lines.push(`- [${rec.severity}] ${rec.title}`);lines.push(` Why: ${rec.why}`);if(rec.actions.length)lines.push(` Actions: ${rec.actions.join(" | ")}`);}}return lines.join("\n");} function parseCliArgs(argv){const options={inputFiles:[],format:"text",dockerfile:null,slowStepMs:DEFAULTS.slowStepMs,contextWarnBytes:DEFAULTS.contextWarnBytes};for(let i=0;i<argv.length;i+=1){const token=argv[i];if(token==="--input")options.inputFiles.push(argv[++i]);else if(token==="--dockerfile")options.dockerfile=argv[++i];else if(token==="--format")options.format=argv[++i]||"text";else if(token==="--slow-step-ms")options.slowStepMs=Number(argv[++i]);else if(token==="--context-warn-mb")options.contextWarnBytes=Number(argv[++i])*1024*1024;else if(token==="--help"||token==="-h")options.help=true;else if(token)options.inputFiles.push(token);}return options;} async function readAllStdin(){return new Promise((resolve,reject)=>{let data="";process.stdin.setEncoding("utf8");process.stdin.on("data",(chunk)=>{data+=chunk;});process.stdin.on("end",()=>resolve(data));process.stdin.on("error",reject);});} async function runCli(argv){const fs=require("node:fs");const options=parseCliArgs(argv);if(options.help){process.stdout.write(["Usage: node BuildKitCacheForensics.js [--input build.rawjson] [--dockerfile Dockerfile] [--format text|json]"," docker buildx build --progress=plain . | node BuildKitCacheForensics.js","","Reads BuildKit rawjson or plain progress logs and reports cache miss roots, slow steps, and Dockerfile fixes."].join("\n")+"\n");return;}const tool=new BuildKitCacheForensics({slowStepMs:options.slowStepMs,contextWarnBytes:options.contextWarnBytes});if(options.dockerfile)tool.setDockerfile(require("node:fs").readFileSync(options.dockerfile,"utf8"));if(options.inputFiles.length)for(const filePath of options.inputFiles)tool.ingest(fs.readFileSync(filePath,"utf8"),filePath);else tool.ingest(await readAllStdin(),"stdin");const report=tool.analyze();if(options.format==="json")process.stdout.write(JSON.stringify(report,null,2)+"\n");else process.stdout.write(formatTextReport(report)+"\n");} return{VERSION,BuildKitCacheForensics,analyzeBuildKitTrace,parseBuildKitTrace,parseDockerfile,formatTextReport,runCli}; });
/*
This solves slow Docker BuildKit and docker buildx builds where cache misses are hard to explain, especially in CI, monorepos, multi-platform images, and AI or infra repositories that rebuild large containers too often. It reads BuildKit rawjson logs or plain progress output, finds the first uncached vertex that triggered the rebuild fanout, measures large build-context uploads, spots repeated slow commands, and points at Dockerfile ordering mistakes like running package restore after a broad COPY.
Built because I keep seeing teams stare at a 15 minute container build and only know that one RUN step was slow, not why the cache got busted three layers earlier. That is the real debugging gap. The painful part is not timing one command. The painful part is tracing the earliest cache break, proving whether the context changed too much, and showing the exact Dockerfile pattern that keeps invalidating dependency layers.
Use it when a GitHub Actions build, buildx pipeline, remote builder, or local Docker BuildKit run suddenly goes cold and you need an answer you can act on quickly. It is useful for Node, Python, Go, Rust, Java, polyglot monorepos, and any image pipeline where CI minutes and registry traffic matter.
The trick: keep the tool dependency-free, parse both rawjson and plain logs, preserve enough vertex graph information to compute rebuild roots, then combine that with lightweight Dockerfile heuristics so the output is not just observability, it is a concrete fix list.
Drop this into a repo as BuildKit cache analysis JavaScript, Docker build performance debugging CLI, buildx cache miss forensics tool, or CI container optimization utility. Run it against saved logs in automation, pipe live build output into it locally, or wire the JSON mode into a quality gate so regressions in Docker cache hit rate become visible before they get expensive.
*/