fix(decompose): address bot follow-up #2 (abort leak, dedup checks, maxTokens)

HomenShum · HomenShum · commit e10ef3b05a74 · 2026-05-02T17:06:43.000-07:00
Bot review on commit fe42888 flagged 3 items; all were real and all are fixed: 1. [Minor] AbortSignal listener leak in render-ui-kit.ts. `signal.removeEventListener('abort', onAbort)` ran only on the did-finish-load success path. The did-fail-load handler, the loadURL catch, and the hard timeout all left the listener registered until the signal actually aborted (or indefinitely, if it never did). Fix: centralized cleanup inside `finish()` itself — it clears the timeout AND removes the abort listener on every exit path (success, fail, catch, timeout, abort). One source of truth; no path can forget. 2. [Minor] Duplicate STANDARD_CHECKS in judge-visual-parity.ts vs verify-ui-kit-visual-parity.ts. Both held identical 12-check sets; if one was edited, the other would silently drift out of sync. Fix: imported STANDARD_VISUAL_PARITY_CHECKS from @open-codesign/core (already exported at packages/core/src/index.ts:59). The host prompt only needs {id, question}, so we project the core's {id, dimension, question} down via map. Single source of truth. 3. [Nit] Hardcoded `maxTokens: 8000` in the vision judge call. This was a worst-case safety buffer that wastes budget on cheaper models (Gemini Flash, GPT-4o-mini) and doesn't help correctness — actual judge output (12 short {passed, reason} entries + a 1-2 sentence summary) lands around 1.5k tokens. Fix: dropped to 4096 with an inline comment explaining the envelope. 4096 fits under every routed model's output cap (Gemini Flash ~8k, Sonnet ~8k, GPT-4o ~16k) with comfortable headroom. Verification: - apps/desktop: pnpm typecheck (node + web tsconfig) clean - packages/core: pnpm typecheck clean - pnpm lint (biome) clean Bot also confirmed all previous review items resolved on this commit: ✅ fork image URLs → main branch ✅ changesets consolidated to one ✅ case-insensitive base64 check ✅ scope creep (.github/* changes) no longer in diff
diff --git a/apps/desktop/src/main/judge-visual-parity.ts b/apps/desktop/src/main/judge-visual-parity.ts
@@ -10,66 +10,18 @@
  * plumbing.
  */
 
-import type { JudgeVisualParityFn, VisualParityImageRef } from '@open-codesign/core';
-
-const STANDARD_CHECKS: Array<{ id: string; question: string }> = [
-  {
-    id: 'layout.column_count_match',
-    question: 'Does the candidate have the same number of major columns / regions as the source?',
-  },
-  {
-    id: 'layout.region_positions_match',
-    question:
-      'Are major regions (header / sidebar / main / right rail / footer) in the same positions as the source?',
-  },
-  {
-    id: 'layout.hierarchy_preserved',
-    question: 'Is the visual hierarchy (heading > subhead > body > footer) preserved?',
-  },
-  {
-    id: 'color.accent_color_match',
-    question:
-      'Is the primary accent color visually equivalent to the source (same hue family, similar saturation)?',
-  },
-  {
-    id: 'color.palette_consistency_match',
-    question:
-      'Does the overall palette feel match the source (warm/cool, saturated/muted, contrast level)?',
-  },
-  {
-    id: 'typography.font_family_match',
-    question:
-      'Does the font family character (serif / sans / mono) match the source for each text role?',
-  },
-  {
-    id: 'typography.heading_hierarchy_match',
-    question: 'Are heading weights and sizes stepped similarly (H1 vs body vs caption)?',
-  },
-  {
-    id: 'content.text_labels_present',
-    question:
-      'Are all visible text labels from the source present in the candidate (nav items, headings, button text)?',
-  },
-  {
-    id: 'content.all_sections_present',
-    question:
-      'Are all distinct sections from the source present in the candidate (not just one missing region)?',
-  },
-  {
-    id: 'components.repeated_pattern_count_match',
-    question:
-      'Does the candidate have approximately the same count of repeated patterns (cards / list items / nav links) as the source?',
-  },
-  {
-    id: 'components.component_structure_match',
-    question:
-      'Do repeated components have the same internal anatomy (header + body + footer pieces)?',
-  },
-  {
-    id: 'components.icon_motif_match',
-    question: 'Are icons / glyphs in the same style (line vs filled, monochrome vs colored)?',
-  },
-];
+import {
+  type JudgeVisualParityFn,
+  STANDARD_VISUAL_PARITY_CHECKS,
+  type VisualParityImageRef,
+} from '@open-codesign/core';
+
+// Use the canonical check list from core. Previously this file kept its own
+// duplicate copy; that risked drift if one was updated without the other
+// (review finding on PR #241). The host prompt only needs id + question, so
+// we project the core's `{id, dimension, question}` down to `{id, question}`.
+const STANDARD_CHECKS: ReadonlyArray<{ id: string; question: string }> =
+  STANDARD_VISUAL_PARITY_CHECKS.map((c) => ({ id: c.id, question: c.question }));
 
 export const SYSTEM_PROMPT = `You are a meticulous visual QA judge comparing two UI screenshots.
 
@@ -191,11 +143,17 @@ export function makeJudgeVisualParity(runVisionPrompt: RunVisionPromptFn): Judge
       { data: dataUrlToBase64(candidate.dataUrl), mimeType: candidate.mediaType },
     ];
 
+    // 4096 fits comfortably under the output cap of every vision model we
+    // currently route to (Gemini Flash ~8k, Sonnet ~8k, GPT-4o ~16k) and
+    // matches the judge's actual envelope: 12 short {passed, reason} entries
+    // + a 1-2 sentence summary lands at ~1.5k tokens in practice. 8000 was a
+    // worst-case safety buffer that caused waste on cheap models without
+    // helping correctness — review nit on PR #241.
     const result = await runVisionPrompt({
       systemPrompt: SYSTEM_PROMPT,
       userText: USER_PROMPT,
       userImages,
-      maxTokens: 8000,
+      maxTokens: 4096,
       ...(signal ? { signal } : {}),
     });
 
diff --git a/apps/desktop/src/main/render-ui-kit.ts b/apps/desktop/src/main/render-ui-kit.ts
@@ -47,40 +47,38 @@ export function makeUiKitRenderer(): RenderUiKitFn {
         capturePage: () => Promise<{ toPNG: () => Buffer }>;
       };
 
-      // Race: load + settle window vs hard timeout vs abort signal
+      // Race: load + settle window vs hard timeout vs abort signal.
+      // Centralize cleanup in `finish()` so EVERY exit path
+      // (success, fail, catch, timeout, abort) drops the timeout +
+      // unregisters the abort listener. Earlier this lived only in the
+      // success branch, which leaked the listener on fail/catch/timeout
+      // paths (review finding on PR #241).
       await new Promise<void>((resolve, reject) => {
         let settled = false;
         const finish = (err?: Error) => {
           if (settled) return;
           settled = true;
+          clearTimeout(hardTimeout);
+          signal?.removeEventListener('abort', onAbort);
           if (err) reject(err);
           else resolve();
         };
+        const onAbort = () => finish(new Error('renderUiKit aborted by signal'));
         const hardTimeout = setTimeout(
           () => finish(new Error(`renderUiKit hard timeout after ${HARD_TIMEOUT_MS}ms`)),
           HARD_TIMEOUT_MS,
         );
-        const onAbort = () => {
-          clearTimeout(hardTimeout);
-          finish(new Error('renderUiKit aborted by signal'));
-        };
         signal?.addEventListener('abort', onAbort, { once: true });
 
         wc.once('did-finish-load', () => {
           // Give fonts + CSS animations a moment to settle for visual parity
-          setTimeout(() => {
-            clearTimeout(hardTimeout);
-            signal?.removeEventListener('abort', onAbort);
-            finish();
-          }, SETTLE_AFTER_LOAD_MS);
+          setTimeout(() => finish(), SETTLE_AFTER_LOAD_MS);
         });
         wc.once('did-fail-load', () => {
-          clearTimeout(hardTimeout);
           finish(new Error('renderUiKit did-fail-load'));
         });
 
         void win.loadURL(dataUrl).catch((err: unknown) => {
-          clearTimeout(hardTimeout);
           finish(err instanceof Error ? err : new Error(String(err)));
         });
       });