Skip to content

Commit 47cf1bc

Browse files
feat(app): add MapPage with force-directed similarity graph
New /map page renders one image-thumbnail node per spec via react-force-graph-2d, positioned by tag overlap: specs that share many tags pull together, specs that share few drift apart.

Similarity is computed entirely client-side as pure functions:

- flattenTags() folds nested spec.tags + impl_tags into a single category-prefixed string set (e.g. plot_type:scatter), preventing collisions like "numeric" appearing in two categories.
- computeIDF() weights tags by log(N / df), so ubiquitous tags like data_type:numeric get ~zero weight and rare tags carry the signal.
- weightedJaccard() + buildKNNLinks() emit each spec's top-K most similar neighbors (K=5, min-sim=0.05), producing ~1.6k edges over ~327 specs — dense enough to cluster, sparse enough to avoid hairball rendering.

react-force-graph-2d's built-in d3-force engine handles the layout. Thumbnails are eager-preloaded and attached to nodes as they resolve so the canvas re-paints organically without restarting physics. Hover highlights neighbors and dims the rest. Click navigates to the spec page. A visually-hidden anchor list mirrors the canvas content for screen-reader and keyboard users.

Refs #5646

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cfb73c4 commit 47cf1bc

6 files changed

Lines changed: 998 additions & 1 deletion

File tree

app/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222
"@emotion/styled": "^11.14.1",
2323
"@mui/icons-material": "^9.0.0",
2424
"@mui/material": "^9.0.0",
25+
"force-graph": "^1.51.4",
2526
"fuse.js": "^7.3.0",
2627
"react": "^19.2.5",
2728
"react-dom": "^19.2.5",
29+
"react-force-graph-2d": "^1.29.1",
2830
"react-helmet-async": "^3.0.0",
2931
"react-router-dom": "^7.14.2",
3032
"react-syntax-highlighter": "^16.1.1",
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
import { describe, it, expect } from 'vitest';
2+
3+
import {
4+
flattenTags,
5+
computeIDF,
6+
weightedJaccard,
7+
buildKNNLinks,
8+
selectMapThumbUrl,
9+
type SpecMapItem,
10+
} from './MapPage.helpers';
11+
12+
13+
/**
 * Test fixture factory: builds a SpecMapItem whose title equals its id, whose
 * light/dark preview URLs are derived from the id, and whose quality_score is 90.
 * `implTags` defaults to null so tests that only exercise spec-level tags can omit it.
 */
function spec(id: string, tags: SpecMapItem['tags'], implTags: SpecMapItem['impl_tags'] = null): SpecMapItem {
14+
return {
15+
id,
16+
title: id,
17+
preview_url_light: `https://example.com/${id}-light.png`,
18+
preview_url_dark: `https://example.com/${id}-dark.png`,
19+
quality_score: 90,
20+
tags,
21+
impl_tags: implTags,
22+
};
23+
}
24+
25+
26+
// flattenTags: category prefixing, impl_tags merging, the includeImpl switch,
// null/empty inputs, and de-duplication of repeated category:value pairs.
describe('flattenTags', () => {
27+
it('prefixes values with their category', () => {
28+
const s = spec('a', { plot_type: ['scatter'], features: ['basic', '2d'] });
29+
// Sorted before comparing so the assertion does not depend on emission order.
expect(flattenTags(s).sort()).toEqual(['features:2d', 'features:basic', 'plot_type:scatter']);
30+
});
31+
32+
it('merges spec.tags with impl_tags by default', () => {
33+
const s = spec('a', { plot_type: ['scatter'] }, { dependencies: ['scipy'] });
34+
expect(flattenTags(s).sort()).toEqual(['dependencies:scipy', 'plot_type:scatter']);
35+
});
36+
37+
it('skips impl_tags when includeImpl=false', () => {
38+
const s = spec('a', { plot_type: ['scatter'] }, { dependencies: ['scipy'] });
39+
expect(flattenTags(s, false)).toEqual(['plot_type:scatter']);
40+
});
41+
42+
it('handles missing dicts and empty arrays', () => {
43+
expect(flattenTags(spec('a', null, null))).toEqual([]);
44+
expect(flattenTags(spec('a', { plot_type: [] }, null))).toEqual([]);
45+
});
46+
47+
it('deduplicates identical category:value pairs', () => {
48+
// The same pair appears twice in tags and once more in impl_tags.
const s = spec('a', { plot_type: ['scatter', 'scatter'] }, { plot_type: ['scatter'] });
49+
expect(flattenTags(s)).toEqual(['plot_type:scatter']);
50+
});
51+
});
52+
53+
54+
// computeIDF: checks the log(N / df) formula, the zero weight of a tag present
// in every spec, and the empty-corpus case.
describe('computeIDF', () => {
55+
it('assigns log(N / df) to every tag', () => {
56+
// N = 3; scatter occurs in 2 specs, line in 1.
const specs = [
57+
spec('a', { plot_type: ['scatter'] }),
58+
spec('b', { plot_type: ['scatter'] }),
59+
spec('c', { plot_type: ['line'] }),
60+
];
61+
const idf = computeIDF(specs);
62+
expect(idf.get('plot_type:scatter')).toBeCloseTo(Math.log(3 / 2));
63+
expect(idf.get('plot_type:line')).toBeCloseTo(Math.log(3 / 1));
64+
});
65+
66+
it('gives ubiquitous tags weight ~0', () => {
67+
// df equals N here, so the weight is log(1) = 0.
const specs = [
68+
spec('a', { data_type: ['numeric'] }),
69+
spec('b', { data_type: ['numeric'] }),
70+
];
71+
expect(computeIDF(specs).get('data_type:numeric')).toBeCloseTo(0);
72+
});
73+
74+
it('survives empty input without dividing by zero', () => {
75+
expect(computeIDF([]).size).toBe(0);
76+
});
77+
});
78+
79+
80+
// weightedJaccard: identical sets, disjoint sets, IDF-weighted overlap, and
// the empty-input guard. Uses hand-written IDF maps rather than computeIDF.
describe('weightedJaccard', () => {
81+
const idf = new Map([
82+
['plot_type:scatter', 1.0],
83+
['plot_type:line', 1.0],
84+
['features:basic', 0.5],
85+
]);
86+
87+
it('returns 1 when sets are identical', () => {
88+
expect(weightedJaccard(['plot_type:scatter'], ['plot_type:scatter'], idf)).toBeCloseTo(1);
89+
});
90+
91+
it('returns 0 when sets are disjoint', () => {
92+
expect(weightedJaccard(['plot_type:scatter'], ['plot_type:line'], idf)).toBe(0);
93+
});
94+
95+
it('weights overlap by IDF (rare overlap > common overlap)', () => {
96+
// Same union in both calls (weight 2.1); only the shared tag differs:
// sharing the rare tag gives 2 / 2.1, sharing the common tag gives 0.1 / 2.1.
const rareIdf = new Map([['plot_type:scatter', 2], ['features:basic', 0.1]]);
97+
const sharedRare = weightedJaccard(['plot_type:scatter'], ['plot_type:scatter', 'features:basic'], rareIdf);
98+
const sharedCommon = weightedJaccard(['features:basic'], ['features:basic', 'plot_type:scatter'], rareIdf);
99+
expect(sharedRare).toBeGreaterThan(sharedCommon);
100+
});
101+
102+
it('returns 0 when either set is empty', () => {
103+
expect(weightedJaccard([], ['plot_type:scatter'], idf)).toBe(0);
104+
expect(weightedJaccard(['plot_type:scatter'], [], idf)).toBe(0);
105+
});
106+
});
107+
108+
109+
// buildKNNLinks: top-K selection, undirected de-duplication, the minSim
// cut-off, and the (0, 1] range of emitted weights.
describe('buildKNNLinks', () => {
110+
it('keeps top-K neighbors above the similarity threshold', () => {
111+
const specs = [
112+
spec('scatter1', { plot_type: ['scatter'], features: ['basic'] }),
113+
spec('scatter2', { plot_type: ['scatter'], features: ['basic'] }),
114+
spec('line1', { plot_type: ['line'], features: ['basic'] }),
115+
spec('bar1', { plot_type: ['bar'] }),
116+
];
117+
const idf = computeIDF(specs);
118+
const links = buildKNNLinks(specs, idf, 2, 0.0);
119+
// scatter1 ↔ scatter2 should be linked (most similar pair)
120+
// buildKNNLinks puts the lexicographically smaller id in `source`, hence 'scatter1-scatter2'.
const ids = links.map(l => `${l.source}-${l.target}`).sort();
121+
expect(ids).toContain('scatter1-scatter2');
122+
});
123+
124+
it('produces undirected links (no A→B and B→A duplicate)', () => {
125+
// Need a 3-spec corpus so IDF gives non-zero weight to scatter (otherwise
126+
// a universal tag has weight 0 and no link is emitted — correct behavior).
127+
const specs = [
128+
spec('a', { plot_type: ['scatter'] }),
129+
spec('b', { plot_type: ['scatter'] }),
130+
spec('c', { plot_type: ['line'] }),
131+
];
132+
const links = buildKNNLinks(specs, computeIDF(specs), 5, 0.0);
133+
const keys = links.map(l => `${l.source}|${l.target}`);
134+
// a-b should appear exactly once, not twice
135+
expect(keys.filter(k => k === 'a|b' || k === 'b|a').length).toBe(1);
136+
});
137+
138+
it('drops links below minSim', () => {
139+
// a and b share no tags, so their similarity is 0 and cannot reach minSim = 0.5.
const specs = [
140+
spec('a', { plot_type: ['scatter'] }),
141+
spec('b', { plot_type: ['line'] }),
142+
];
143+
const links = buildKNNLinks(specs, computeIDF(specs), 5, 0.5);
144+
expect(links).toHaveLength(0);
145+
});
146+
147+
it('every link weight is in (0, 1]', () => {
148+
const specs = [
149+
spec('a', { plot_type: ['scatter'], features: ['basic'] }),
150+
spec('b', { plot_type: ['scatter'], features: ['regression'] }),
151+
spec('c', { plot_type: ['line'], features: ['basic'] }),
152+
];
153+
const links = buildKNNLinks(specs, computeIDF(specs), 3, 0.0);
154+
for (const l of links) {
155+
expect(l.weight).toBeGreaterThan(0);
156+
expect(l.weight).toBeLessThanOrEqual(1);
157+
}
158+
});
159+
});
160+
161+
162+
// selectMapThumbUrl: theme-matched URL, fallback to the other theme, and null
// when no preview exists. NOTE(review): the behaviour under test lives in
// selectPreviewUrl (../utils/themedPreview), which this wrapper delegates to.
describe('selectMapThumbUrl', () => {
163+
it('returns dark URL in dark mode, light in light mode', () => {
164+
const s = spec('a', null);
165+
expect(selectMapThumbUrl(s, true)).toBe('https://example.com/a-dark.png');
166+
expect(selectMapThumbUrl(s, false)).toBe('https://example.com/a-light.png');
167+
});
168+
169+
it('falls back to the other theme when the preferred URL is missing', () => {
170+
// Dark mode requested but only the light preview exists.
const s: SpecMapItem = { ...spec('a', null), preview_url_dark: null };
171+
expect(selectMapThumbUrl(s, true)).toBe('https://example.com/a-light.png');
172+
});
173+
174+
it('returns null when no preview URLs at all', () => {
175+
const s: SpecMapItem = { ...spec('a', null), preview_url_light: null, preview_url_dark: null };
176+
expect(selectMapThumbUrl(s, false)).toBeNull();
177+
});
178+
});

app/src/pages/MapPage.helpers.ts

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/**
2+
* Pure helpers for the /map page: tag flattening, IDF weighting,
3+
* weighted Jaccard similarity, and sparse KNN edge construction.
4+
*
5+
* Kept side-effect-free so the math is exhaustively unit-testable
6+
* in MapPage.helpers.test.ts. The page component imports these and
7+
* feeds the result into react-force-graph-2d.
8+
*/
9+
10+
import { selectPreviewUrl } from '../utils/themedPreview';
11+
12+
13+
/**
 * Backend response shape from GET /api/specs/map. Mirrors api/schemas.py::SpecMapItem.
 * Every field except `id` and `title` is nullable, so consumers must handle null.
 */
14+
export interface SpecMapItem {
15+
/** Spec identifier; buildKNNLinks uses it as the endpoint of emitted links. */
id: string;
16+
title: string;
17+
/** Light-theme thumbnail URL, or null. */
preview_url_light: string | null;
18+
/** Dark-theme thumbnail URL, or null. */
preview_url_dark: string | null;
19+
/** Not read by any helper in this file. */
quality_score: number | null;
20+
/** Spec-level tags as category → values; flattenTags emits each as `category:value`. */
tags: Record<string, string[]> | null;
21+
/** Implementation-level tags, same shape; merged by flattenTags unless includeImpl is false. */
impl_tags: Record<string, string[]> | null;
22+
}
23+
24+
/** Node shape passed to ForceGraph2D. `img` populated lazily as thumbnails resolve. */
25+
export interface MapNode {
26+
id: string;
27+
title: string;
28+
/** NOTE(review): presumably the flattenTags() output for this spec — confirm in the page component. */
tags: string[];
29+
/** Thumbnail URL to load, or null when the spec has no preview. */
thumbUrl: string | null;
30+
/** Optional: absent until a loaded image element is attached. */
img?: HTMLImageElement;
31+
}
32+
33+
/** Link shape passed to ForceGraph2D. `weight` = weighted-Jaccard sim ∈ (0, 1]. */
34+
export interface MapLink {
35+
/** Spec id; buildKNNLinks always stores the lexicographically smaller id of the pair here. */
source: string;
36+
/** Spec id; the lexicographically larger id of the pair. */
target: string;
37+
/** Similarity of the pair; buildKNNLinks never emits a link with weight 0. */
weight: number;
38+
}
39+
40+
/**
41+
* Flatten a spec's nested tag dicts to a single `category:value` string set.
42+
* Prefixing prevents collisions like `numeric` appearing in both `data_type`
43+
* and `dataprep` and gives the IDF/Jaccard math distinct tokens to weigh.
44+
*/
45+
export function flattenTags(spec: SpecMapItem, includeImpl = true): string[] {
46+
const out: string[] = [];
47+
const push = (dict: Record<string, string[]> | null | undefined) => {
48+
if (!dict) return;
49+
for (const [category, values] of Object.entries(dict)) {
50+
if (!Array.isArray(values)) continue;
51+
for (const v of values) {
52+
if (typeof v === 'string' && v.length > 0) out.push(`${category}:${v}`);
53+
}
54+
}
55+
};
56+
push(spec.tags);
57+
if (includeImpl) push(spec.impl_tags);
58+
return Array.from(new Set(out));
59+
}
60+
61+
/**
 * Compute inverse-document-frequency weights for every tag in the corpus:
 * `w_t = log(N / df_t)`, where `N` is the number of specs and `df_t` is the
 * number of specs whose flattened tag list contains `t`.
 *
 * A tag carried by every spec gets weight 0; rarer tags get larger weights.
 * Weights are never negative because `df_t <= N`. Tags that occur in no spec
 * have no entry in the map, so callers decide the default on lookup.
 *
 * @param specs - The corpus. An empty array yields an empty map.
 * @returns Map from `category:value` token to its IDF weight.
 */
export function computeIDF(specs: SpecMapItem[]): Map<string, number> {
  // Document frequency: flattenTags de-duplicates, so each spec counts a tag at most once.
  const docFreq = new Map<string, number>();
  for (const item of specs) {
    for (const token of flattenTags(item)) {
      const prev = docFreq.get(token);
      docFreq.set(token, prev === undefined ? 1 : prev + 1);
    }
  }

  // With an empty corpus docFreq is empty too; the fallback only keeps N non-zero.
  const total = specs.length === 0 ? 1 : specs.length;
  const weights = new Map<string, number>();
  docFreq.forEach((count, token) => {
    weights.set(token, Math.log(total / count));
  });
  return weights;
}
81+
82+
/**
 * Weighted Jaccard similarity between two tag lists:
 *
 *   sim = Σ w_t for t ∈ a ∩ b  /  Σ w_t for t ∈ a ∪ b
 *
 * Each list is treated as a set, so repeated tags count once. A tag missing
 * from `idf` contributes weight 0.
 *
 * @param a - First tag list.
 * @param b - Second tag list.
 * @param idf - Per-tag weights, e.g. the output of computeIDF.
 * @returns The similarity, which lies in [0, 1] when all weights are
 *   non-negative. Returns 0 when EITHER list is empty, and 0 when the union
 *   carries no weight (all tags unknown or zero-weighted).
 */
export function weightedJaccard(a: string[], b: string[], idf: Map<string, number>): number {
  if (a.length === 0 || b.length === 0) return 0;
  const setA = new Set(a);
  const setB = new Set(b);
  let shared = 0;
  let union = 0;
  // Every tag of A belongs to the union; those also in B form the intersection.
  for (const tag of setA) {
    const w = idf.get(tag) ?? 0;
    union += w;
    if (setB.has(tag)) shared += w;
  }
  // Tags only in B complete the union; setA itself tells us what was already counted.
  for (const tag of setB) {
    if (!setA.has(tag)) union += idf.get(tag) ?? 0;
  }
  return union > 0 ? shared / union : 0;
}
106+
107+
/**
 * Build a sparse, undirected K-nearest-neighbour link list.
 *
 * Each spec nominates its `k` most similar neighbours among those whose
 * weighted-Jaccard similarity is strictly positive and at least `minSim`.
 * Nominations are merged per unordered id pair, so A→B and B→A produce a
 * single link. `source` is always the lexicographically smaller id. If the
 * same id pair is nominated with different weights (only possible when
 * `specs` contains duplicate ids), the highest weight is kept.
 *
 * The result holds at most `specs.length * k` links before merging; with
 * ~327 specs and K=5 that is ~1.6k edges at most.
 *
 * Similarity is symmetric, so each pair is scored once and the score is shared
 * by both endpoints.
 *
 * @param specs - Corpus of specs; their ids become link endpoints.
 * @param idf - Tag weights, normally `computeIDF(specs)`.
 * @param k - Maximum neighbours nominated per spec. Values ≤ 0 or NaN yield
 *   no links; fractional values are truncated.
 * @param minSim - Minimum similarity for a pair to be considered.
 * @returns Links in order of first nomination, each with weight in (0, 1]
 *   for non-negative IDF weights.
 */
export function buildKNNLinks(
  specs: SpecMapItem[],
  idf: Map<string, number>,
  k = 5,
  minSim = 0.05
): MapLink[] {
  // A negative end index makes slice() count from the END of the array, which
  // would keep "all but the last |k|" neighbours. Clamp so k <= 0 (and NaN) mean none.
  const limit = k > 0 ? k : 0;
  if (limit === 0) return [];

  const tagsByIdx = specs.map(s => flattenTags(s));

  // candidates[i] lists i's eligible neighbours in ascending index order, so the
  // stable sort below breaks similarity ties by original position.
  const candidates: { j: number; sim: number }[][] = specs.map(() => []);
  for (let i = 0; i < specs.length; i++) {
    for (let j = i + 1; j < specs.length; j++) {
      const sim = weightedJaccard(tagsByIdx[i], tagsByIdx[j], idf);
      // sim > 0 drops zero-weight pairs (no shared tags or all-zero IDF) even when minSim is 0.
      if (sim > 0 && sim >= minSim) {
        candidates[i].push({ j, sim });
        candidates[j].push({ j: i, sim });
      }
    }
  }

  const linkSet = new Map<string, MapLink>();
  for (let i = 0; i < specs.length; i++) {
    const ranked = candidates[i].sort((x, y) => y.sim - x.sim);
    for (const { j, sim } of ranked.slice(0, limit)) {
      const a = specs[i].id;
      const b = specs[j].id;
      const [source, target] = a < b ? [a, b] : [b, a];
      const key = `${source}|${target}`;
      const existing = linkSet.get(key);
      if (!existing || sim > existing.weight) {
        linkSet.set(key, { source, target, weight: sim });
      }
    }
  }
  return Array.from(linkSet.values());
}
144+
145+
/**
 * Resolve the thumbnail URL a map node should show for the active theme.
 * Pure delegation to selectPreviewUrl; kept as a named helper so the map page
 * has a single place to adjust thumbnail selection.
 *
 * @param spec - Spec carrying the light/dark preview URLs.
 * @param isDark - True when the dark theme is active.
 * @returns Whatever selectPreviewUrl returns for these arguments.
 */
export function selectMapThumbUrl(spec: SpecMapItem, isDark: boolean): string | null {
  const url = selectPreviewUrl(spec, isDark);
  return url;
}
149+
150+
/**
 * Eagerly preload every item's thumbnail.
 *
 * The returned promise resolves once every image has either loaded or failed.
 * It never rejects: failed loads are simply left out of the result map, and
 * items without a `thumbUrl` are skipped without creating an element.
 *
 * `onLoad` fires once per successfully loaded image, as soon as it loads, so
 * the caller can repaint incrementally instead of waiting for the whole batch.
 * If `onLoad` throws, the error still propagates out of the image's load
 * handler, but the image stays in the result and the batch still settles
 * (previously such a throw left the returned promise pending forever).
 *
 * @param items - Node ids with the thumbnail URL to load (null = nothing to load).
 * @param onLoad - Optional per-image callback invoked with the id and element.
 * @returns Map from id to the loaded image element.
 */
export async function preloadImages(
  items: { id: string; thumbUrl: string | null }[],
  onLoad?: (id: string, img: HTMLImageElement) => void
): Promise<Map<string, HTMLImageElement>> {
  const out = new Map<string, HTMLImageElement>();
  await Promise.all(
    items.map(({ id, thumbUrl }) => {
      if (!thumbUrl) return Promise.resolve();
      return new Promise<void>(resolve => {
        // document.createElement is preferred over `new Image()` here only because
        // some lint configs don't surface browser globals on plain .ts files.
        const img = document.createElement('img');
        img.crossOrigin = 'anonymous';
        img.onload = () => {
          out.set(id, img);
          try {
            onLoad?.(id, img);
          } finally {
            // Settle even when the callback throws, otherwise Promise.all never completes.
            resolve();
          }
        };
        img.onerror = () => resolve();
        // Handlers are attached before src is set so a cached image cannot fire early and be missed.
        img.src = thumbUrl;
      });
    })
  );
  return out;
}

0 commit comments

Comments
 (0)