Skip to content

Commit 4fa73c3

Browse files
authored
Merge pull request #155 from DenisValeev/codex/clarify-synthetic-64d-visualization-method
Highlight repeated tiles in embedding explorer
2 parents dd4b303 + cdcb4c1 commit 4fa73c3

1 file changed

Lines changed: 104 additions & 0 deletions

File tree

apps/embedding-explorer/app.js

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,78 @@
225225
return Math.sqrt(sum);
226226
}
227227

228+
/**
 * Detects whether a vector consists of one leading segment (a "tile")
 * repeated end-to-end to fill the entire vector.
 *
 * Candidate tile sizes are tried in ascending order and only sizes that
 * divide the vector length evenly are considered, so the first match is the
 * smallest repeating tile. A tile size of 1 means every element is identical.
 *
 * Elements are compared with strict equality, with one addition: two NaN
 * entries at matching positions count as equal, so a tile containing NaN is
 * still recognised when it repeats.
 *
 * @param {Float32Array} values - Vector to inspect.
 * @returns {{ tileSize: number, repeatCount: number } | null} The smallest
 *   repeating tile and how many times it occurs, or `null` when `values` is
 *   not a Float32Array, has fewer than two elements, or does not repeat.
 */
function detectVectorTiling(values) {
  if (!(values instanceof Float32Array)) {
    return null;
  }

  const length = values.length;
  if (length <= 1) {
    return null;
  }

  // A tile must occur at least twice, so it can span at most half the vector.
  const largestTileSize = Math.floor(length / 2);

  for (let tileSize = 1; tileSize <= largestTileSize; tileSize += 1) {
    // Only whole repetitions count as tiling.
    if (length % tileSize !== 0) {
      continue;
    }

    let isRepeated = true;
    for (let index = tileSize; index < length; index += 1) {
      const current = values[index];
      const reference = values[index % tileSize];
      // NaN never equals itself under ===/!==, so match NaN pairs explicitly;
      // otherwise any tile holding a NaN could never be detected.
      const bothNaN = Number.isNaN(current) && Number.isNaN(reference);
      if (current !== reference && !bothNaN) {
        isRepeated = false;
        break;
      }
    }

    if (isRepeated) {
      // tileSize <= length / 2 guarantees at least two repetitions here.
      return { tileSize, repeatCount: length / tileSize };
    }
  }

  return null;
}
262+
263+
/**
 * Builds a small statistical summary of a float vector.
 *
 * Non-finite entries (NaN, ±Infinity) are skipped when counting finite
 * values, collecting distinct values and tracking the minimum and maximum.
 *
 * @param {Float32Array} values - Vector to summarise.
 * @returns {{
 *   length: number,
 *   finiteCount: number,
 *   uniqueCount: number,
 *   range: { min: number, max: number } | null,
 *   tiling: *
 * } | null} The summary, or `null` when `values` is not a non-empty
 *   Float32Array. `range` is `null` when no finite entry was seen; `tiling`
 *   is whatever `detectVectorTiling(values)` returns.
 */
function analyzeVector(values) {
  if (!(values instanceof Float32Array && values.length)) {
    return null;
  }

  const distinct = new Set();
  let lowest = Number.POSITIVE_INFINITY;
  let highest = Number.NEGATIVE_INFINITY;
  let finiteCount = 0;

  for (const entry of values) {
    if (Number.isFinite(entry)) {
      finiteCount += 1;
      distinct.add(entry);
      lowest = entry < lowest ? entry : lowest;
      highest = entry > highest ? entry : highest;
    }
  }

  // The bounds keep their infinite seeds only when no finite entry was found.
  const hasFiniteBounds = Number.isFinite(lowest) && Number.isFinite(highest);

  return {
    length: values.length,
    finiteCount,
    uniqueCount: distinct.size,
    range: hasFiniteBounds ? { min: lowest, max: highest } : null,
    tiling: detectVectorTiling(values),
  };
}
299+
228300
function getVisualizationConfig(mode) {
229301
if (typeof mode !== 'string') {
230302
return visualizationModes[DEFAULT_VISUALIZATION_MODE];
@@ -647,6 +719,7 @@
647719

648720
ensureVectorData(record);
649721
const bytes = record.vectorBytes instanceof Uint8Array ? record.vectorBytes : new Uint8Array();
722+
const analysis = analyzeVector(record.floatVector);
650723
let renderResult = null;
651724

652725
if (visualizationMode === 'diverging') {
@@ -697,6 +770,37 @@
697770
{ label: 'Updated', value: record.updatedAt ? formatDate(record.updatedAt) : '—' },
698771
];
699772

773+
if (analysis) {
774+
if (analysis.range) {
775+
const minLabel = analysis.range.min.toFixed(3);
776+
const maxLabel = analysis.range.max.toFixed(3);
777+
metaItems.push({ label: 'Value range', value: `${minLabel}${maxLabel}` });
778+
}
779+
780+
if (analysis.length) {
781+
const uniqueShare = analysis.length ? Math.round((analysis.uniqueCount / analysis.length) * 100) : 0;
782+
const finiteLabel = analysis.finiteCount === analysis.length
783+
? 'All values finite'
784+
: `${analysis.finiteCount.toLocaleString()} finite`;
785+
metaItems.push({
786+
label: 'Unique values',
787+
value: `${analysis.uniqueCount.toLocaleString()} (${uniqueShare}% distinct)`,
788+
title: finiteLabel,
789+
});
790+
}
791+
792+
if (analysis.tiling) {
793+
const { tileSize, repeatCount } = analysis.tiling;
794+
const repeatLabel = tileSize === 1
795+
? `Single value repeated ${repeatCount.toLocaleString()}×`
796+
: `${tileSize.toLocaleString()}D tile repeated ${repeatCount.toLocaleString()}×`;
797+
const repeatTitle = tileSize === 1
798+
? 'Every dimension shares the same value.'
799+
: `The leading ${tileSize.toLocaleString()} dimensions repeat to fill the vector.`;
800+
metaItems.push({ label: 'Detected tiling', value: repeatLabel, title: repeatTitle });
801+
}
802+
}
803+
700804
const extraMeta = Array.isArray(options.extraMeta) ? options.extraMeta.filter((item) => item && item.label) : [];
701805
const combined = [...metaItems];
702806

0 commit comments

Comments
 (0)