|
| 1 | +import { describe, expect, it } from 'vitest' |
| 2 | +import { pushHammingHeap, pushHeap, sortHeap } from '../src/search/heap.js' |
| 3 | + |
| 4 | +/** |
| 5 | + * Run a sequence of candidates through a bounded heap and return the kept |
| 6 | + * rowIndices, sorted ascending for comparison. |
| 7 | + * @param {{ rowIndex: number, hamming: number }[]} candidates |
| 8 | + * @param {number} k |
| 9 | + * @returns {number[]} |
| 10 | + */ |
| 11 | +function keepHamming(candidates, k) { |
| 12 | + /** @type {{ rowIndex: number, hamming: number }[]} */ |
| 13 | + const heap = [] |
| 14 | + for (const c of candidates) pushHammingHeap(heap, c, k) |
| 15 | + return heap.map(e => e.rowIndex).sort((a, b) => a - b) |
| 16 | +} |
| 17 | + |
| 18 | +describe('heap tie-breaking is deterministic', () => { |
| 19 | + it('pushHammingHeap keeps the lowest rowIndices when hamming ties, regardless of insertion order', () => { |
| 20 | + // Five rows all at the same hamming distance; keep 3 -> must keep rows 0,1,2. |
| 21 | + const rows = [0, 1, 2, 3, 4].map(rowIndex => ({ rowIndex, hamming: 7 })) |
| 22 | + const forward = keepHamming(rows, 3) |
| 23 | + const reversed = keepHamming([...rows].reverse(), 3) |
| 24 | + const shuffled = keepHamming([rows[3], rows[0], rows[4], rows[2], rows[1]], 3) |
| 25 | + expect(forward).toEqual([0, 1, 2]) |
| 26 | + expect(reversed).toEqual([0, 1, 2]) |
| 27 | + expect(shuffled).toEqual([0, 1, 2]) |
| 28 | + }) |
| 29 | + |
| 30 | + it('pushHammingHeap prefers strictly nearer candidates over ties', () => { |
| 31 | + const rows = [ |
| 32 | + { rowIndex: 5, hamming: 2 }, |
| 33 | + { rowIndex: 9, hamming: 9 }, |
| 34 | + { rowIndex: 1, hamming: 9 }, |
| 35 | + { rowIndex: 8, hamming: 1 }, |
| 36 | + ] |
| 37 | + // keep 2 -> the two nearest by hamming (1 then 2): rows 8 and 5. |
| 38 | + expect(keepHamming(rows, 2)).toEqual([5, 8]) |
| 39 | + expect(keepHamming([...rows].reverse(), 2)).toEqual([5, 8]) |
| 40 | + }) |
| 41 | + |
| 42 | + it('pushHeap keeps best score, breaking ties by lower rowIndex (order-independent)', () => { |
| 43 | + // cosine: higher score is better. Tied at 0.9 -> keep lower rowIndices. |
| 44 | + const rows = [0, 1, 2, 3].map(rowIndex => ({ rowIndex, score: 0.9 })) |
| 45 | + /** @type {{ rowIndex: number, score: number }[]} */ |
| 46 | + const a = [] |
| 47 | + for (const c of rows) pushHeap(a, c, 2, 'cosine') |
| 48 | + /** @type {{ rowIndex: number, score: number }[]} */ |
| 49 | + const b = [] |
| 50 | + for (const c of [...rows].reverse()) pushHeap(b, c, 2, 'cosine') |
| 51 | + expect(a.map(e => e.rowIndex).sort((x, y) => x - y)).toEqual([0, 1]) |
| 52 | + expect(b.map(e => e.rowIndex).sort((x, y) => x - y)).toEqual([0, 1]) |
| 53 | + }) |
| 54 | + |
| 55 | + it('sortHeap orders tied scores by ascending rowIndex', () => { |
| 56 | + const results = [ |
| 57 | + { rowIndex: 4, score: 0.5 }, |
| 58 | + { rowIndex: 1, score: 0.9 }, |
| 59 | + { rowIndex: 7, score: 0.9 }, |
| 60 | + { rowIndex: 2, score: 0.9 }, |
| 61 | + ] |
| 62 | + expect(sortHeap(results, 'cosine').map(e => e.rowIndex)).toEqual([1, 2, 7, 4]) |
| 63 | + // euclidean: lower score is better, ties still ascend by rowIndex. |
| 64 | + expect(sortHeap(results, 'euclidean').map(e => e.rowIndex)).toEqual([4, 1, 2, 7]) |
| 65 | + }) |
| 66 | +}) |
0 commit comments