Skip to content

Commit e0eccbd

Browse files
committed
fix(lib): guard Unlimiformer attentionScore against non-finite embeddings
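Return NaN from attentionScore when either vector contains NaN, ±Infinity, or non-number entries, instead of throwing from cosineSimilarity. In rankItemsByAttentionTopK, return an empty result when the query embedding is non-finite, and skip items whose embeddings are non-finite. Add tests.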
1 parent 33f47db commit e0eccbd

File tree

2 files changed

+58
-4
lines changed

2 files changed

+58
-4
lines changed

packages/lib/unlimiformer.test.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { describe, expect, test } from "bun:test"
22
import {
3+
attentionScore,
34
attentionScores,
45
rankItemsByAttentionTopK,
56
topKAttentionKeys,
@@ -60,6 +61,28 @@ describe("topKAttentionKeys", () => {
6061
),
6162
).toEqual([])
6263
})
64+
65+
test("skips keys with NaN components without throwing", () => {
66+
const q = unit(1, 0, 0)
67+
const keys = [unit(1, 0, 0), [1, Number.NaN, 0], unit(0, 1, 0)]
68+
const top = topKAttentionKeys(q, keys, 5)
69+
expect(top.map((t) => t.index)).toEqual([0, 2])
70+
})
71+
})
72+
73+
describe("attentionScore", () => {
74+
test("returns NaN instead of throwing when a vector contains NaN", () => {
75+
const k = unit(1, 0, 0)
76+
expect(attentionScore([1, Number.NaN, 0], k)).toBeNaN()
77+
expect(attentionScore(k, [1, Number.NaN, 0])).toBeNaN()
78+
})
79+
80+
test("returns NaN for non-number or non-finite components", () => {
81+
const k = unit(1, 0, 0)
82+
const stringSlot = [1, "x", 0] as unknown as number[]
83+
expect(attentionScore(stringSlot, k)).toBeNaN()
84+
expect(attentionScore([Number.POSITIVE_INFINITY, 0, 0], k)).toBeNaN()
85+
})
6386
})
6487

6588
describe("attentionScores", () => {
@@ -113,4 +136,22 @@ describe("rankItemsByAttentionTopK", () => {
113136
expect(ranked).toHaveLength(1)
114137
expect(ranked[0]?.item.id).toBe("ok")
115138
})
139+
140+
test("returns empty when query embedding is non-finite", () => {
141+
const items = [{ id: "a", e: unit(1, 0, 0) }]
142+
expect(
143+
rankItemsByAttentionTopK([Number.NaN, 0, 0], items, (x) => x.e, 2),
144+
).toEqual([])
145+
})
146+
147+
test("skips items with non-finite embeddings", () => {
148+
const items = [
149+
{ id: "bad", e: [1, Number.NaN, 0] },
150+
{ id: "ok", e: unit(0, 1, 0) },
151+
]
152+
const q = unit(0, 1, 0)
153+
const ranked = rankItemsByAttentionTopK(q, items, (x) => x.e, 2)
154+
expect(ranked).toHaveLength(1)
155+
expect(ranked[0]?.item.id).toBe("ok")
156+
})
116157
})

packages/lib/unlimiformer.ts

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99

1010
import { cosineSimilarity } from "./similarity"
1111

12+
/** True when every entry is a finite number (empty arrays allowed). */
13+
const isFiniteEmbeddingVector = (v: number[]): boolean =>
14+
v.every((x) => typeof x === "number" && Number.isFinite(x))
15+
1216
export type AttentionTopK = {
1317
index: number
1418
score: number
@@ -19,19 +23,23 @@ export type AttentionTopK = {
1923
* For normalized embeddings this matches cosine similarity.
2024
*
2125
* Returns `NaN` when `query` and `key` have different lengths (e.g. mixed embedding
22-
* models) so callers can avoid throwing from `cosineSimilarity`.
26+
* models), or when either vector contains non-number or non-finite entries (`NaN`, `±Infinity`), so
27+
* callers avoid throwing from `cosineSimilarity`.
2328
*/
2429
export const attentionScore = (query: number[], key: number[]): number => {
2530
if (query.length !== key.length) {
2631
return Number.NaN
2732
}
33+
if (!isFiniteEmbeddingVector(query) || !isFiniteEmbeddingVector(key)) {
34+
return Number.NaN
35+
}
2836
return cosineSimilarity(query, key)
2937
}
3038

3139
/**
3240
* Attention scores for `query` against every row in `keys`, aligned by index.
33-
* Entries are `NaN` when a key length does not match the query (same embedding model
34-
* is required for a meaningful score).
41+
* Entries are `NaN` when a key length does not match the query, or when either vector
42+
* has non-number or non-finite components.
3543
*/
3644
export const attentionScores = (query: number[], keys: number[][]): number[] =>
3745
keys.map((key) => attentionScore(query, key))
@@ -97,6 +105,10 @@ export const rankItemsByAttentionTopK = <T>(
97105
return []
98106
}
99107

108+
if (!isFiniteEmbeddingVector(queryEmbedding)) {
109+
return []
110+
}
111+
100112
const packed: Array<{ item: T; originalIndex: number; embedding: number[] }> =
101113
[]
102114

@@ -107,7 +119,8 @@ export const rankItemsByAttentionTopK = <T>(
107119
if (
108120
embedding &&
109121
embedding.length > 0 &&
110-
embedding.length === queryEmbedding.length
122+
embedding.length === queryEmbedding.length &&
123+
isFiniteEmbeddingVector(embedding)
111124
) {
112125
packed.push({ item, originalIndex: i, embedding })
113126
}

0 commit comments

Comments
 (0)