Skip to content

Commit 070aa41

Browse files
committed
Fuzzy filtering
1 parent e43d869 commit 070aa41

File tree

3 files changed

+486
-23
lines changed

3 files changed

+486
-23
lines changed

src/utils/filtering.bench.ts

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import { bench, describe } from "vitest";
2+
import { readFileSync } from "node:fs";
3+
import { join } from "node:path";
4+
import { parseSchemas, type SchemasJson } from "../data/schemas";
5+
import { allDeclarations } from "../data/derived";
6+
import { parseSearch, searchDeclarations, fuzzyScore } from "./filtering";
7+
8+
/**
 * Loads one schema dump for the benchmarks.
 *
 * Reads `<name>.json` from the `schemas` directory two levels above this
 * file, parses it as JSON and passes the result to `parseSchemas`.
 *
 * @param name - Base name of the schema file, without the `.json` extension.
 * @returns Whatever `parseSchemas` produces for that file.
 */
function loadSchema(name: string) {
  const schemaFile = join(__dirname, "../../schemas", `${name}.json`);
  const rawJson = readFileSync(schemaFile, "utf8");
  // The file content is asserted to be SchemasJson; it is not validated here.
  const parsedJson = JSON.parse(rawJson) as SchemasJson;
  return parseSchemas(parsedJson);
}
13+
14+
// Collects every declaration of one game's schema into an array.
const collectDeclarations = (game: string) =>
  Array.from(allDeclarations(loadSchema(game).declarations));

// Data sets are built once at module load and shared by all benchmarks.
const cs2All = collectDeclarations("cs2");
const dota2All = collectDeclarations("dota2");
const deadlockAll = collectDeclarations("deadlock");

// Queries are parsed up front, outside the bench callbacks, so the timed
// section covers searchDeclarations only and not parseSearch.
const queries = {
  // Plain name queries
  cbaseentity: parseSearch("CBaseEntity"),
  cbase: parseSearch("CBase"),
  weapon: parseSearch("weapon"),
  // Abbreviation-style queries
  cbe: parseSearch("CBE"),
  cswb: parseSearch("cswb"),
  // Multi-word, field-name and combined text + module: queries
  baseEntity: parseSearch("base entity"),
  flRadius: parseSearch("m_flRadius"),
  weaponClient: parseSearch("weapon module:client"),
  // Long pattern without separators
  initfromsnapshot: parseSearch("initfromsnapshot"),
};
30+
31+
// Micro-benchmarks for fuzzyScore on single pattern/target pairs.
describe("fuzzyScore", () => {
  // One entry per benchmark; registered in this order.
  const scenarios = [
    { label: "exact match", pattern: "cbaseentity", target: "CBaseEntity" },
    { label: "prefix match", pattern: "cbase", target: "CBaseEntity" },
    { label: "substring match", pattern: "entity", target: "CBaseEntity" },
    { label: "fuzzy boundary match (CBE)", pattern: "cbe", target: "CBaseEntity" },
    {
      label: "fuzzy long pattern (initfromsnapshot)",
      pattern: "initfromsnapshot",
      target: "C_INIT_InitFromCPSnapshot",
    },
    { label: "no match (null)", pattern: "xyz", target: "CBaseEntity" },
    {
      label: "no match long target",
      pattern: "xyz",
      target: "C_DOTA_Ability_Special_Bonus_Unique_Hoodwink_SharpshooterPierceHeroes",
    },
  ];

  for (const { label, pattern, target } of scenarios) {
    bench(label, () => {
      fuzzyScore(pattern, target);
    });
  }
});
60+
61+
// End-to-end search benchmarks over the CS2 declaration set.
describe("searchDeclarations — CS2", () => {
  // Each entry pairs a benchmark label with one of the pre-parsed queries.
  const cases = [
    { label: "exact: CBaseEntity", query: queries.cbaseentity },
    { label: "prefix: CBase", query: queries.cbase },
    { label: "substring: weapon", query: queries.weapon },
    { label: "fuzzy: CBE", query: queries.cbe },
    { label: "fuzzy: cswb", query: queries.cswb },
    { label: "multi-word: base entity", query: queries.baseEntity },
    { label: "field: m_flRadius", query: queries.flRadius },
    { label: "combined: weapon module:client", query: queries.weaponClient },
  ];

  for (const { label, query } of cases) {
    bench(label, () => {
      searchDeclarations(cs2All, query);
    });
  }
});
94+
95+
// End-to-end search benchmarks over the Dota 2 declaration set.
describe("searchDeclarations — Dota2 (largest)", () => {
  // Each entry pairs a benchmark label with one of the pre-parsed queries.
  const cases = [
    { label: "exact: CBaseEntity", query: queries.cbaseentity },
    { label: "prefix: CBase", query: queries.cbase },
    { label: "substring: weapon", query: queries.weapon },
    { label: "fuzzy: CBE", query: queries.cbe },
    { label: "fuzzy: initfromsnapshot", query: queries.initfromsnapshot },
    { label: "multi-word: base entity", query: queries.baseEntity },
    { label: "field: m_flRadius", query: queries.flRadius },
    // NOTE: runs the same parsed query on the same data as "fuzzy: CBE" above.
    { label: "short fuzzy: cbe (worst case)", query: queries.cbe },
  ];

  for (const { label, query } of cases) {
    bench(label, () => {
      searchDeclarations(dota2All, query);
    });
  }
});
128+
129+
// End-to-end search benchmarks over the Deadlock declaration set.
describe("searchDeclarations — Deadlock", () => {
  // Each entry pairs a benchmark label with one of the pre-parsed queries.
  const cases = [
    { label: "exact: CBaseEntity", query: queries.cbaseentity },
    { label: "fuzzy: CBE", query: queries.cbe },
    { label: "fuzzy: initfromsnapshot", query: queries.initfromsnapshot },
    { label: "multi-word: base entity", query: queries.baseEntity },
  ];

  for (const { label, query } of cases) {
    bench(label, () => {
      searchDeclarations(deadlockAll, query);
    });
  }
});

src/utils/filtering.test.ts

Lines changed: 168 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
matchesMetadataKeys,
88
matchesMetadataValues,
99
searchDeclarations,
10+
fuzzyScore,
1011
EMPTY_PARSED,
1112
} from "./filtering";
1213
import type { SchemaClass, SchemaEnum } from "../data/types";
@@ -1247,18 +1248,18 @@ describe("search result ranking", () => {
12471248
expect(lastEffectIdx).toBeLessThan(firstFieldOnlyIdx);
12481249
});
12491250

1250-
it("alphabetical within same tier (sphere)", () => {
1251+
it("substring position affects ranking (sphere)", () => {
12511252
const result = searchDeclarations(declarations, parseSearch("sphere"));
12521253
const names = result.map((d) => d.name);
1253-
// All are tier 2 (substring match), alphabetical by name then module
1254+
// Sorted by substring position (earlier = better), then alphabetical
12541255
expect(names).toEqual([
1255-
"CAnimationGraphVisualizerSphere",
1256+
"CastSphereSATParams_t",
12561257
"CNavVolumeSphere",
1257-
"CSoundAreaEntitySphere",
12581258
"CSoundEventSphereEntity",
1259-
"C_SoundAreaEntitySphere",
12601259
"C_SoundEventSphereEntity",
1261-
"CastSphereSATParams_t",
1260+
"CSoundAreaEntitySphere",
1261+
"C_SoundAreaEntitySphere",
1262+
"CAnimationGraphVisualizerSphere",
12621263
]);
12631264
});
12641265

@@ -1300,12 +1301,12 @@ describe("search result ranking", () => {
13001301
it("multi-word search", () => {
13011302
const result = searchDeclarations(declarations, parseSearch("sound sphere"));
13021303
const names = result.map((d) => d.name);
1303-
// Only declarations matching both words
1304+
// Only declarations matching both words, scored by combined substring positions
13041305
expect(names).toEqual([
1305-
"CSoundAreaEntitySphere",
13061306
"CSoundEventSphereEntity",
1307-
"C_SoundAreaEntitySphere",
13081307
"C_SoundEventSphereEntity",
1308+
"CSoundAreaEntitySphere",
1309+
"C_SoundAreaEntitySphere",
13091310
]);
13101311
});
13111312

@@ -2846,3 +2847,161 @@ describe("boundary and edge cases", () => {
28462847
expect(result.every((d) => d.name !== "CEnvSoundscape")).toBe(true);
28472848
});
28482849
});
2850+
2851+
// -- fuzzyScore --
2852+
2853+
// Unit tests for fuzzyScore(pattern, target).
//
// Score bands pinned by the assertions below (lower is better):
//   0             exact match, ignoring case; also the empty pattern
//   [100, 200)    target starts with the pattern
//   [200, 1000)   pattern occurs as a contiguous substring
//   >= 1000       non-contiguous (fuzzy) match
//   null          no match
// Every pattern in this suite is passed in lower case.
describe("fuzzyScore", () => {
  it("returns 0 for exact match", () => {
    expect(fuzzyScore("cbaseentity", "CBaseEntity")).toBe(0);
  });

  it("returns 0 for exact match same length", () => {
    expect(fuzzyScore("abc", "abc")).toBe(0);
  });

  it("returns prefix score for prefix match", () => {
    const score = fuzzyScore("cbase", "CBaseEntity")!;
    expect(score).toBeGreaterThanOrEqual(100);
    expect(score).toBeLessThan(200);
  });

  it("shorter target ranks higher for prefix", () => {
    const short = fuzzyScore("cbase", "CBaseEnt")!;
    const long = fuzzyScore("cbase", "CBaseEntity")!;
    expect(short).toBeLessThan(long);
  });

  it("returns substring score for contiguous substring", () => {
    const score = fuzzyScore("entity", "CBaseEntity")!;
    expect(score).toBeGreaterThanOrEqual(200);
    expect(score).toBeLessThan(1000);
  });

  it("earlier substring position scores better", () => {
    const early = fuzzyScore("base", "CBaseEntity")!; // index 1
    const late = fuzzyScore("base", "SomeClassBase")!; // index 9
    expect(early).toBeLessThan(late);
  });

  it("returns null for no match", () => {
    expect(fuzzyScore("xyz", "CBaseEntity")).toBeNull();
  });

  it("returns null for pattern longer than target", () => {
    expect(fuzzyScore("cbaseentitylong", "CBase")).toBeNull();
  });

  it("returns fuzzy score for non-contiguous match", () => {
    const score = fuzzyScore("cbe", "CBaseEntity")!;
    expect(score).toBeGreaterThanOrEqual(1000);
    expect(score).toBeLessThan(5000);
  });

  it("does not fuzzy-match 1-char patterns", () => {
    // 'c' does not occur anywhere in 'Base', so there is nothing to match.
    // For a single character a fuzzy (subsequence) hit and a substring hit
    // are the same thing, so this can only pin the "absent -> null" case.
    expect(fuzzyScore("c", "Base")).toBeNull();
  });

  it("does not fuzzy-match 2-char patterns", () => {
    // 'c' and 'b' both occur, in order, but not adjacent: no fuzzy fallback.
    expect(fuzzyScore("cb", "CxxxxxByyy")).toBeNull();
    // But substring still works ("cb" starts at index 1 of "xcby")
    expect(fuzzyScore("cb", "xcby")).toBe(201);
  });

  it("fuzzy matches 3+ char patterns", () => {
    expect(fuzzyScore("cbe", "CBaseEntity")).not.toBeNull();
  });

  it("boundary matches score better than scattered", () => {
    // CBE -> CBaseEntity (all boundary hits: C, B, E)
    const boundary = fuzzyScore("cbe", "CBaseEntity")!;
    // cbe -> xCxxxBxxxxxExx (scattered)
    const scattered = fuzzyScore("cbe", "xCxxxBxxxxxExx")!;
    expect(boundary).toBeLessThan(scattered);
  });

  it("exact always beats prefix", () => {
    const exact = fuzzyScore("cbase", "CBase")!;
    const prefix = fuzzyScore("cbase", "CBaseEntity")!;
    expect(exact).toBeLessThan(prefix);
  });

  it("prefix always beats substring", () => {
    const prefix = fuzzyScore("base", "BaseEntity")!;
    const substr = fuzzyScore("base", "CBaseEntity")!;
    expect(prefix).toBeLessThan(substr);
  });

  it("substring always beats fuzzy", () => {
    const substr = fuzzyScore("base", "CBaseEntity")!;
    const fuz = fuzzyScore("bse", "CBaseEntity")!;
    expect(substr).toBeLessThan(fuz);
  });

  it("matches camelCase boundaries", () => {
    // "cswb" -> C_CSWeaponBase (C, S, W, B at boundaries)
    const score = fuzzyScore("cswb", "C_CSWeaponBase");
    expect(score).not.toBeNull();
    expect(score!).toBeGreaterThanOrEqual(1000);
  });

  it("handles m_ prefix naturally", () => {
    // "fl" is a contiguous substring of "m_flFalloff" (right after "m_"),
    // so the score must land in the substring band, not the fuzzy one.
    const score = fuzzyScore("fl", "m_flFalloff");
    expect(score).not.toBeNull();
    expect(score!).toBeGreaterThanOrEqual(200);
    expect(score!).toBeLessThan(1000);
  });

  it("handles initfromsnapshot pattern", () => {
    // The target has "_" and "CP" between the pattern's letters, so this is
    // expected to land in the fuzzy band.
    const score = fuzzyScore("initfromsnapshot", "C_INIT_InitFromCPSnapshot");
    expect(score).not.toBeNull();
    expect(score!).toBeGreaterThanOrEqual(1000);
  });

  it("case-insensitive matching", () => {
    expect(fuzzyScore("cbase", "CBASE")).toBe(0);
    expect(fuzzyScore("cbase", "cbase")).toBe(0);
  });

  it("empty pattern returns 0", () => {
    expect(fuzzyScore("", "anything")).toBe(0);
  });
});
2971+
2972+
// -- fuzzy search integration --
2973+
2974+
// Integration tests: fuzzy scoring as seen through searchDeclarations, run
// against the shared `declarations` fixture of this test file.
describe("fuzzy search integration", () => {
  it("fuzzy query finds declarations with non-contiguous match", () => {
    const result = searchDeclarations(declarations, parseSearch("cnvsph"));
    const names = result.map((d) => d.name);
    expect(names).toContain("CNavVolumeSphere");
  });

  it("exact match ranks above fuzzy match", () => {
    const result = searchDeclarations(declarations, parseSearch("CEffectData"));
    expect(result[0].name).toBe("CEffectData");
  });

  it("prefix ranks above substring which ranks above fuzzy", () => {
    const result = searchDeclarations(declarations, parseSearch("CFilter"));
    const names = result.map((d) => d.name);
    // Both CFilterEnemy and CFilterProximity are prefix matches, shorter name scores better
    const enemyIdx = names.indexOf("CFilterEnemy");
    const proxIdx = names.indexOf("CFilterProximity");
    expect(enemyIdx).toBeLessThan(proxIdx);
    // Fuzzy match (C_OP_RemapTransformVisibilityToVector) ranks after prefix matches.
    // The check is conditional: it is skipped when that name is not in the results.
    // NOTE: no substring-tier result is asserted here, despite the test title.
    const fuzzyIdx = names.indexOf("C_OP_RemapTransformVisibilityToVector");
    if (fuzzyIdx >= 0) {
      expect(proxIdx).toBeLessThan(fuzzyIdx);
    }
  });

  it("two-char query uses substring only, no fuzzy", () => {
    // "cb" with 2 chars: fuzzyScore returns null for non-substring matches
    // But field-level substring matching can still find "cb" in field names like m_CBodyComponent
    const result = searchDeclarations(declarations, parseSearch("cb"));
    // Only non-emptiness is asserted; that every hit really contains "cb"
    // (in the declaration name or a field/member name) is not checked here.
    expect(result.length).toBeGreaterThan(0);
  });
});

0 commit comments

Comments
 (0)