|
| 1 | +# Substrate-distance code retrieval — content-addressed code search. |
| 2 | + |
# Record a test failure when `actual` differs from `expected`.
# The recorded text is `msg` followed by the expected and actual values,
# each rendered with to_string.
fn assert_eq(actual, expected, msg) {
    if actual != expected {
        h detail = msg + ": expected " + to_string(expected) + " got " + to_string(actual);
        test_record_failure(detail);
    }
}
| 8 | + |
# Record `msg` as a test failure when `!cond` holds.
fn assert_true(cond, msg) {
    if !cond {
        test_record_failure(msg);
    }
}
| 10 | + |
# The corpus entry that differs from the query only in its parameter name
# (`a` instead of `x`) must be ranked first, with distance 0.
fn test_alpha_equivalent_matches_first() {
    h query = "fn f(x) { return x + 1; }";
    h candidates = [
        "fn unrelated() { return 99; }",
        "fn f(a) { return a + 1; }",
        "fn another() { return 42; }",
    ];
    h best = arr_get(omc_find_similar(query, candidates), 0);
    assert_eq(dict_get(best, "index"), 1, "alpha-equivalent at index 1");
    assert_eq(dict_get(best, "distance"), 0, "distance 0");
}
| 23 | + |
# Passing 3 as the third argument must yield exactly three results even
# though the corpus holds five entries.
fn test_top_k_limit() {
    h query = "fn a() { return 1; }";
    h candidates = [
        "fn a() { return 1; }",
        "fn b() { return 2; }",
        "fn c() { return 3; }",
        "fn d() { return 4; }",
        "fn e() { return 5; }",
    ];
    h top_three = omc_find_similar(query, candidates, 3);
    assert_eq(arr_len(top_three), 3, "top-3 respected");
}
| 36 | + |
# With no third (top_k) argument, the result must contain one entry per
# corpus item: three in, three out.
fn test_full_ranking() {
    h candidates = [
        "fn a() { return 1; }",
        "fn b() { return 2; }",
        "fn c() { return 3; }",
    ];
    assert_eq(arr_len(omc_find_similar("fn a() { return 1; }", candidates)), 3, "full ranking");
}
| 47 | + |
# Ranking is in ascending order of distance: each result's "distance" must
# be no greater than that of the result after it.
fn test_ascending_distance() {
    h corpus = [
        "fn similar(x) { return x + 1; }",
        "fn totally_different() { return arr_softmax(arr_neg([1.0, 2.0, 3.0])); }",
        "fn close(y) { return y + 1; }",
    ];
    h ranked = omc_find_similar("fn similar(x) { return x + 1; }", corpus);

    # No top_k argument is passed, so all three corpus entries are expected back.
    h count = arr_len(ranked);
    assert_eq(count, 3, "ascending: all 3 entries ranked");

    # assert_eq only calls test_record_failure (presumably without aborting
    # the test; confirm), so guard the indexed reads against a short result.
    if 3 <= count {
        h r0 = arr_get(ranked, 0);
        h r1 = arr_get(ranked, 1);
        h r2 = arr_get(ranked, 2);
        assert_true(dict_get(r0, "distance") <= dict_get(r1, "distance"), "0 ≤ 1");
        assert_true(dict_get(r1, "distance") <= dict_get(r2, "distance"), "1 ≤ 2");
    }
}
| 62 | + |
# Searching an empty corpus must produce an empty result.
fn test_empty_corpus() {
    h result_count = arr_len(omc_find_similar("fn f() {}", []));
    assert_eq(result_count, 0, "empty");
}
| 68 | + |
# A one-element corpus must yield exactly one result, and that result must
# refer to corpus index 0.
fn test_singleton_corpus() {
    h ranked = omc_find_similar("fn f() {}", ["fn g() { return 1; }"]);
    h count = arr_len(ranked);
    assert_eq(count, 1, "one entry");

    # assert_eq only calls test_record_failure (presumably without aborting
    # the test; confirm), so read element 0 only when it exists.
    if 1 <= count {
        h r = arr_get(ranked, 0);
        assert_eq(dict_get(r, "index"), 0, "index 0");
    }
}
| 76 | + |
# When the corpus contains the query string itself, the first result must
# have distance 0.
fn test_self_match() {
    h source = "fn loss(p, t) { return (p - t) * (p - t); }";
    h hit = arr_get(omc_find_similar(source, [source]), 0);
    assert_eq(dict_get(hit, "distance"), 0, "self → 0");
}
| 84 | + |
# A corpus entry that differs from the query only in its parameter names
# (`pred`/`target` instead of `p`/`t`) must come back with distance 0.
fn test_rename_match() {
    h query = "fn loss(p, t) { return (p - t) * (p - t); }";
    h renamed = "fn loss(pred, target) { return (pred - target) * (pred - target); }";
    h ranked = omc_find_similar(query, [renamed]);
    h top = arr_get(ranked, 0);
    assert_eq(dict_get(top, "distance"), 0, "rename → 0");
}
0 commit comments