Skip to content

Commit 55dbed4

Browse files
authored
Merge branch 'main' into AgentMemory/v04-pr-k2a1-kl-integration-8e7f
2 parents c1c1478 + e2db26c commit 55dbed4

18 files changed

Lines changed: 2795 additions & 0 deletions
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
{
2+
"schema_version": 1,
3+
"kind": "k1d_dlm_restored_verifier_smoke",
4+
"model": "google/gemma-3-1b-it",
5+
"device": "mps",
6+
"dtype": "torch.bfloat16",
7+
"seq_len": 256,
8+
"configs": [
9+
{
10+
"name": "oracle_full_attention",
11+
"shape": [
12+
1,
13+
256,
14+
262144
15+
],
16+
"last_token_norm": 1232.609130859375,
17+
"last_token_argmax": 52564,
18+
"last_token_max": 10.875,
19+
"last_token_min": -9.6875,
20+
"any_nan": false,
21+
"any_inf": false,
22+
"elapsed_s": 0.3479745000367984
23+
},
24+
{
25+
"name": "v04_sink_4_window_64",
26+
"shape": [
27+
1,
28+
256,
29+
262144
30+
],
31+
"last_token_norm": 1232.609130859375,
32+
"last_token_argmax": 52564,
33+
"last_token_max": 10.875,
34+
"last_token_min": -9.6875,
35+
"any_nan": false,
36+
"any_inf": false,
37+
"elapsed_s": 0.5252928750123829,
38+
"kl_vs_oracle": 0.0,
39+
"argmax_matches_oracle": true
40+
},
41+
{
42+
"name": "v04_no_eviction",
43+
"shape": [
44+
1,
45+
256,
46+
262144
47+
],
48+
"last_token_norm": 1232.609130859375,
49+
"last_token_argmax": 52564,
50+
"last_token_max": 10.875,
51+
"last_token_min": -9.6875,
52+
"any_nan": false,
53+
"any_inf": false,
54+
"elapsed_s": 0.4693464580923319,
55+
"kl_vs_oracle": 0.0,
56+
"argmax_matches_oracle": true
57+
}
58+
],
59+
"smoke_gate": {
60+
"pass": true,
61+
"failures": [],
62+
"no_eviction_kl_threshold": 0.001
63+
}
64+
}
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
{
2+
"schema_version": 1,
3+
"kind": "k1e_niah_validation",
4+
"config": {
5+
"model": "google/gemma-3-1b-it",
6+
"device": "mps",
7+
"dtype": "torch.bfloat16",
8+
"n_samples": 20,
9+
"haystack_min_lines": 60,
10+
"haystack_max_lines": 80,
11+
"sink_size": 4,
12+
"window_size": 64,
13+
"max_new_tokens": 24,
14+
"seed": 42,
15+
"prompt_token_len_min": 1234,
16+
"prompt_token_len_max": 1634,
17+
"prompt_token_len_mean": 1428
18+
},
19+
"results": {
20+
"oracle_full_attention": {
21+
"name": "oracle_full_attention",
22+
"recall": 1.0,
23+
"samples_correct": 20,
24+
"samples_total": 20,
25+
"mean_latency_s": 69.05884543120628,
26+
"median_latency_s": 74.45612387452275,
27+
"per_sample_decoded": [
28+
"BETA-1409jenotained\n\n\n\n\n\n",
29+
"DELTA-3286 \u0441\u0443\u0445\u043e\n\u9065\n\n\n\n\n",
30+
"The secret code is ORCHID-9935.jero.\u0bae\u0ba3\u0bbf\n Climber",
31+
"Answer: ETA-1520\u0947\u092a kucing\n\n\n\n\n",
32+
"DELTA-4811\u82b8\u591a\u591a\n\n\n\n\n\n",
33+
"The secret code is ALPHA-4257.jero.\u0bae\u0ba3\u0bbf\n Climbs\n",
34+
"DELTA-8359jero.\u0bae\u0ba3\u0bbf muka\n\n\n\n\n",
35+
"MAPLE-3615 \u0915\u0943\u0937\u094d\u0923 assume. gebruikers.yl beheer. beheer.",
36+
"The secret code is ZETA-5552.jero.\u0bae\u0ba3\u0bbf.\n \u0633\u0644\u0645.",
37+
"MAPLE-6514 AIApexApex\n\u05d9\u05d5\n\n\n",
38+
"ETA-2584 \u0915\u0943\u0937\u094d\u0923\u0926\u0947\u0935jenojenojenojenojenojenojeno",
39+
"The secret code is KAPPA-5333.jero.\u0bae\u0ba3\u0bbf\u0bae\u0ba3\u0bbf\u0bae\u0ba3\u0bbf\u0bae\u0ba3\u0bbf",
40+
"IOTA-3045\n\u06cc iStock\n\n\n\n\n",
41+
"IOTA-5803kanzaki.\n\n\n\n\n",
42+
"The secret code is KAPPA-6925.Users are encouraged to keep this secret.Apex",
43+
"The secret code is ALPHA-4733.Users are given a series of paragraphs and asked to identify the",
44+
"BETA-4814\n\u06cc understand. Nehru\n\n\n\n",
45+
"ETA-5554\n\u06ccuriApex Climber-",
46+
"The secret code is OAK-6977.jero.\u0bae\u0ba3\u0bbf.\u0ba8\u0bbf\u0bb2\u0bc8\u0baf\u0bbf\u0bb2\u0bcd.\u0ba8\u0bbf\u0bb2\u0bc8\u0baf\u0bbf\u0bb2\u0bcd",
47+
"ZETA-4432 \u0930\u0939\u093f\u0924 \u0915\u0943\u0937\u094d\u0923\u9065\n\n\n"
48+
],
49+
"per_sample_correct": [
50+
true,
51+
true,
52+
true,
53+
true,
54+
true,
55+
true,
56+
true,
57+
true,
58+
true,
59+
true,
60+
true,
61+
true,
62+
true,
63+
true,
64+
true,
65+
true,
66+
true,
67+
true,
68+
true,
69+
true
70+
]
71+
},
72+
"v03_sink_window": {
73+
"name": "v03_sink_window",
74+
"recall": 0.0,
75+
"samples_correct": 0,
76+
"samples_total": 20,
77+
"mean_latency_s": 67.54091061030631,
78+
"median_latency_s": 69.09235672897194,
79+
"per_sample_decoded": [
80+
"Okay, let's analyze the image.\n\nThe secret code is: **\"Hello, World!\"**\n\nLet",
81+
"Okay, let's crack this code!\n\nThe secret code is: **\"The quick brown fox jumps over the",
82+
"Okay, let's crack this!\n\nThe secret code is: **SOS**\n\nLet me know if you'",
83+
"Okay, let's analyze the image and figure out the secret code.\n\nThe image shows a series of dots and",
84+
"Okay, let's play a game!\n\nThe secret code is: **741**\n\nLet me know",
85+
"Okay, let's break down the image and figure out the secret code.\n\nThe image shows a series of dots",
86+
"Okay, let's analyze the image and try to decipher the secret code.\n\nThe image shows a series of dots",
87+
"I cannot provide you with a secret code. My purpose is to be helpful and harmless, and that includes protecting people from",
88+
"Okay, let's analyze the image and try to decipher the secret code.\n\nThe image shows a series of dots",
89+
"The secret code is: **SOS**\n \u0938\u0941\u0928\u0947\u0442\u043e, \u044f \u043d\u0435 \u0437\u043d\u0430\u044e, \u0447\u0442\u043e \u044d\u0442\u043e \u0437\u043d\u0430\u0447\u0438\u0442.\n",
90+
"Okay, let\u2019s play a game!\n\nThe secret code is: **741**\n\nLet me know",
91+
"The secret code is \u201cSOS\u201d.\nyer.",
92+
"The secret code is \u201cSOS\u201d.IDO",
93+
"I cannot provide you with a secret code. My purpose is to be helpful and harmless, and that includes protecting people from",
94+
"Okay, let's analyze the image.\n\nThe secret code is: **\"Hello, World!\"**\n\nLet",
95+
"Okay, let\u2019s play a game!\n\nThe secret code is: **741**\n\nLet me know",
96+
"Okay, let's crack this code!\n\nThe secret code is: **\"The quick brown fox jumps over the",
97+
"Okay, let's crack this!\n\nThe secret code is: **\"The quick brown fox jumps over the lazy",
98+
"Okay, let's analyze the image.\n\nThe secret code is: **\"Hello, World!\"**\n\nLet",
99+
"The secret code is \u201cSOS\u201d.\nyer."
100+
],
101+
"per_sample_correct": [
102+
false,
103+
false,
104+
false,
105+
false,
106+
false,
107+
false,
108+
false,
109+
false,
110+
false,
111+
false,
112+
false,
113+
false,
114+
false,
115+
false,
116+
false,
117+
false,
118+
false,
119+
false,
120+
false,
121+
false
122+
]
123+
},
124+
"v04_dlm_restored": {
125+
"name": "v04_dlm_restored",
126+
"recall": 1.0,
127+
"samples_correct": 20,
128+
"samples_total": 20,
129+
"mean_latency_s": 93.37290023328387,
130+
"median_latency_s": 97.49186937493505,
131+
"per_sample_decoded": [
132+
"BETA-1409jenotained\n\n\n\n\n\n",
133+
"DELTA-3286 \u0441\u0443\u0445\u043e\n\u9065\n\n\n\n\n",
134+
"The secret code is ORCHID-9935.jero.\u0bae\u0ba3\u0bbf\n Climber",
135+
"Answer: ETA-1520\u0947\u092a kucing\n\n\n\n\n",
136+
"DELTA-4811\u82b8\u591a\u591a\n\n\n\n\n\n",
137+
"The secret code is ALPHA-4257.jero.\u0bae\u0ba3\u0bbf\n Climbs\n",
138+
"DELTA-8359jero.\u0bae\u0ba3\u0bbf muka\n\n\n\n\n",
139+
"MAPLE-3615 \u0915\u0943\u0937\u094d\u0923 assume. gebruikers.yl beheer. beheer.",
140+
"The secret code is ZETA-5552.jero.\u0bae\u0ba3\u0bbf.\n \u0633\u0644\u0645.",
141+
"MAPLE-6514 AIApexApex\n\u05d9\u05d5\n\n\n",
142+
"ETA-2584 \u0915\u0943\u0937\u094d\u0923\u0926\u0947\u0935jenojenojenojenojenojenojeno",
143+
"The secret code is KAPPA-5333.jero.\u0bae\u0ba3\u0bbf\u0bae\u0ba3\u0bbf\u0bae\u0ba3\u0bbf\u0bae\u0ba3\u0bbf",
144+
"IOTA-3045\n\u06cc iStock\n\n\n\n\n",
145+
"IOTA-5803kanzaki.\n\n\n\n\n",
146+
"The secret code is KAPPA-6925.Users are encouraged to keep this secret.Apex",
147+
"The secret code is ALPHA-4733.Users are given a series of paragraphs and asked to identify the",
148+
"BETA-4814\n\u06cc understand. Nehru\n\n\n\n",
149+
"ETA-5554\n\u06ccuriApex Climber-",
150+
"The secret code is OAK-6977.jero.\u0bae\u0ba3\u0bbf.\u0ba8\u0bbf\u0bb2\u0bc8\u0baf\u0bbf\u0bb2\u0bcd.\u0ba8\u0bbf\u0bb2\u0bc8\u0baf\u0bbf\u0bb2\u0bcd",
151+
"ZETA-4432 \u0930\u0939\u093f\u0924 \u0915\u0943\u0937\u094d\u0923\u9065\n\n\n"
152+
],
153+
"per_sample_correct": [
154+
true,
155+
true,
156+
true,
157+
true,
158+
true,
159+
true,
160+
true,
161+
true,
162+
true,
163+
true,
164+
true,
165+
true,
166+
true,
167+
true,
168+
true,
169+
true,
170+
true,
171+
true,
172+
true,
173+
true
174+
]
175+
}
176+
},
177+
"gate": {
178+
"v04_vs_oracle_delta": 0.0,
179+
"v04_recall_ge_0_95": true,
180+
"v04_within_5pct_of_oracle": true,
181+
"v04_vs_v03_improvement": 1.0,
182+
"v04_dominates_v03": true
183+
}
184+
}

0 commit comments

Comments
 (0)