|
17 | 17 | "id": "exact_03", |
18 | 18 | "query": "What is the Linear workspace ID?", |
19 | 19 | "category": "needle_in_haystack", |
20 | | - "ground_truth": ["memory/2026-02-11.md", "memory/topics/linear-integration.md"], |
| 20 | + "ground_truth": [ |
| 21 | + "memory/2026-02-11.md", |
| 22 | + "memory/topics/linear-integration.md" |
| 23 | + ], |
21 | 24 | "expected_content": "ws_stl_prod_7x2k" |
22 | 25 | }, |
23 | 26 | { |
|
31 | 34 | "id": "exact_05", |
32 | 35 | "query": "What is the Linear API rate limit?", |
33 | 36 | "category": "exact_fact", |
34 | | - "ground_truth": ["memory/topics/linear-integration.md", "memory/2026-02-12.md"], |
| 37 | + "ground_truth": [ |
| 38 | + "memory/topics/linear-integration.md", |
| 39 | + "memory/2026-02-12.md" |
| 40 | + ], |
35 | 41 | "expected_content": "100 req/min" |
36 | 42 | }, |
37 | 43 | { |
38 | 44 | "id": "exact_06", |
39 | 45 | "query": "What is the trial to paid conversion rate?", |
40 | 46 | "category": "exact_fact", |
41 | | - "ground_truth": ["memory/2026-02-12.md", "memory/tasks/onboarding-redesign.md"], |
| 47 | + "ground_truth": [ |
| 48 | + "memory/2026-02-12.md", |
| 49 | + "memory/tasks/onboarding-redesign.md" |
| 50 | + ], |
42 | 51 | "expected_content": "23%" |
43 | 52 | }, |
44 | 53 | { |
45 | 54 | "id": "exact_07", |
46 | 55 | "query": "How many GitHub stars do we have?", |
47 | 56 | "category": "exact_fact", |
48 | | - "ground_truth": ["memory/2026-02-12.md", "MEMORY.md", "memory/topics/competitive-landscape.md"], |
| 57 | + "ground_truth": [ |
| 58 | + "memory/2026-02-12.md", |
| 59 | + "MEMORY.md", |
| 60 | + "memory/topics/competitive-landscape.md" |
| 61 | + ], |
49 | 62 | "expected_content": "1,847" |
50 | 63 | }, |
51 | 64 | { |
|
70 | 83 | "id": "semantic_04", |
71 | 84 | "query": "What's our approach to user onboarding?", |
72 | 85 | "category": "semantic", |
73 | | - "ground_truth": ["memory/tasks/onboarding-redesign.md", "memory/2026-02-11.md"] |
| 86 | + "ground_truth": [ |
| 87 | + "memory/tasks/onboarding-redesign.md", |
| 88 | + "memory/2026-02-11.md" |
| 89 | + ] |
74 | 90 | }, |
75 | 91 | { |
76 | 92 | "id": "semantic_05", |
|
100 | 116 | "id": "temporal_04", |
101 | 117 | "query": "What's the timeline for the onboarding redesign?", |
102 | 118 | "category": "temporal", |
103 | | - "ground_truth": ["memory/tasks/onboarding-redesign.md", "memory/2026-02-11.md"] |
| 119 | + "ground_truth": [ |
| 120 | + "memory/tasks/onboarding-redesign.md", |
| 121 | + "memory/2026-02-11.md" |
| 122 | + ] |
104 | 123 | }, |
105 | 124 | { |
106 | 125 | "id": "temporal_05", |
|
113 | 132 | "id": "relational_01", |
114 | 133 | "query": "What is Raj working on?", |
115 | 134 | "category": "relational", |
116 | | - "ground_truth": ["memory/people/raj-patel.md", "memory/2026-02-12.md", "memory/2026-02-14.md"] |
| 135 | + "ground_truth": [ |
| 136 | + "memory/people/raj-patel.md", |
| 137 | + "memory/2026-02-12.md", |
| 138 | + "memory/2026-02-14.md" |
| 139 | + ] |
117 | 140 | }, |
118 | 141 | { |
119 | 142 | "id": "relational_02", |
120 | 143 | "query": "Who is involved in the onboarding redesign?", |
121 | 144 | "category": "relational", |
122 | | - "ground_truth": ["memory/tasks/onboarding-redesign.md", "memory/people/lena-vogt.md", "memory/2026-02-11.md"] |
| 145 | + "ground_truth": [ |
| 146 | + "memory/tasks/onboarding-redesign.md", |
| 147 | + "memory/people/lena-vogt.md", |
| 148 | + "memory/2026-02-11.md" |
| 149 | + ] |
123 | 150 | }, |
124 | 151 | { |
125 | 152 | "id": "relational_03", |
|
137 | 164 | "id": "relational_05", |
138 | 165 | "query": "What's connected to the Linear integration?", |
139 | 166 | "category": "relational", |
140 | | - "ground_truth": ["memory/topics/linear-integration.md", "memory/2026-02-10.md", "memory/2026-02-12.md", "memory/2026-02-14.md"] |
| 167 | + "ground_truth": [ |
| 168 | + "memory/topics/linear-integration.md", |
| 169 | + "memory/2026-02-10.md", |
| 170 | + "memory/2026-02-12.md", |
| 171 | + "memory/2026-02-14.md" |
| 172 | + ] |
141 | 173 | }, |
142 | 174 | { |
143 | 175 | "id": "cross_note_01", |
144 | 176 | "query": "Summarize all architecture decisions we've made", |
145 | 177 | "category": "cross_note", |
146 | | - "ground_truth": ["MEMORY.md", "memory/2026-02-10.md", "memory/2026-02-11.md"] |
| 178 | + "ground_truth": [ |
| 179 | + "MEMORY.md", |
| 180 | + "memory/2026-02-10.md", |
| 181 | + "memory/2026-02-11.md" |
| 182 | + ] |
147 | 183 | }, |
148 | 184 | { |
149 | 185 | "id": "cross_note_02", |
|
161 | 197 | "id": "cross_note_04", |
162 | 198 | "query": "What are all the things Lena has done?", |
163 | 199 | "category": "cross_note", |
164 | | - "ground_truth": ["memory/people/lena-vogt.md", "memory/2026-02-10.md", "memory/2026-02-11.md", "memory/2026-02-14.md", "memory/tasks/onboarding-redesign.md"] |
| 200 | + "ground_truth": [ |
| 201 | + "memory/people/lena-vogt.md", |
| 202 | + "memory/2026-02-10.md", |
| 203 | + "memory/2026-02-11.md", |
| 204 | + "memory/2026-02-14.md", |
| 205 | + "memory/tasks/onboarding-redesign.md" |
| 206 | + ] |
165 | 207 | }, |
166 | 208 | { |
167 | 209 | "id": "task_01", |
168 | 210 | "query": "What are the active tasks?", |
169 | 211 | "category": "task_recall", |
170 | | - "ground_truth": ["memory/tasks/onboarding-redesign.md", "memory/tasks/incident-runbook.md"] |
| 212 | + "ground_truth": [ |
| 213 | + "memory/tasks/onboarding-redesign.md", |
| 214 | + "memory/tasks/incident-runbook.md" |
| 215 | + ] |
171 | 216 | }, |
172 | 217 | { |
173 | 218 | "id": "task_02", |
|
206 | 251 | "id": "needle_03", |
207 | 252 | "query": "What version was the Linear integration shipped in?", |
208 | 253 | "category": "needle_in_haystack", |
209 | | - "ground_truth": ["memory/2026-02-14.md", "memory/topics/linear-integration.md"], |
| 254 | + "ground_truth": [ |
| 255 | + "memory/2026-02-14.md", |
| 256 | + "memory/topics/linear-integration.md" |
| 257 | + ], |
210 | 258 | "expected_content": "v0.9.0-beta.1" |
211 | 259 | }, |
212 | 260 | { |
213 | 261 | "id": "needle_04", |
214 | 262 | "query": "What is the webhook queue max depth?", |
215 | 263 | "category": "needle_in_haystack", |
216 | | - "ground_truth": ["memory/2026-02-14.md", "memory/topics/linear-integration.md"], |
| 264 | + "ground_truth": [ |
| 265 | + "memory/2026-02-14.md", |
| 266 | + "memory/topics/linear-integration.md" |
| 267 | + ], |
217 | 268 | "expected_content": "5000" |
218 | 269 | }, |
219 | 270 | { |
|
243 | 294 | "id": "evolving_02", |
244 | 295 | "query": "What's the current state of the webhook architecture?", |
245 | 296 | "category": "evolving_fact", |
246 | | - "ground_truth": ["memory/2026-02-14.md", "memory/2026-02-12.md", "memory/2026-02-11.md"], |
| 297 | + "ground_truth": [ |
| 298 | + "memory/2026-02-14.md", |
| 299 | + "memory/2026-02-12.md", |
| 300 | + "memory/2026-02-11.md" |
| 301 | + ], |
247 | 302 | "note": "Evolved from proposal (Feb 11) → incident (Feb 12) → fix (Feb 14)" |
248 | 303 | } |
249 | 304 | ] |
0 commit comments