|
| 1 | +#!/usr/bin/env bun |
| 2 | +/** |
| 3 | + * Convert LoCoMo dataset into Basic Memory markdown corpus + queries. |
| 4 | + * |
| 5 | + * LoCoMo conversations → daily session notes (like an agent's memory) |
| 6 | + * LoCoMo QA annotations → benchmark queries with ground truth |
| 7 | + * |
| 8 | + * Usage: |
| 9 | + * bun benchmark/convert-locomo.ts # Convert all 10 conversations |
| 10 | + * bun benchmark/convert-locomo.ts --conv=0 # Convert conversation 0 only |
| 11 | + * bun benchmark/convert-locomo.ts --conv=0 --conv=1 # Multiple conversations |
| 12 | + */ |
| 13 | + |
| 14 | +import { mkdir, readFile, writeFile } from "node:fs/promises" |
| 15 | +import { resolve } from "node:path" |
| 16 | + |
| 17 | +// --------------------------------------------------------------------------- |
| 18 | +// Types |
| 19 | +// --------------------------------------------------------------------------- |
| 20 | + |
/** A single utterance inside a LoCoMo session array. */
interface LoCoMoTurn {
  /** Name of the participant who produced this turn. */
  speaker: string
  /** Utterance text; rendered verbatim into the session note. */
  text: string
  /** Dialog identifier of the form "D<session>:<turn>", e.g. "D1:3". */
  dia_id: string
  /** Optional image URL attached to the turn (not used by this script). */
  img_url?: string
  /** Optional image caption attached to the turn (not used by this script). */
  blip_caption?: string
}
| 28 | + |
/** One question/answer annotation attached to a LoCoMo conversation. */
interface LoCoMoQA {
  /** Question text; becomes the benchmark query string. */
  question: string
  /** Reference answer, when the annotation provides one. */
  answer?: string
  /** Answer used as a fallback when `answer` is absent. */
  adversarial_answer?: string
  /** Numeric category code; translated to a name through CATEGORY_MAP. */
  category: number
  /** Dialog IDs (e.g. "D1:3") that locate the supporting turns. */
  evidence: string[]
}
| 36 | + |
/** One entry of the LoCoMo dataset array. */
interface LoCoMoConversation {
  sample_id: string
  /**
   * Loosely-typed conversation payload. The converter reads `speaker_a`,
   * `speaker_b`, `session_<n>` (array of turns) and `session_<n>_date_time`
   * from it.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- dataset keys are dynamic
  conversation: Record<string, any>
  /** QA annotations that become benchmark queries. */
  qa: LoCoMoQA[]
  /**
   * Per-session observations keyed by `session_<n>_observation`. Values are
   * `unknown` because the converter accepts either a plain string or an
   * object of the form `{ "Speaker": [["observation text", "D1:3"], ...] }`
   * and narrows at runtime.
   */
  observation?: Record<string, unknown>
  /** Per-session summaries keyed by `session_<n>_summary`. */
  session_summary?: Record<string, string>
  /** Event summaries; not read by this script. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape not used here
  event_summary?: Record<string, any>
}
| 45 | + |
/** A single benchmark query as serialized into queries.json. */
interface BenchmarkQuery {
  /** Unique identifier, built as `locomo_c<conversation>_q<question>`. */
  id: string
  /** The question text to run against the corpus. */
  query: string
  /** Category name (from CATEGORY_MAP, or `cat_<n>` for unknown codes). */
  category: string
  /** Corpus-relative paths of the notes that contain the evidence. */
  ground_truth: string[]
  /** Short expected answer text, when one is recorded. */
  expected_content?: string
  /** Free-form annotation, used for adversarial questions. */
  note?: string
}
| 54 | + |
| 55 | +// --------------------------------------------------------------------------- |
| 56 | +// Config |
| 57 | +// --------------------------------------------------------------------------- |
| 58 | + |
/** Absolute path of the directory that contains this script. */
const BENCHMARK_DIR = resolve(import.meta.dirname!, ".")

/** Absolute path of the LoCoMo dataset JSON file read by `main`. */
const DATASET_PATH = resolve(BENCHMARK_DIR, "datasets/locomo10.json")
| 61 | + |
/**
 * Translates the numeric `category` code of a LoCoMo QA entry into the
 * category name written to queries.json.
 */
const CATEGORY_MAP: Record<number, string> = {
  1: "single_hop",
  2: "multi_hop",
  3: "temporal",
  4: "open_domain",
  5: "adversarial", // convertConversation treats code 5 specially
}
| 69 | + |
| 70 | +// --------------------------------------------------------------------------- |
| 71 | +// Helpers |
| 72 | +// --------------------------------------------------------------------------- |
| 73 | + |
/**
 * Lower-cased English month name → zero-padded month number.
 * A Map is used (rather than an object literal) so that tokens such as
 * "constructor" cannot resolve to inherited Object.prototype members.
 */
const MONTH_NUMBERS: ReadonlyMap<string, string> = new Map([
  ["january", "01"], ["february", "02"], ["march", "03"], ["april", "04"],
  ["may", "05"], ["june", "06"], ["july", "07"], ["august", "08"],
  ["september", "09"], ["october", "10"], ["november", "11"], ["december", "12"],
])

/**
 * Parse a LoCoMo session timestamp into ISO date and 24-hour time parts.
 *
 * Example: "8:56 pm on 20 July, 2023" → { date: "2023-07-20", time: "20:56" }
 *
 * @param dateStr Text containing "<h>:<mm> am|pm on <day> <Month>[,] <yyyy>".
 * @returns The parsed parts, or null when the pattern does not match or the
 *   month token is not a full English month name (matched case-insensitively).
 */
function parseDateTime(dateStr: string): { date: string; time: string } | null {
  const match = dateStr.match(
    /(\d{1,2}):(\d{2})\s*(am|pm)\s+on\s+(\d{1,2})\s+(\w+),?\s+(\d{4})/i,
  )
  if (!match) return null

  const [, hour, min, ampm, day, month, year] = match

  // Convert the 12-hour clock to 24-hour: 12 am → 00, 1-11 pm → 13-23.
  const meridiem = ampm.toLowerCase()
  let h = Number.parseInt(hour, 10)
  if (meridiem === "pm" && h !== 12) h += 12
  if (meridiem === "am" && h === 12) h = 0

  // The regex is case-insensitive, so the month lookup must be as well.
  const m = MONTH_NUMBERS.get(month.toLowerCase())
  if (!m) return null

  return {
    date: `${year}-${m}-${day.padStart(2, "0")}`,
    time: `${String(h).padStart(2, "0")}:${min}`,
  }
}
| 100 | + |
/**
 * Extract the session number from a dialog ID.
 *
 * "D1:3" → 1, "D15:7" → 15. IDs that do not start with "D<digits>:" give null.
 */
function dialogIdToSessionNum(diaId: string): number | null {
  const captured = /^D(\d+):/.exec(diaId)
  if (captured === null) return null
  return Number.parseInt(captured[1], 10)
}
| 106 | + |
| 107 | +// --------------------------------------------------------------------------- |
| 108 | +// Conversion |
| 109 | +// --------------------------------------------------------------------------- |
| 110 | + |
/**
 * Convert one LoCoMo conversation into a markdown corpus plus benchmark queries.
 *
 * Produced files (keys are corpus-relative paths, values are file contents):
 *  - `people/<name>.md` for each of the two speakers
 *  - `conversations/<date>-session-<n>.md` for every `session_<n>` array
 *  - `MEMORY.md` with one "key event" line per session that has observations
 *
 * Each QA annotation becomes one query whose `ground_truth` lists the session
 * notes referenced by its evidence dialog IDs.
 *
 * @param conv Conversation entry from the dataset.
 * @param convIndex Index of the conversation, used to build query IDs.
 * @returns The generated files (in insertion order) and queries.
 */
function convertConversation(
  conv: LoCoMoConversation,
  convIndex: number,
): { files: Map<string, string>; queries: BenchmarkQuery[] } {
  const c = conv.conversation
  const speakerA: string = c.speaker_a || "Speaker A"
  const speakerB: string = c.speaker_b || "Speaker B"
  const files = new Map<string, string>()

  const sessionNumOf = (key: string): number => Number.parseInt(key.split("_")[1], 10)

  // Find all sessions, ordered numerically (session_2 before session_10).
  const sessionKeys = Object.keys(c)
    .filter((k) => /^session_\d+$/.test(k) && Array.isArray(c[k]))
    .sort((a, b) => sessionNumOf(a) - sessionNumOf(b))

  // Create a people note for each speaker.
  files.set(personNotePath(speakerA), renderPersonNote(speakerA, speakerB))
  files.set(personNotePath(speakerB), renderPersonNote(speakerB, speakerA))

  // MEMORY.md accumulates one key fact per session.
  const memoryLines: string[] = [
    `# Long-Term Memory`,
    "",
    `## People`,
    `- ${speakerA} and ${speakerB} are close friends who chat regularly`,
    "",
    `## Key Events`,
  ]

  // Convert each session to a dated note.
  for (const sessionKey of sessionKeys) {
    const sessionNum = sessionNumOf(sessionKey)
    const turns: LoCoMoTurn[] = c[sessionKey]
    const { date, time } = resolveSessionTiming(c, sessionNum)

    const summary = conv.session_summary?.[`${sessionKey}_summary`] || ""
    const observation = formatObservation(conv.observation?.[`${sessionKey}_observation`])

    let content = [
      "---",
      `title: ${date} Session ${sessionNum}`,
      "type: note",
      `date: ${date}`,
      "---",
      "",
      `# ${date} — Session ${sessionNum}`,
      "",
      `*${speakerA} and ${speakerB} — ${time}*`,
      "",
      "",
    ].join("\n")

    // Observations take precedence over the prose summary when both exist.
    if (observation) {
      content += `## Summary\n${observation}\n\n`
    } else if (summary) {
      content += `## Summary\n${summary}\n\n`
    }

    content += `## Conversation\n`
    for (const turn of turns) {
      // Dataset JSON is not validated, so guard against a missing text field.
      const rawText = typeof turn.text === "string" ? turn.text : ""
      const text = rawText.replace(/\n/g, "\n> ")
      content += `**${turn.speaker}:** ${text}\n\n`
    }

    content += `## Relations\n`
    content += `- mentions [[${speakerA}]]\n`
    content += `- mentions [[${speakerB}]]\n`

    if (observation) {
      // Strip the leading "- [speaker] " tag. The label is matched with
      // [^\]]* rather than \w+ because speaker names may contain spaces or
      // punctuation (the default label is "speaker a").
      const firstObs = observation.split("\n")[0]?.replace(/^- \[[^\]]*\] /, "") || ""
      if (firstObs) memoryLines.push(`- [${date}] ${firstObs}`)
    }

    files.set(sessionNotePath(date, sessionNum), content)
  }

  files.set("MEMORY.md", memoryLines.join("\n") + "\n")

  // Convert QA annotations to benchmark queries.
  const queries: BenchmarkQuery[] = (conv.qa ?? []).map((qa, qIdx) => {
    const category = CATEGORY_MAP[qa.category] || `cat_${qa.category}`

    // First non-empty candidate wins. Values are stringified because the
    // dataset is unvalidated JSON and an answer may not be a string
    // (NOTE(review): e.g. a bare year number — confirm against the dataset).
    const answer =
      [qa.answer, qa.adversarial_answer]
        .map((candidate: unknown) => (candidate == null ? "" : String(candidate)))
        .find((candidate) => candidate.length > 0) ?? ""

    // Map evidence dialog IDs to the session notes that contain them.
    const groundTruth = new Set<string>()
    for (const ev of qa.evidence || []) {
      if (typeof ev !== "string") continue
      // Tolerate entries that pack several IDs into one string ("D8:6; D9:17").
      for (const token of ev.split(/[\s;,]+/)) {
        const sessionNum = dialogIdToSessionNum(token)
        if (sessionNum === null) continue
        const { date } = resolveSessionTiming(c, sessionNum)
        groundTruth.add(sessionNotePath(date, sessionNum))
      }
    }

    // For adversarial questions the evidence files are still listed (they are
    // where the premise is contradicted); the answer goes in a note instead of
    // expected_content.
    const isAdversarial = qa.category === 5

    return {
      id: `locomo_c${convIndex}_q${qIdx}`,
      query: qa.question,
      category,
      ground_truth: [...groundTruth],
      expected_content: isAdversarial ? undefined : answer.length < 100 ? answer : undefined,
      note: isAdversarial ? `Adversarial: correct answer is "${answer}"` : undefined,
    }
  })

  return { files, queries }
}

/** Corpus-relative path of a speaker's person note. */
function personNotePath(name: string): string {
  return `people/${name.toLowerCase().replace(/\s+/g, "-")}.md`
}

/** Corpus-relative path of a session note. */
function sessionNotePath(date: string, sessionNum: number): string {
  return `conversations/${date}-session-${sessionNum}.md`
}

/** Render the markdown person note for `name`, who chats with `partner`. */
function renderPersonNote(name: string, partner: string): string {
  return [
    "---",
    `title: ${name}`,
    "type: Person",
    "---",
    "",
    `# ${name}`,
    "",
    "## Observations",
    "- [role] Conversation participant",
    `- [relationship] Regularly chats with ${partner}`,
    "",
  ].join("\n")
}

/**
 * Synthetic date for a session without a parseable timestamp: day `sessionNum`
 * counted from 2023-01-01, rolling over month boundaries so that session
 * numbers above 31 still yield a valid calendar date.
 */
function fallbackSessionDate(sessionNum: number): string {
  const synthetic = new Date(Date.UTC(2023, 0, sessionNum))
  if (Number.isNaN(synthetic.getTime())) {
    // Out of Date range (absurdly large number); keep a deterministic label.
    return `2023-01-${String(sessionNum).padStart(2, "0")}`
  }
  return synthetic.toISOString().slice(0, 10)
}

/**
 * Resolve date and time for session `sessionNum` from the conversation's
 * `session_<n>_date_time` field, falling back to a synthetic date and "12:00".
 * Used for both note file names and ground-truth paths so the two always agree.
 */
function resolveSessionTiming(
  conversation: Record<string, unknown>,
  sessionNum: number,
): { date: string; time: string } {
  const raw = conversation[`session_${sessionNum}_date_time`]
  const parsed = typeof raw === "string" && raw ? parseDateTime(raw) : null
  return {
    date: parsed?.date || fallbackSessionDate(sessionNum),
    time: parsed?.time || "12:00",
  }
}

/**
 * Flatten a session's raw observation value into markdown bullet lines.
 * Accepts either a plain string (returned unchanged) or an object of the form
 * `{ "Speaker": [["observation text", "D1:3"], ...] }`; anything else gives "".
 */
function formatObservation(rawObs: unknown): string {
  if (typeof rawObs === "string") return rawObs
  if (!rawObs || typeof rawObs !== "object") return ""

  const lines: string[] = []
  for (const [speaker, obs] of Object.entries(rawObs as Record<string, unknown>)) {
    if (!Array.isArray(obs)) continue
    for (const item of obs) {
      const text: unknown = Array.isArray(item) ? item[0] : item
      if (typeof text === "string") lines.push(`- [${speaker.toLowerCase()}] ${text}`)
    }
  }
  return lines.join("\n")
}
| 276 | + |
| 277 | +// --------------------------------------------------------------------------- |
| 278 | +// Main |
| 279 | +// --------------------------------------------------------------------------- |
| 280 | + |
/**
 * CLI entry point: load the LoCoMo dataset, convert the selected conversations
 * and write each one's markdown files and queries.json under
 * `corpus-locomo/conv-<index>` next to this script.
 *
 * Conversations are selected with repeated `--conv=<index>` arguments; with
 * none given, every conversation in the dataset is converted. Indices that do
 * not exist in the dataset are reported and skipped.
 *
 * @throws If the dataset cannot be read, is not valid JSON, or is not an array.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2)
  // De-duplicate so a repeated --conv value is not converted twice.
  const convIndices = [
    ...new Set(
      args
        .filter((a) => a.startsWith("--conv="))
        .map((a) => Number.parseInt(a.slice("--conv=".length), 10)),
    ),
  ]

  console.log("Loading LoCoMo dataset...")
  const raw = await readFile(DATASET_PATH, "utf-8")
  const parsedData: unknown = JSON.parse(raw)
  if (!Array.isArray(parsedData)) {
    throw new Error(`Expected a JSON array of conversations in ${DATASET_PATH}`)
  }
  const data = parsedData as LoCoMoConversation[]
  console.log(` ${data.length} conversations loaded`)

  const indices = convIndices.length > 0 ? convIndices : data.map((_, i) => i)
  let totalFiles = 0
  let totalQueries = 0
  let convertedCount = 0

  for (const idx of indices) {
    const conv = data[idx]
    if (!conv) {
      console.error(` Conversation ${idx} not found, skipping`)
      continue
    }

    const convDir = `corpus-locomo/conv-${idx}`
    const outDir = resolve(BENCHMARK_DIR, convDir)

    console.log(`\nConverting conversation ${idx} (${conv.conversation.speaker_a} & ${conv.conversation.speaker_b})...`)

    const { files, queries } = convertConversation(conv, idx)

    // Make sure the conversation directory exists even before any file is written.
    await mkdir(outDir, { recursive: true })

    // Write files
    for (const [path, content] of files) {
      const fullPath = resolve(outDir, path)
      // resolve(fullPath, "..") is the file's parent directory.
      await mkdir(resolve(fullPath, ".."), { recursive: true })
      await writeFile(fullPath, content)
    }

    // Write queries
    const queriesPath = resolve(outDir, "queries.json")
    await writeFile(queriesPath, JSON.stringify(queries, null, 2))

    console.log(` ${files.size} markdown files, ${queries.length} queries`)
    totalFiles += files.size
    totalQueries += queries.length
    convertedCount += 1

    // Category breakdown
    const cats: Record<string, number> = {}
    for (const q of queries) {
      cats[q.category] = (cats[q.category] || 0) + 1
    }
    for (const [cat, count] of Object.entries(cats).sort()) {
      console.log(` ${cat}: ${count}`)
    }
  }

  // Report conversations actually converted, not merely requested.
  console.log(`\n✅ Total: ${totalFiles} files, ${totalQueries} queries across ${convertedCount} conversations`)
  console.log(` Output: benchmark/corpus-locomo/`)
}
| 338 | + |
// Run the converter; on any failure log the error and exit with status 1.
main().then(undefined, (failure) => {
  console.error("Conversion failed:", failure)
  process.exit(1)
})
0 commit comments