|
| 1 | +import { mkdir, readFile, writeFile } from 'node:fs/promises'; |
| 2 | +import { dirname } from 'node:path'; |
| 3 | + |
// Publicly shared Paperpile BibTeX export feeding the related-research list.
const BIB_URL = 'https://paperpile.com/eb/VDzRdyJpus';
// Human-readable shared Paperpile page recorded alongside the generated data.
const SHARED_PAGE_URL = 'https://paperpile.com/shared/sfRoEJyQ5QA2Oe4GSpZo2~A';
// Destination of the generated dataset, relative to the repository root.
const OUTPUT_PATH = 'src/data/related-research.json';
| 7 | + |
// Maps three-letter BibTeX month abbreviations to their 1-based calendar order.
const MONTH_ORDER = Object.fromEntries(
  ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'].map(
    (abbr, index) => [abbr, index + 1],
  ),
);
| 22 | + |
/**
 * Normalizes a raw BibTeX field value: strips brace groups, collapses
 * whitespace runs to single spaces, unescapes `\&`, and trims both ends.
 * @param {string} value - Raw field value as it appears in the .bib source.
 * @returns {string} The cleaned, display-ready value.
 */
function cleanValue(value) {
  const withoutBraces = value.replace(/[{}]/g, '');
  const collapsed = withoutBraces.replace(/\s+/g, ' ');
  const unescaped = collapsed.replace(/\\&/g, '&');
  return unescaped.trim();
}
| 30 | + |
/**
 * Resolves a BibTeX `month` field to a 1-12 sort key.
 * Accepts three-letter abbreviations (`jan`), full month names (`January`),
 * and numeric strings (`7`, `07`); anything else yields 0 (sorts last).
 * @param {string} rawMonth - Raw month field value, possibly empty.
 * @returns {number} Month number 1-12, or 0 when unknown/absent.
 */
function parseMonth(rawMonth) {
  if (!rawMonth) return 0;
  const normalized = cleanValue(rawMonth).toLowerCase();
  if (MONTH_ORDER[normalized]) return MONTH_ORDER[normalized];
  // BUGFIX: full month names ("january") previously fell through to the
  // numeric parse and came back 0; match on the first three letters too.
  const abbreviated = normalized.slice(0, 3);
  if (MONTH_ORDER[abbreviated]) return MONTH_ORDER[abbreviated];
  const numeric = Number.parseInt(normalized, 10);
  if (Number.isFinite(numeric) && numeric >= 1 && numeric <= 12) return numeric;
  return 0;
}
| 39 | + |
/**
 * Extracts a delimited span from `text` starting at `start` (which must sit
 * on the opening delimiter). Backslash escapes are honored in both modes:
 * identical delimiters (e.g. `"` ... `"`) scan for the next unescaped close,
 * while distinct delimiters (e.g. `{` ... `}`) track nesting depth.
 * If no closing delimiter is found, the span runs to the end of `text`.
 * @param {string} text - Source text to scan.
 * @param {number} start - Index of the opening delimiter.
 * @param {string} openChar - Opening delimiter character.
 * @param {string} closeChar - Closing delimiter character.
 * @returns {{value: string, end: number}} The span including both delimiters,
 *   and the index just past its final character.
 */
function extractBalanced(text, start, openChar, closeChar) {
  const symmetric = openChar === closeChar;
  let pos = symmetric ? start + 1 : start;
  let depth = 0;
  let skipNext = false;

  while (pos < text.length) {
    const current = text[pos];
    if (skipNext) {
      skipNext = false;
    } else if (current === '\\') {
      skipNext = true;
    } else if (symmetric) {
      if (current === openChar) break;
    } else {
      if (current === openChar) depth += 1;
      if (current === closeChar) {
        depth -= 1;
        if (depth === 0) break;
      }
    }
    pos += 1;
  }

  return {
    value: text.slice(start, pos + 1),
    end: pos + 1,
  };
}
| 89 | + |
/**
 * Parses the comma-separated `name = value` field list of a single BibTeX
 * entry into a plain object. Field names are lower-cased; values keep their
 * raw text with one outer layer of braces or quotes stripped. Tokens not
 * followed by `=` are skipped up to the next comma.
 * @param {string} fieldsText - Entry body after the citation key's comma.
 * @returns {Object<string, string>} Field name -> raw (un-cleaned) value.
 */
function parseFields(fieldsText) {
  const fields = {};
  let cursor = 0;
  const skipWhitespace = () => {
    while (cursor < fieldsText.length && /\s/.test(fieldsText[cursor])) cursor += 1;
  };

  while (cursor < fieldsText.length) {
    // Skip separators (whitespace and commas) before the next field name.
    while (cursor < fieldsText.length && /[\s,]/.test(fieldsText[cursor])) cursor += 1;
    if (cursor >= fieldsText.length) break;

    let name = '';
    while (cursor < fieldsText.length && /[A-Za-z0-9_:-]/.test(fieldsText[cursor])) {
      name += fieldsText[cursor];
      cursor += 1;
    }
    name = name.trim().toLowerCase();

    skipWhitespace();
    if (fieldsText[cursor] !== '=') {
      // Malformed field: discard everything up to the next comma.
      while (cursor < fieldsText.length && fieldsText[cursor] !== ',') cursor += 1;
      continue;
    }
    cursor += 1;

    skipWhitespace();
    if (cursor >= fieldsText.length) break;

    let rawValue = '';
    const delimiter = fieldsText[cursor];
    if (delimiter === '{') {
      const extracted = extractBalanced(fieldsText, cursor, '{', '}');
      rawValue = extracted.value;
      cursor = extracted.end;
    } else if (delimiter === '"') {
      const extracted = extractBalanced(fieldsText, cursor, '"', '"');
      rawValue = extracted.value;
      cursor = extracted.end;
    } else {
      // Bare value (numbers, macros): runs to the next comma.
      const valueStart = cursor;
      while (cursor < fieldsText.length && fieldsText[cursor] !== ',') cursor += 1;
      rawValue = fieldsText.slice(valueStart, cursor);
    }

    skipWhitespace();
    if (fieldsText[cursor] === ',') cursor += 1;

    let normalized = rawValue.trim();
    const wrappedInBraces = normalized.startsWith('{') && normalized.endsWith('}');
    const wrappedInQuotes = normalized.startsWith('"') && normalized.endsWith('"');
    if (wrappedInBraces || wrappedInQuotes) {
      normalized = normalized.slice(1, -1);
    }
    fields[name] = normalized;
  }

  return fields;
}
| 145 | + |
/**
 * Splits raw BibTeX text into entries. Each `@type{key, ...}` occurrence
 * yields `{ id, type, fields }`; bodies without a comma after the citation
 * key are skipped.
 * @param {string} input - Full BibTeX document text.
 * @returns {Array<{id: string, type: string, fields: Object<string, string>}>}
 */
function parseBibTeX(input) {
  const entries = [];
  let cursor = 0;

  while (cursor < input.length) {
    const atIndex = input.indexOf('@', cursor);
    if (atIndex === -1) break;

    // The entry type immediately follows the '@'.
    const typeMatch = /^([A-Za-z]+)/.exec(input.slice(atIndex + 1));
    if (!typeMatch) {
      cursor = atIndex + 1;
      continue;
    }

    const openBraceIndex = input.indexOf('{', atIndex);
    if (openBraceIndex === -1) break;

    const body = extractBalanced(input, openBraceIndex, '{', '}');
    cursor = body.end;

    const inner = body.value.slice(1, -1);
    const keyEnd = inner.indexOf(',');
    if (keyEnd === -1) continue;

    entries.push({
      id: inner.slice(0, keyEnd).trim(),
      type: typeMatch[1].toLowerCase(),
      fields: parseFields(inner.slice(keyEnd + 1)),
    });
  }

  return entries;
}
| 183 | + |
/**
 * Converts a parsed BibTeX entry into the flat paper record stored in the
 * output JSON. Missing fields become '' (strings), null (year), or 0 (month).
 * @param {{id: string, type: string, fields: Object<string, string>}} entry
 * @returns {{id: string, type: string, title: string, authors: string,
 *   venue: string, year: ?number, month: number, doi: string, url: string}}
 */
function toPaper(entry) {
  const { id, type, fields } = entry;
  // First non-empty candidate wins as the venue: journal articles, then
  // proceedings, publishers, series, and theses.
  const venueSource =
    [fields.journal, fields.booktitle, fields.publisher, fields.series, fields.school].find(
      Boolean,
    ) || '';

  return {
    id,
    type,
    title: cleanValue(fields.title || ''),
    authors: cleanValue(fields.author || ''),
    venue: cleanValue(venueSource),
    year: Number.parseInt(cleanValue(fields.year || ''), 10) || null,
    month: parseMonth(fields.month || ''),
    doi: cleanValue(fields.doi || ''),
    url: cleanValue(fields.url || ''),
  };
}
| 213 | + |
/**
 * Sort comparator: newest papers first (year desc, then month desc), ties
 * broken alphabetically by title. Missing year/month sort as 0, i.e. last
 * within their group.
 * @param {{year: ?number, month: number, title: string}} a
 * @param {{year: ?number, month: number, title: string}} b
 * @returns {number} Negative when `a` sorts before `b`.
 */
function comparePapers(a, b) {
  const byYear = (b.year || 0) - (a.year || 0);
  if (byYear !== 0) return byYear;

  const byMonth = (b.month || 0) - (a.month || 0);
  if (byMonth !== 0) return byMonth;

  return a.title.localeCompare(b.title);
}
| 225 | + |
/**
 * Loads the previously generated JSON from OUTPUT_PATH, used as a fallback
 * when the remote sync fails.
 * @returns {Promise<?object>} Parsed data, or null when the file is missing,
 *   unreadable, or not valid JSON (best-effort: all errors mean "no cache").
 */
async function readExistingData() {
  let raw;
  try {
    raw = await readFile(OUTPUT_PATH, 'utf8');
  } catch {
    return null;
  }
  try {
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
| 234 | + |
/**
 * Serializes `data` as pretty-printed JSON (with trailing newline) to
 * OUTPUT_PATH, creating parent directories as needed.
 * @param {object} data - JSON-serializable payload.
 * @returns {Promise<void>}
 */
async function writeData(data) {
  const serialized = `${JSON.stringify(data, null, 2)}\n`;
  await mkdir(dirname(OUTPUT_PATH), { recursive: true });
  await writeFile(OUTPUT_PATH, serialized, 'utf8');
}
| 239 | + |
/**
 * Fetches the Paperpile BibTeX export, parses it, and writes the sorted,
 * titled papers (plus provenance metadata) to OUTPUT_PATH.
 * @returns {Promise<object>} The payload that was written.
 * @throws {Error} When the HTTP fetch does not return an OK status.
 */
async function syncRelatedResearch() {
  const response = await fetch(BIB_URL);
  if (!response.ok) {
    throw new Error(`Fetch failed (${response.status})`);
  }

  const papers = parseBibTeX(await response.text())
    .map(toPaper)
    .filter((paper) => paper.title)
    .sort(comparePapers);

  const output = {
    sourceBibtexUrl: BIB_URL,
    sharedPageUrl: SHARED_PAGE_URL,
    generatedAt: new Date().toISOString(),
    count: papers.length,
    papers,
  };

  await writeData(output);
  return output;
}
| 261 | + |
/**
 * Entry point: syncs from the network; on failure, falls back to the cached
 * JSON when one exists, otherwise rethrows so the process reports failure.
 * @returns {Promise<void>}
 * @throws The original sync error when no cached data is available.
 */
async function main() {
  try {
    const data = await syncRelatedResearch();
    console.log(`Synced ${data.count} related research papers.`);
    return;
  } catch (error) {
    const cached = await readExistingData();
    if (!cached?.papers?.length) throw error;
    console.warn(
      `Related research sync failed (${error.message}). Using cached data (${cached.papers.length} papers).`,
    );
  }
}
| 277 | + |
// Kick off the sync. On unrecoverable failure, report the error and set a
// non-zero exit code (process.exitCode lets Node exit naturally once pending
// work finishes, rather than aborting immediately).
main().catch((error) => {
  console.error(`Related research sync failed: ${error.message}`);
  process.exitCode = 1;
});
0 commit comments