Skip to content

Commit 39177ce

Browse files
cdeustclaude
andcommitted
feat(wiki redesign): Phase 9 — academic extensions
BibTeX-backed citations, figure/equation/table auto-numbering, cross-references, and section numbering. Brings the wiki up to paper-writing fidelity while keeping markdown as source of truth. Backend (handlers/wiki_api.py + both servers): - list_bibliography(wiki_root): enumerates wiki/_bibliography/*.bib with entry counts. - read_bibliography(wiki_root, rel_path): returns raw BibTeX content. Path locked to _bibliography/*.bib; wider reads rejected with a specific error (defence in depth on top of the existing commonpath sanitizer). - GET /api/wiki/bibliography + /api/wiki/bibliography/read wired in http_viz_server and http_standalone. Frontend (ui/unified/js/wiki.js): - Lazy Citation.js (@citation-js/core + bibtex + csl) via esm.sh. Loaded only when a page contains `[@key]` cite tokens. - applyAcademicPasses(bodyEl, meta) runs 4 post-render passes: 1. Heading numbering (gated by frontmatter section_numbering) 2. Figure / .katex-display / table auto-numbering with data-label → label map 3. Cross-references: `{@fig:foo}`, `{@eq:bar}`, `{@sec:intro}` resolved to `Figure N` / `Equation N` / `Section N.M` links 4. Citations: `[@key]` + `[@k1; @K2]` → (Author Year) inline + auto-generated `## References` section via CSL APA format - Bibliography file list is either explicit in frontmatter (bibliography: [_bibliography/foo.bib]) or "all files under _bibliography/" by default. CSS (ui/unified/knowledge.css): - `.wiki-section-num` — mono typeface section numbers - `figure figcaption` / `.wiki-caption` + `.wiki-caption-prefix` for "Figure 3: …" academic captions - `.wiki-xref` — cyan dotted-underline cross-references - `.wiki-cite` — italic inline citations - `.wiki-bibliography` — APA hanging-indent bibliography layout Seeded wiki/_bibliography/cortex.bib with 3 sample entries (Friston 2010, Josselyn & Tonegawa 2020, Tse et al. 2007) so the feature has live data to demo. Phase 10 (Pandoc export to PDF / LaTeX / DOCX) is next. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a986d22 commit 39177ce

5 files changed

Lines changed: 411 additions & 0 deletions

File tree

mcp_server/handlers/wiki_api.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,58 @@ def execute_view(name: str | None, inline_query: str | None = None) -> dict:
318318
}
319319

320320

321+
def list_bibliography(wiki_root: Path) -> dict:
    """List `.bib` files under wiki/_bibliography/ (Phase 9.1).

    Scientists drop BibTeX files there; the frontend fetches them and
    Citation.js parses them into a key → entry lookup for cite-key
    resolution (`[@author2024]` → formatted citation).

    Args:
        wiki_root: Wiki root directory; ``_bibliography/`` is looked up
            directly beneath it and searched recursively.

    Returns:
        ``{"files": [{"path": "...", "size": N, "entries": int}]}`` where
        ``path`` is relative to ``wiki_root`` with forward slashes,
        ``size`` is the on-disk size in bytes and ``entries`` is a cheap
        count of BibTeX records. A missing ``_bibliography/`` directory
        yields ``{"files": []}``; files that cannot be read are skipped.
    """
    import re  # local import keeps this block self-contained

    bib_dir = wiki_root / "_bibliography"
    if not bib_dir.is_dir():
        return {"files": []}

    # A record is `@type{` / `@type(` at the start of a line (optionally
    # indented). `@comment`, `@string` and `@preamble` are BibTeX
    # directives, not bibliography entries, so they are not counted.
    entry_start = re.compile(
        r"^[ \t]*@(?!(?:comment|string|preamble)\b)[A-Za-z]+\s*[{(]",
        re.IGNORECASE | re.MULTILINE,
    )

    try:
        candidates = sorted(bib_dir.rglob("*.bib"))
    except OSError:
        # Best effort: an unreadable directory tree lists as empty.
        candidates = []

    out = []
    for p in candidates:
        try:
            if not p.is_file():
                continue  # e.g. a directory that happens to end in .bib
            rel = str(p.relative_to(wiki_root)).replace("\\", "/")
            content = p.read_text(encoding="utf-8", errors="replace")
            size = p.stat().st_size
        except (OSError, ValueError):
            # Unreadable file or a path that is not under wiki_root: skip
            # it rather than failing the whole listing.
            continue
        # Drop a UTF-8 BOM so a record on the very first line still counts.
        entries = len(entry_start.findall(content.lstrip("\ufeff")))
        out.append({"path": rel, "size": size, "entries": entries})
    return {"files": out}
353+
354+
355+
def read_bibliography(wiki_root: Path, rel_path: str) -> dict:
    """Return raw BibTeX content for one file under ``_bibliography/``.

    The file is read through ``read_page``, which (per the original note
    on this function) applies the wiki_store commonpath sanitizer so only
    paths resolving inside ``wiki_root`` are served. On top of that this
    function locks the request to ``_bibliography/*.bib`` so the endpoint
    cannot be used to read other wiki files.

    Args:
        wiki_root: Wiki root directory.
        rel_path: Forward-slash path relative to ``wiki_root``, e.g.
            ``_bibliography/cortex.bib``.

    Returns:
        ``{"path", "content", "size"}`` on success, where ``size`` is
        ``len(content)``. Otherwise ``{"error": "invalid path"}``,
        ``{"error": "must be a .bib file under _bibliography/"}`` or
        ``{"error": "not found", "path": rel_path}``.
    """
    # Backslashes are rejected outright: Windows treats them as path
    # separators, so `_bibliography/..\x.bib` would otherwise pass the
    # prefix check below and still climb out of _bibliography/.
    if not rel_path or "\\" in rel_path or "\x00" in rel_path:
        return {"error": "invalid path"}
    # Segment-wise check catches `..` anywhere in the path.
    if ".." in rel_path.split("/"):
        return {"error": "invalid path"}
    if not rel_path.startswith("_bibliography/") or not rel_path.endswith(".bib"):
        return {"error": "must be a .bib file under _bibliography/"}
    content = read_page(wiki_root, rel_path)
    if content is None:
        return {"error": "not found", "path": rel_path}
    return {"path": rel_path, "content": content, "size": len(content)}
371+
372+
321373
def save_wiki_page(wiki_root: Path, rel_path: str, body: str) -> dict:
322374
"""Write ``body`` to ``<wiki_root>/<rel_path>`` atomically.
323375
@@ -352,6 +404,8 @@ def save_wiki_page(wiki_root: Path, rel_path: str, body: str) -> dict:
352404
"list_wiki_pages",
353405
"read_wiki_page",
354406
"save_wiki_page",
407+
"list_bibliography",
408+
"read_bibliography",
355409
"page_meta",
356410
"list_concepts",
357411
"list_drafts",

mcp_server/server/http_standalone.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,10 @@ def do_GET(self):
277277
self._serve_wiki_db("views")
278278
elif path_no_qs == "/api/wiki/view":
279279
self._serve_wiki_db("view")
280+
elif path_no_qs == "/api/wiki/bibliography":
281+
self._serve_wiki_db("bibliography")
282+
elif path_no_qs == "/api/wiki/bibliography/read":
283+
self._serve_wiki_db("bibliography_read")
280284
elif self.path == "/api/sankey" or self.path.startswith("/api/sankey?"):
281285
self._serve_sankey()
282286
elif self.path.startswith("/api/file-diff?"):
@@ -391,6 +395,16 @@ def _serve_wiki_db(self, op: str):
391395
data = wiki_api.execute_view(
392396
qs.get("name") or None, qs.get("query") or None
393397
)
398+
elif op == "bibliography":
399+
from mcp_server.infrastructure.config import METHODOLOGY_DIR
400+
401+
data = wiki_api.list_bibliography(METHODOLOGY_DIR / "wiki")
402+
elif op == "bibliography_read":
403+
from mcp_server.infrastructure.config import METHODOLOGY_DIR
404+
405+
data = wiki_api.read_bibliography(
406+
METHODOLOGY_DIR / "wiki", qs.get("path", "")
407+
)
394408
else:
395409
data = {"error": f"unknown op: {op}"}
396410
body = json.dumps(data, default=str).encode()

mcp_server/server/http_viz_server.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ def do_GET(self):
163163
self._serve_wiki_views()
164164
elif path_no_qs == "/api/wiki/view":
165165
self._serve_wiki_view()
166+
elif path_no_qs == "/api/wiki/bibliography":
167+
self._serve_wiki_bibliography()
168+
elif path_no_qs == "/api/wiki/bibliography/read":
169+
self._serve_wiki_bibliography_read()
166170
elif self.path.startswith("/js/") and self.path.endswith(".js"):
167171
serve_static_file(self, js_dir, self.path[4:], "application/javascript")
168172
elif self.path.startswith("/css/") and self.path.endswith(".css"):
@@ -295,6 +299,27 @@ def _serve_wiki_view(self):
295299
except Exception as e:
296300
send_error_response(self, e)
297301

302+
def _serve_wiki_bibliography(self):
    """GET /api/wiki/bibliography — JSON listing of the wiki's .bib files."""
    try:
        from mcp_server.handlers.wiki_api import list_bibliography
        from mcp_server.infrastructure.config import METHODOLOGY_DIR

        wiki_root = METHODOLOGY_DIR / "wiki"
        listing = list_bibliography(wiki_root)
        send_json_response(self, listing)
    except Exception as exc:
        # Any failure (import, listing, serialisation) becomes an error response.
        send_error_response(self, exc)
310+
311+
def _serve_wiki_bibliography_read(self):
    """GET /api/wiki/bibliography/read — query: ?path=<rel_path of a .bib file>."""
    try:
        from mcp_server.handlers.wiki_api import read_bibliography
        from mcp_server.infrastructure.config import METHODOLOGY_DIR

        # A missing `path` parameter is passed through as "" and left to
        # read_bibliography to handle.
        requested = self._qs().get("path", "")
        result = read_bibliography(METHODOLOGY_DIR / "wiki", requested)
        send_json_response(self, result)
    except Exception as exc:
        send_error_response(self, exc)
322+
298323
def _serve_wiki_save(self):
299324
"""POST /api/wiki/save — body: JSON {rel_path, body}."""
300325
try:

ui/unified/js/wiki.js

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,11 @@
570570
} catch (e) { /* KaTeX optional; swallow failures */ }
571571
}
572572

573+
// Phase 9 — academic passes (section numbering, figure/equation
574+
// numbering, cross-refs, citations + bibliography). Runs async;
575+
// the body is visible immediately, citations appear when loaded.
576+
applyAcademicPasses(bodyEl, meta);
577+
573578
// Wire internal wiki links
574579
bodyEl.querySelectorAll('.wiki-link').forEach(function(link) {
575580
link.addEventListener('click', function() {
@@ -977,6 +982,247 @@
977982
return e;
978983
}
979984

985+
// ── Academic rendering layer (Phase 9) ──
986+
//
987+
// Four post-render passes over the already-rendered body:
988+
// 1. Number headings (section numbers) — 9.5
989+
// 2. Number figures + equations + tables — 9.2
990+
// 3. Resolve {@label} cross-refs — 9.3
991+
// 4. Resolve [@citekey] citations + bibliography — 9.1
992+
//
993+
// Citation.js is lazy-loaded the first time we see a cite key on a
994+
// page. Bibliography files live in wiki/_bibliography/*.bib; which
995+
// file(s) a page uses is declared in its frontmatter
996+
// (bibliography: [_bibliography/foo.bib]) or, absent that, all
997+
// files in _bibliography/ are available.
998+
999+
var _bibCache = null; // combined cite-key → entry map
1000+
var _bibLoadPromise = null;
1001+
var _citationJsPromise = null;
1002+
1003+
function _loadCitationJs() {
  // Lazily imports Citation.js core plus its BibTeX and CSL plugins from
  // esm.sh. Returns a promise for the core module; the plugins are
  // imported only for their side effects. The promise is memoised in
  // `_citationJsPromise` so the imports are issued at most once per
  // successful load.
  if (_citationJsPromise) return _citationJsPromise;
  _citationJsPromise = import('https://esm.sh/@citation-js/core@0.7').then(function(core) {
    return Promise.all([
      import('https://esm.sh/@citation-js/plugin-bibtex@0.7'),
      import('https://esm.sh/@citation-js/plugin-csl@0.7')
    ]).then(function() { return core; });
  }).catch(function(err) {
    // Don't memoise a failure (offline, CDN hiccup): clear the slot so the
    // next page that needs citations retries instead of failing forever.
    _citationJsPromise = null;
    throw err;
  });
  return _citationJsPromise;
}
1013+
1014+
async function _ensureBibliography(meta) {
  // Resolves to a cite-key → CSL entry map for the page described by
  // `meta`.
  //
  // Source files are `meta.bibliography` when it is an array, otherwise
  // every file returned by GET /api/wiki/bibliography. Loads are cached
  // per source list, so a page with an explicit bibliography does not
  // poison the cache for pages that use a different one (or the default).
  //
  // A failed listing resolves to {} as before, but is not cached, so a
  // later render retries. A failed Citation.js load rejects and is not
  // cached either. Individual files that fail to fetch or parse are
  // skipped.
  var explicit = (meta && Array.isArray(meta.bibliography)) ? meta.bibliography : null;
  var cacheKey = explicit ? 'explicit:' + JSON.stringify(explicit) : 'all';
  var loads = _ensureBibliography._loads || (_ensureBibliography._loads = {});
  if (loads[cacheKey]) return loads[cacheKey];

  // Resolves to the key map, or null when the file listing itself failed.
  async function loadAll() {
    var list;
    try {
      if (explicit) {
        list = explicit;
      } else {
        var resp = await fetch('/api/wiki/bibliography');
        if (!resp.ok) throw new Error('HTTP ' + resp.status);
        var j = await resp.json();
        list = (j.files || []).map(function(f) { return f.path; });
      }
    } catch (e) { return null; }

    // No prototype, so a cite key such as `constructor` or `__proto__`
    // is stored and looked up as plain data.
    var byKey = Object.create(null);
    if (!list || list.length === 0) return byKey;

    var core = await _loadCitationJs();
    var Cite = core.Cite;
    // Fetch in parallel, merge in list order: when two files define the
    // same key, the later file in the list wins deterministically.
    var perFile = await Promise.all(list.map(async function(path) {
      try {
        var r = await fetch('/api/wiki/bibliography/read?path=' + encodeURIComponent(path));
        var data = await r.json();
        if (!data.content) return [];
        return new Cite(data.content).data;
      } catch (e) {
        console.warn('[cortex] skipping bibliography file:', path, e);
        return [];
      }
    }));
    perFile.forEach(function(entries) {
      entries.forEach(function(entry) {
        if (entry.id) byKey[entry.id] = entry;
      });
    });
    return byKey;
  }

  var pending = loadAll().then(function(byKey) {
    if (byKey === null) {
      delete loads[cacheKey];
      return {};
    }
    _bibCache = byKey; // mirrors the most recently loaded map
    return byKey;
  }, function(err) {
    delete loads[cacheKey];
    throw err;
  });
  loads[cacheKey] = pending;
  _bibLoadPromise = pending; // mirrors the most recent load
  return pending;
}
1050+
1051+
function _formatInlineCite(entry) {
  // Minimal author–year inline marker that sits where the `[@key]` was
  // typed; the caller adds the surrounding parentheses. Full CSL
  // rendering happens in the bibliography pass.
  //
  //   no entry    → "[?]"
  //   1 author    → "Friston 2010"
  //   2 authors   → "Josselyn & Tonegawa 2020"
  //   3+ authors  → "Tse et al. 2007"
  //
  // Missing surname renders as "?", missing year as "n.d.".
  if (!entry) return '[?]';
  var authors = Array.isArray(entry.author) ? entry.author : [];
  function surname(a) {
    return (a && (a.family || a.literal)) || '?';
  }
  var who;
  if (authors.length === 0) {
    who = '?';
  } else if (authors.length === 1) {
    who = surname(authors[0]);
  } else if (authors.length === 2) {
    who = surname(authors[0]) + ' & ' + surname(authors[1]);
  } else {
    who = surname(authors[0]) + ' et al.';
  }
  var dateParts = entry.issued && entry.issued['date-parts'];
  var year = (dateParts && dateParts[0] && dateParts[0][0]) || entry.year || 'n.d.';
  return who + ' ' + year;
}
1062+
1063+
async function _formatBibliographyHtml(usedKeys, byKey) {
  // Builds the HTML for the auto-generated "References" section.
  //
  // usedKeys: Set of cite keys seen on the page.
  // byKey:    cite-key → entry map.
  //
  // Resolves to '' when nothing was cited or none of the cited keys are
  // known. Otherwise resolves to an <h2 id="references"> followed by the
  // APA-formatted bibliography, or by a plain <ul> of the raw keys if
  // formatting throws.
  var HEADING = '<h2 id="references">References</h2>';
  if (!usedKeys || usedKeys.size === 0) return '';

  var Cite = (await _loadCitationJs()).Cite;

  var cited = [];
  usedKeys.forEach(function(key) {
    var entry = byKey[key];
    if (entry) cited.push(entry);
  });
  if (cited.length === 0) return '';

  try {
    var rendered = new Cite(cited).format('bibliography', { format: 'html', template: 'apa', lang: 'en-US' });
    return HEADING + rendered;
  } catch (err) {
    // Fallback: plain list of raw ids
    var items = Array.from(usedKeys).map(function(key) {
      return '<li>' + esc(key) + '</li>';
    });
    return HEADING + '<ul>' + items.join('') + '</ul>';
  }
}
1083+
1084+
function _numberHeadings(root, enabled) {
  // Prefixes every h1–h6 under `root` with a hierarchical section number
  // ("2", "2.1", …) wrapped in <span class="wiki-section-num">. Does
  // nothing unless `enabled` is truthy. The heading with id "references"
  // (the generated bibliography) is left unnumbered.
  if (!enabled) return;

  var depthCounts = [0, 0, 0, 0, 0, 0];
  var headings = root.querySelectorAll('h1, h2, h3, h4, h5, h6');

  Array.prototype.forEach.call(headings, function(heading) {
    if (heading.id === 'references') return;

    var depth = parseInt(heading.tagName.slice(1), 10);
    depthCounts[depth - 1] += 1;
    // A new heading at this depth restarts every deeper counter.
    depthCounts.fill(0, depth);

    // Levels never used so far (still 0) are left out of the number, so a
    // document that starts at h2 is numbered "1", "1.1", … not "0.1".
    var pieces = [];
    for (var d = 0; d < depth; d++) {
      if (depthCounts[d] > 0) pieces.push(depthCounts[d]);
    }

    var badge = document.createElement('span');
    badge.className = 'wiki-section-num';
    badge.textContent = pieces.join('.') + ' ';
    heading.insertBefore(badge, heading.firstChild);
  });
}
1099+
1100+
function _numberLabeled(root, selector, prefix, labelMap) {
  // Numbers every element under `root` matching `selector` (1, 2, 3 … in
  // document order) and records labelled ones for cross-referencing.
  //
  // For each match:
  //   - `data-num` is set to its number;
  //   - a <span class="wiki-caption-prefix">"<prefix> N: "</span> is
  //     inserted at the start of its figcaption / .wiki-caption, if any;
  //   - if it carries `data-label`, labelMap[label] = { prefix, num } is
  //     recorded, and the label also becomes the element's id when it has
  //     none, so the `#<label>` links built for cross-references have
  //     something to scroll to.
  root.querySelectorAll(selector).forEach(function(node, idx) {
    var num = idx + 1;
    var label = node.getAttribute('data-label') || null;
    node.setAttribute('data-num', String(num));

    var caption = node.querySelector('figcaption, .wiki-caption');
    if (caption) {
      var pfx = document.createElement('span');
      pfx.className = 'wiki-caption-prefix';
      pfx.textContent = prefix + ' ' + num + ': ';
      caption.insertBefore(pfx, caption.firstChild);
    }

    if (label) {
      labelMap[label] = { prefix: prefix, num: num };
      // Never overwrite an existing id, and never create a duplicate one.
      if (!node.id && !document.getElementById(label)) node.id = label;
    }
  });
}
1116+
1117+
function _resolveCrossRefs(root, labelMap) {
  // Replaces `{@fig:foo}` / `{@eq:bar}` / `{@sec:intro}` tokens that
  // our markdown renderer has dropped into the HTML as literal
  // text. We used `{@…}` to avoid collision with the `[@citekey]`
  // citation syntax.
  //
  // Each token becomes <a class="wiki-xref" href="#key">. Its text is
  // "<prefix> <num>" when labelMap has the key, otherwise "?key" so the
  // dangling reference is visible. Text inside code samples and rendered
  // math is left untouched, so a page can document the syntax itself.
  var SKIP_INSIDE = 'pre, code, script, style, textarea, .katex';
  var re = /\{@([a-zA-Z0-9:_-]+)\}/g;
  var hasOwn = Object.prototype.hasOwnProperty;

  // Collect first, mutate after: replacing nodes while the walker is
  // live would disturb its traversal.
  var walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
  var nodes = [];
  var n;
  while ((n = walker.nextNode())) nodes.push(n);

  nodes.forEach(function(text) {
    var value = text.nodeValue;
    if (value.indexOf('{@') < 0) return;
    var parent = text.parentNode;
    if (!parent) return;
    if (parent.closest && parent.closest(SKIP_INSIDE)) return;

    var frag = document.createDocumentFragment();
    var lastIdx = 0;
    var m;
    re.lastIndex = 0;
    while ((m = re.exec(value)) !== null) {
      if (m.index > lastIdx) {
        frag.appendChild(document.createTextNode(value.slice(lastIdx, m.index)));
      }
      var key = m[1];
      // Own-property lookup so a label named e.g. `constructor` is not
      // resolved through Object.prototype.
      var ref = hasOwn.call(labelMap, key) ? labelMap[key] : null;
      var out = document.createElement('a');
      out.className = 'wiki-xref';
      out.href = '#' + key;
      out.textContent = ref ? (ref.prefix + ' ' + ref.num) : ('?' + key);
      frag.appendChild(out);
      lastIdx = m.index + m[0].length;
    }
    if (lastIdx === 0) return; // contained `{@` but no well-formed token
    if (lastIdx < value.length) {
      frag.appendChild(document.createTextNode(value.slice(lastIdx)));
    }
    parent.replaceChild(frag, text);
  });
}
1155+
1156+
async function _resolveCitations(root, byKey, usedKeys) {
  // Replace `[@key]` and `[@k1; @k2]` tokens with formatted inline
  // citations.
  //
  // Each token becomes <a class="wiki-cite" href="#references"> whose
  // text is "(…; …)", one _formatInlineCite() result per key. Every key
  // seen, known or not, is added to the `usedKeys` Set. Text inside code
  // samples and rendered math is left untouched, so a page can document
  // the syntax itself.
  var SKIP_INSIDE = 'pre, code, script, style, textarea, .katex';
  var re = /\[@([a-zA-Z0-9_-]+(?:\s*;\s*@[a-zA-Z0-9_-]+)*)\]/g;
  var hasOwn = Object.prototype.hasOwnProperty;

  // Collect first, mutate after: replacing nodes while the walker is
  // live would disturb its traversal.
  var walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
  var nodes = [];
  var n;
  while ((n = walker.nextNode())) nodes.push(n);

  nodes.forEach(function(text) {
    var value = text.nodeValue;
    if (value.indexOf('[@') < 0) return;
    var parent = text.parentNode;
    if (!parent) return;
    if (parent.closest && parent.closest(SKIP_INSIDE)) return;

    var frag = document.createDocumentFragment();
    var lastIdx = 0;
    var m;
    re.lastIndex = 0;
    while ((m = re.exec(value)) !== null) {
      if (m.index > lastIdx) {
        frag.appendChild(document.createTextNode(value.slice(lastIdx, m.index)));
      }
      var keys = m[1].split(';').map(function(s) { return s.trim().replace(/^@/, ''); });
      var parts = keys.map(function(k) {
        usedKeys.add(k);
        // Own-property lookup so a key such as `constructor` is not
        // resolved through Object.prototype.
        return _formatInlineCite(hasOwn.call(byKey, k) ? byKey[k] : null);
      });
      var cite = document.createElement('a');
      cite.className = 'wiki-cite';
      cite.href = '#references';
      cite.textContent = '(' + parts.join('; ') + ')';
      frag.appendChild(cite);
      lastIdx = m.index + m[0].length;
    }
    if (lastIdx === 0) return; // contained `[@` but no well-formed token
    if (lastIdx < value.length) {
      frag.appendChild(document.createTextNode(value.slice(lastIdx)));
    }
    parent.replaceChild(frag, text);
  });
}
1191+
1192+
async function applyAcademicPasses(bodyEl, meta) {
  // Runs the Phase 9 academic passes over an already-rendered page body:
  //   1. section numbers (only when meta.section_numbering === true)
  //   2. figure / equation / table numbering
  //   3. `{@label}` cross-references
  //   4. `[@key]` citations + generated References section (async)
  //
  // Passes 1–3 run synchronously before the first await; pass 4 runs only
  // when the body text contains a cite token. The returned promise never
  // rejects: failures are logged and the remaining passes still run, so a
  // caller that does not await it gets no unhandled rejection.
  if (!bodyEl) return;
  var sectionNums = meta && meta.section_numbering === true;
  var labelMap = {};

  try {
    // 1. Section numbers
    _numberHeadings(bodyEl, sectionNums);

    // Register labelled, numbered headings so `{@sec:…}` can resolve to
    // "Section N.M". Assumes the renderer puts `data-label` on headings
    // the same way it does on figures — TODO confirm. Without section
    // numbering there is no number to show, so nothing is registered.
    bodyEl.querySelectorAll(
      'h1[data-label], h2[data-label], h3[data-label], h4[data-label], h5[data-label], h6[data-label]'
    ).forEach(function(h) {
      var label = h.getAttribute('data-label');
      var numEl = h.querySelector('.wiki-section-num');
      if (!label || !numEl) return;
      labelMap[label] = { prefix: 'Section', num: numEl.textContent.trim() };
    });

    // 2. Figure / equation / table numbering
    _numberLabeled(bodyEl, 'figure', 'Figure', labelMap);
    _numberLabeled(bodyEl, '.katex-display', 'Equation', labelMap);
    _numberLabeled(bodyEl, 'table', 'Table', labelMap);

    // 3. Cross-references
    _resolveCrossRefs(bodyEl, labelMap);
  } catch (e) { console.warn('[cortex] numbering/cross-ref pass failed:', e); }

  // 4. Citations (async — loads Citation.js + bibliography)
  var hasCite = /\[@[a-zA-Z0-9_-]/.test(bodyEl.textContent);
  if (hasCite) {
    try {
      var byKey = await _ensureBibliography(meta);
      var usedKeys = new Set();
      await _resolveCitations(bodyEl, byKey, usedKeys);
      var refsHtml = await _formatBibliographyHtml(usedKeys, byKey);
      if (refsHtml) {
        var refs = document.createElement('section');
        refs.className = 'wiki-bibliography';
        refs.innerHTML = refsHtml;
        bodyEl.appendChild(refs);
      }
    } catch (e) { console.warn('[cortex] citation pass failed:', e); }
  }
}
1225+
9801226
// ── Inline editor (Phase 8.3) ──
9811227
//
9821228
// Lazy-loads CodeMirror 6 from esm.sh the first time the user clicks

0 commit comments

Comments
 (0)