Skip to content

Commit 017ca4f

Browse files
committed
Preserve and store outgoing document link order
1 parent 5dfecc5 commit 017ca4f

4 files changed

Lines changed: 100 additions & 23 deletions

File tree

ARCHITECTURE.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,12 @@ A simple key-value table for site-wide configuration. Currently stores:
151151

152152
**`document_refs`**
153153

154-
Tracks which documents link to which other documents. Updated on save — the server scans the document's nodes for internal links (annotations on text nodes that point to other pages) and diffs against the existing rows. Same pattern as `asset_refs`.
154+
Tracks which documents link to which other documents. Updated on save — the server scans the document's nodes for internal links (node `href` values and annotations on text nodes that point to other pages), then deletes the existing rows for that source document and re-inserts the current set. Same pattern as `asset_refs`.
155155

156156
This table tracks links from all document types — pages, nav, and footer. Since nav and footer are stitched into every page, their links are always live. This is the basis for determining page reachability (see "Page reachability" below).
157157

158+
`document_refs` must also preserve the **first-seen link order** for each `source_document_id` (stored in the `ref_order` column), because the page browser sitemap uses that order when projecting the reachable graph into a tree. For example, if a page body links to pages in the order A, then B, then C, the stored outgoing refs for that page must preserve A → B → C. Duplicate links to the same target are collapsed to the first occurrence only.
159+
158160
**`asset_refs`**
159161

160162
Tracks which assets are referenced by which documents. The compound primary key `(asset_id, document_id)` naturally deduplicates — a document referencing the same image five times still produces one row.
@@ -654,6 +656,7 @@ The tree is built with these rules:
654656
2. home page body links
655657
3. shared footer links
656658
- **Recursive ordering:** once a child page has been placed in the tree, recurse into that page using **body links only**
659+
- **Within each source document, preserve author order:** outgoing refs are consumed in the same order they appear in the source document, with duplicates removed by first occurrence
657660

658661
This means the sitemap is not a full graph visualization. It is a stable, editor-friendly tree derived from the reachable graph, where shared navigation and footer establish the top-level site structure, and deeper nesting comes from contextual links inside page content.
659662

src/app.css

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
/* Website fonts */
4747
@theme {
48-
--font-sans: 'Inter', ui-sans-serif, system-ui, sans-serif;
48+
--font-sans: 'courier new', 'Inter', ui-sans-serif, system-ui, sans-serif;
4949
--font-serif: 'Libertinos Serif Display', ui-serif, Georgia, serif;
5050
--font-mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New', monospace;
5151
}

src/lib/api.remote.js

Lines changed: 86 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,63 @@ function collect_node_ids(root_id, nodes, exclude_roots) {
8989
return collected;
9090
}
9191

92+
/**
93+
* Collect all node ids reachable from a root node by walking node/node_array
94+
* properties and annotation references, preserving first-seen traversal order.
95+
*
96+
* @param {string} root_id
97+
* @param {Record<string, any>} nodes
98+
* @param {Set<string>} [exclude_roots]
99+
* @returns {string[]}
100+
*/
101+
function collect_node_ids_in_order(root_id, nodes, exclude_roots) {
102+
const collected = [];
103+
const seen_ids = new Set();
104+
const stack = [root_id];
105+
106+
while (stack.length > 0) {
107+
const id = stack.pop();
108+
if (!id || seen_ids.has(id)) continue;
109+
if (exclude_roots && exclude_roots.has(id) && id !== root_id) continue;
110+
111+
seen_ids.add(id);
112+
collected.push(id);
113+
114+
const node = nodes[id];
115+
if (!node) continue;
116+
117+
const type_schema = document_schema[node.type];
118+
if (!type_schema) continue;
119+
120+
const next_ids = [];
121+
122+
for (const [prop_name, prop_def] of Object.entries(type_schema.properties)) {
123+
const value = node[prop_name];
124+
if (value == null) continue;
125+
126+
if (prop_def.type === 'node' && typeof value === 'string') {
127+
next_ids.push(value);
128+
} else if (prop_def.type === 'node_array' && Array.isArray(value)) {
129+
for (const child_id of value) {
130+
next_ids.push(child_id);
131+
}
132+
} else if (prop_def.type === 'annotated_text' && value.annotations) {
133+
for (const annotation of value.annotations) {
134+
if (annotation.node_id) {
135+
next_ids.push(annotation.node_id);
136+
}
137+
}
138+
}
139+
}
140+
141+
for (let i = next_ids.length - 1; i >= 0; i -= 1) {
142+
stack.push(next_ids[i]);
143+
}
144+
}
145+
146+
return collected;
147+
}
148+
92149
/**
93150
* @param {string} document_id
94151
* @param {Set<string>} node_ids
@@ -184,21 +241,23 @@ function normalize_internal_page_href(href, source_document_id) {
184241

185242
/**
186243
* @param {Record<string, any>} nodes
187-
* @param {Set<string>} node_ids
244+
* @param {Iterable<string>} node_ids
188245
* @param {string} source_document_id
189-
* @returns {Set<string>}
246+
* @returns {string[]}
190247
*/
191248
function collect_document_refs(nodes, node_ids, source_document_id) {
192-
const refs = new Set();
249+
const refs = [];
250+
const seen_refs = new Set();
193251

194252
for (const node_id of node_ids) {
195253
const node = nodes[node_id];
196254
if (!node) continue;
197255

198256
if (typeof node.href === 'string') {
199257
const target_document_id = normalize_internal_page_href(node.href, source_document_id);
200-
if (target_document_id) {
201-
refs.add(target_document_id);
258+
if (target_document_id && !seen_refs.has(target_document_id)) {
259+
seen_refs.add(target_document_id);
260+
refs.push(target_document_id);
202261
}
203262
}
204263

@@ -221,8 +280,9 @@ function collect_document_refs(nodes, node_ids, source_document_id) {
221280
source_document_id
222281
);
223282

224-
if (target_document_id) {
225-
refs.add(target_document_id);
283+
if (target_document_id && !seen_refs.has(target_document_id)) {
284+
seen_refs.add(target_document_id);
285+
refs.push(target_document_id);
226286
}
227287
}
228288
}
@@ -233,7 +293,7 @@ function collect_document_refs(nodes, node_ids, source_document_id) {
233293

234294
/**
235295
* @param {string} document_id
236-
* @param {Set<string>} node_ids
296+
* @param {Iterable<string>} node_ids
237297
* @param {Record<string, any>} all_nodes
238298
* @param {import('node:sqlite').StatementSync} delete_stmt
239299
* @param {import('node:sqlite').StatementSync} insert_stmt
@@ -262,14 +322,14 @@ function update_asset_refs(document_id, node_ids, all_nodes, delete_stmt, insert
262322

263323
/**
264324
* @param {string} source_document_id
265-
* @param {Set<string>} target_document_ids
325+
* @param {string[]} target_document_ids
266326
* @param {import('node:sqlite').StatementSync} delete_stmt
267327
* @param {import('node:sqlite').StatementSync} insert_stmt
268328
*/
269329
function update_document_refs(source_document_id, target_document_ids, delete_stmt, insert_stmt) {
270330
delete_stmt.run(source_document_id);
271-
for (const target_document_id of target_document_ids) {
272-
insert_stmt.run(target_document_id, source_document_id);
331+
for (const [ref_order, target_document_id] of target_document_ids.entries()) {
332+
insert_stmt.run(target_document_id, source_document_id, ref_order);
273333
}
274334
}
275335

@@ -313,21 +373,24 @@ function get_combined_document(document_id) {
313373

314374
/**
315375
* @param {DocumentData} page_doc
316-
* @returns {Set<string>}
376+
* @returns {string[]}
317377
*/
318378
function collect_page_body_node_ids(page_doc) {
319379
const page_root = page_doc.nodes[page_doc.document_id];
320380

321381
if (!page_root?.body || !Array.isArray(page_root.body)) {
322-
return new Set([page_doc.document_id]);
382+
return [page_doc.document_id];
323383
}
324384

325-
const body_node_ids = new Set([page_doc.document_id]);
385+
const body_node_ids = [page_doc.document_id];
386+
const seen_ids = new Set(body_node_ids);
326387

327388
for (const child_id of page_root.body) {
328-
const subtree_ids = collect_node_ids(child_id, page_doc.nodes);
389+
const subtree_ids = collect_node_ids_in_order(child_id, page_doc.nodes);
329390
for (const subtree_id of subtree_ids) {
330-
body_node_ids.add(subtree_id);
391+
if (seen_ids.has(subtree_id)) continue;
392+
seen_ids.add(subtree_id);
393+
body_node_ids.push(subtree_id);
331394
}
332395
}
333396

@@ -414,7 +477,7 @@ function summarize_page_document(page_doc) {
414477
function get_outgoing_refs(source_document_id) {
415478
const rows = /** @type {Array<{ target_document_id: string }>} */ (
416479
db.prepare(
417-
'SELECT target_document_id FROM document_refs WHERE source_document_id = ? ORDER BY rowid'
480+
'SELECT target_document_id FROM document_refs WHERE source_document_id = ? ORDER BY ref_order, rowid'
418481
).all(source_document_id)
419482
);
420483

@@ -526,7 +589,7 @@ function build_page_browser_data() {
526589
const body_node_ids = collect_page_body_node_ids(page_doc);
527590
body_refs_by_page_id.set(
528591
page_doc.document_id,
529-
Array.from(collect_document_refs(page_doc.nodes, body_node_ids, page_doc.document_id))
592+
collect_document_refs(page_doc.nodes, body_node_ids, page_doc.document_id)
530593
);
531594
}
532595

@@ -677,8 +740,10 @@ export const save_document = command(save_document_input_schema, async (combined
677740
footer_root_id
678741
});
679742

680-
const nav_node_ids = nav_root_id ? collect_node_ids(nav_root_id, all_nodes) : new Set();
681-
const footer_node_ids = footer_root_id ? collect_node_ids(footer_root_id, all_nodes) : new Set();
743+
const nav_node_ids = nav_root_id ? new Set(collect_node_ids_in_order(nav_root_id, all_nodes)) : new Set();
744+
const footer_node_ids = footer_root_id
745+
? new Set(collect_node_ids_in_order(footer_root_id, all_nodes))
746+
: new Set();
682747

683748
const exclude_roots = new Set();
684749
if (nav_root_id) exclude_roots.add(nav_root_id);
@@ -704,7 +769,7 @@ export const save_document = command(save_document_input_schema, async (combined
704769

705770
const delete_document_refs = db.prepare('DELETE FROM document_refs WHERE source_document_id = ?');
706771
const insert_document_ref = db.prepare(
707-
'INSERT OR IGNORE INTO document_refs (target_document_id, source_document_id) VALUES (?, ?)'
772+
'INSERT OR REPLACE INTO document_refs (target_document_id, source_document_id, ref_order) VALUES (?, ?, ?)'
708773
);
709774

710775
console.log('[save_document] begin transaction', {

src/lib/server/migrations.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,14 @@ export default [
6767
PRIMARY KEY (asset_id, document_id)
6868
)
6969
`);
70+
},
71+
function add_document_ref_order({ db }) {
72+
db.exec(`
73+
ALTER TABLE document_refs ADD COLUMN ref_order INTEGER NOT NULL DEFAULT 0
74+
`);
75+
76+
db.exec(`
77+
DELETE FROM document_refs
78+
`);
7079
}
7180
];

0 commit comments

Comments
 (0)