-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathmdToJson.ts
More file actions
120 lines (101 loc) · 3.14 KB
/
Copy pathmdToJson.ts
File metadata and controls
120 lines (101 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import { marked } from "marked";
// Types
/**
 * One node of the JSON tree produced from a markdown document.
 *
 * Each heading becomes a nested `Section` stored on its parent under the
 * heading text; the body text that follows the heading is stored in
 * `_content`.
 */
interface Section {
// Normalized body text accumulated for this section ("" when it has none).
_content: string;
// Child sections keyed by heading title. The `| string` arm is required so
// that the `_content` property is compatible with this index signature.
[title: string]: Section | string;
}
/**
 * Minimal view of a lexer token: only the fields this file reads.
 * `mdToJson` feeds the tokens returned by `marked.lexer` through this shape.
 */
interface Token {
// Token kind; "heading" opens a new section, "code" gets indentation-preserving
// normalization, every other kind is treated as plain content.
type: string;
// Heading level; only read for heading tokens (treated as 0 when absent).
depth?: number;
// Heading title; only read for heading tokens (treated as "" when absent).
text?: string;
// Original source text of the token; this is what gets stored as content.
raw?: string;
}
/**
 * Accumulator threaded through the token loop in `mdToJson`.
 */
interface ParserState {
// Set by createInitialState; NOTE(review): no function visible in this file
// reads it afterwards — confirm whether it is still needed.
currentSection: Section;
// Chain of open sections: index 0 is the document root (the value mdToJson
// returns) and the last entry is the section that receives new content.
sectionStack: Section[];
// Depth of the most recently processed heading (0 before any heading).
currentDepth: number;
// Normalized content chunks added since the last heading; its emptiness tells
// handleContent whether to assign or append to the section's `_content`.
content: string[];
}
/**
 * Builds the parser state used before any token has been consumed.
 *
 * The section stack starts with a single empty root section; the heading depth
 * is 0 and no content chunks have been collected. Every call returns fresh
 * objects, and `currentSection` is a separate object from the root on the stack.
 */
function createInitialState(): ParserState {
  const initial: ParserState = {
    content: [],
    currentDepth: 0,
    sectionStack: [{ _content: "" }],
    currentSection: { _content: "" },
  };
  return initial;
}
/**
 * Heading depth of every section opened by `handleHeader`.
 *
 * The section stack only stores the sections themselves, so the depth each one
 * was opened at is tracked here. Sections that are not in the map (the document
 * root) count as depth 0. A WeakMap keeps the bookkeeping out of the resulting
 * JSON and lets finished trees be garbage-collected.
 */
const sectionDepths = new WeakMap<Section, number>();

/**
 * Opens a new section for a heading token and attaches it to its parent.
 *
 * The parent is the nearest open section whose heading depth is strictly
 * smaller than the new heading's depth. Deciding this from the recorded depth
 * of each open section (instead of from arithmetic on the stack length) keeps
 * the result correct when heading levels are skipped (`#` → `###` → `##`) or
 * when the document starts below level 1 (`##` → `#`); the root section is
 * never popped, so a parent always exists.
 *
 * @param state - Current parser state. Its `sectionStack` array is copied, not
 *   mutated, but the parent section object itself receives the new child.
 * @param token - Heading token; a missing `depth` is treated as 0 and a missing
 *   `text` as "".
 * @returns A new state whose stack ends with the new section, with
 *   `currentDepth` set to the heading depth and `content` reset to empty.
 *   A heading that repeats a title under the same parent replaces the earlier
 *   section stored under that title.
 */
function handleHeader(state: ParserState, token: Token): ParserState {
  const depth = token.depth ?? 0;
  const title = token.text ?? "";
  const newStack = [...state.sectionStack];

  // Close every open section that is at the same level as, or deeper than, the
  // new heading. Index 0 (the root) always stays on the stack.
  while (
    newStack.length > 1 &&
    (sectionDepths.get(newStack[newStack.length - 1]) ?? 0) >= depth
  ) {
    newStack.pop();
  }

  const newSection: Section = { _content: "" };
  sectionDepths.set(newSection, depth);

  const currentParent = newStack[newStack.length - 1];
  currentParent[title] = newSection;

  return {
    ...state,
    sectionStack: [...newStack, newSection],
    currentDepth: depth,
    content: [],
  };
}
/**
 * Canonicalizes the raw text of a token before it is stored in a section.
 *
 * - Windows line endings are converted to `\n`.
 * - `code` tokens only lose their leading/trailing newlines, so indentation
 *   inside the block survives.
 * - Text containing a line that starts with `- ` after the first line is
 *   treated as a list and returned trimmed but otherwise unchanged.
 * - Anything else is split on runs of newlines, each piece is trimmed, and the
 *   pieces are rejoined with exactly one blank line between them.
 *
 * @param raw - Source text of the token.
 * @param type - Token type; only `"code"` changes the behaviour.
 * @returns The normalized text (possibly empty).
 */
function normalizeContent(raw: string, type: string = "text"): string {
  const unixText = raw.replace(/\r\n/g, "\n");

  // Code: strip surrounding blank lines only, never inner whitespace.
  if (type === "code") {
    return unixText.replace(/^\n+|\n+$/g, "");
  }

  const trimmed = unixText.trim();

  // Lists keep their exact line layout.
  const looksLikeList = trimmed.includes("\n- ");
  if (looksLikeList) {
    return trimmed;
  }

  // Paragraph text: collapse any newline run into a single blank line.
  const pieces: string[] = [];
  for (const piece of trimmed.split(/\n+/)) {
    pieces.push(piece.trim());
  }
  return pieces.join("\n\n");
}
/**
 * Adds the text of a non-heading token to the section on top of the stack.
 *
 * Tokens without raw text, or whose text normalizes to an empty string, leave
 * the state untouched. The first chunk after a heading replaces the section's
 * `_content`; later chunks are appended, separated by a single newline for
 * code blocks and `- ` list chunks and by a blank line for everything else.
 *
 * @param state - Current parser state; the top section object is updated in place.
 * @param token - Token whose `raw` text should be stored.
 * @returns The same state when nothing was added, otherwise a new state with a
 *   copied stack and the normalized chunk appended to `content`.
 */
function handleContent(state: ParserState, token: Token): ParserState {
  const rawText = token.raw;
  if (!rawText) {
    return state;
  }

  const chunk = normalizeContent(rawText, token.type);
  if (!chunk) {
    return state;
  }

  const stack = state.sectionStack.slice();
  const target = stack[stack.length - 1];

  if (state.content.length > 0) {
    // Lists and code blocks sit directly under the previous chunk; paragraphs
    // are set apart by a blank line.
    const joinsTightly = token.type === "code" || chunk.startsWith("- ");
    const separator = joinsTightly ? "\n" : "\n\n";
    target._content = target._content + separator + chunk;
  } else {
    target._content = chunk;
  }

  return {
    ...state,
    content: state.content.concat(chunk),
    sectionStack: stack,
  };
}
/**
 * Converts a markdown document into a nested section tree.
 *
 * The text is lexed with `marked`; heading tokens open nested sections and all
 * other tokens contribute to the `_content` of the section currently open.
 *
 * @param markdown - Markdown source; `\r\n` line endings are accepted.
 * @returns The root section, holding any text that precedes the first heading
 *   plus one property per top-level section.
 */
export function mdToJson(markdown: string): Section {
  const source = markdown.replace(/\r\n/g, "\n");

  let state: ParserState = createInitialState();
  for (const token of marked.lexer(source)) {
    state =
      token.type === "heading"
        ? handleHeader(state, token)
        : handleContent(state, token);
  }

  // The bottom of the stack is the document root.
  return state.sectionStack[0];
}