Skip to content

Commit 6fad5ae

Browse files
committed
✨ add smart reflow
1 parent c7a9403 commit 6fad5ae

1 file changed

Lines changed: 118 additions & 48 deletions

File tree

frontend/src/editor/text_tools.tsx

Lines changed: 118 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,33 @@ export const MenuItemButton = primitiveWithClassname('button', [
1515
'block',
1616
]);
1717

18+
/**
 * Merges, in place, every paragraph into its predecessor when both have the same speaker.
 *
 * The speakers of neighbouring paragraphs are compared (loosely, with `==`) on the
 * untouched document first; only afterwards are the recorded pairs merged. The tokens of
 * the absorbed paragraph are deep-copied through a JSON round-trip before being appended,
 * and the absorbed paragraph is then removed from `doc.children`.
 *
 * @param doc - the document whose `children` (paragraphs) are rewritten in place
 */
function mergeSameSpeakerParagraphs(doc: Document) {
  // Pass 1: remember the index of each paragraph whose successor has the same speaker.
  const mergePoints: number[] = [];
  let cursor = 0;
  while (cursor < doc.children.length - 1) {
    const current = doc.children[cursor];
    const following = doc.children[cursor + 1];
    if (current.speaker == following.speaker) {
      mergePoints.push(cursor);
    }
    cursor += 1;
  }

  // Pass 2: perform the merges front to back. Each merge removes one paragraph, so the
  // position of a merge point inside `mergePoints` equals the number of paragraphs that
  // were already removed in front of it.
  for (const [alreadyRemoved, originalIndex] of mergePoints.entries()) {
    const target = originalIndex - alreadyRemoved;
    const copiedTokens = JSON.parse(JSON.stringify(doc.children[target + 1].children));
    doc.children[target].children.push(...copiedTokens);
    doc.children.splice(target + 1, 1);
  }
}
35+
36+
// Characters that mark the end of a sentence.
const punctuations = ['.', '?', '!'];
// Substrings that contain such a character but must not count as a sentence end.
const non_punctuations = ['...'];

/**
 * Tells whether `text` contains a sentence-ending punctuation mark.
 *
 * @param text - the text to inspect
 * @returns `true` if `text` includes at least one entry of `punctuations` and none of the
 *          entries of `non_punctuations`; `false` otherwise
 */
function containsSentenceEnd(text: string) {
  // Any excluded substring (e.g. an ellipsis) vetoes the whole text.
  for (const excluded of non_punctuations) {
    if (text.includes(excluded)) {
      return false;
    }
  }
  for (const mark of punctuations) {
    if (text.includes(mark)) {
      return true;
    }
  }
  return false;
}
44+
1845
export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
1946
return (
2047
<Popup
@@ -25,62 +52,19 @@ export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
2552
>
2653
<MenuItemButton
2754
onClick={() => {
28-
const mergePoints: number[] = [];
29-
for (let i = 0; i < editor.doc.children.length - 1; i++) {
30-
const paragraph = editor.doc.children[i];
31-
const nextParagraph = editor.doc.children[i + 1];
32-
if (paragraph.speaker == nextParagraph.speaker) {
33-
mergePoints.push(i);
34-
}
35-
}
36-
editor.update((doc: Document) => {
37-
let removed = 0;
38-
mergePoints.forEach((index) => {
39-
const i = index - removed;
40-
doc.children[i].children.push(
41-
...JSON.parse(JSON.stringify(doc.children[i + 1].children)),
42-
);
43-
doc.children.splice(i + 1, 1);
44-
removed++;
45-
});
46-
});
55+
editor.update(mergeSameSpeakerParagraphs);
4756
}}
4857
>
4958
Reflow to One Paragraph per Speaker
5059
</MenuItemButton>
5160

5261
<MenuItemButton
5362
onClick={() => {
54-
const punctuations = ['.', '?', '!'];
55-
const non_punctuations = ['...'];
56-
const contains_punctuation = (text: string) =>
57-
punctuations.some((punct) => text.includes(punct)) &&
58-
!non_punctuations.some((np) => text.includes(np));
59-
60-
// stategy: we first merge everything that could possibly be merged...
61-
const mergePoints: number[] = [];
62-
for (let i = 0; i < editor.doc.children.length - 1; i++) {
63-
const paragraph = editor.doc.children[i];
64-
const nextParagraph = editor.doc.children[i + 1];
65-
if (
66-
!contains_punctuation(paragraph.children[paragraph.children.length - 1].text) &&
67-
paragraph.speaker == nextParagraph.speaker
68-
) {
69-
mergePoints.push(i);
70-
}
71-
}
7263
editor.update((doc: Document) => {
73-
let removed = 0;
74-
mergePoints.forEach((index) => {
75-
const i = index - removed;
76-
doc.children[i].children.push(
77-
...JSON.parse(JSON.stringify(doc.children[i + 1].children)),
78-
);
79-
doc.children.splice(i + 1, 1);
80-
removed++;
81-
});
64+
// strategy: we first merge everything that could possibly be merged...
65+
mergeSameSpeakerParagraphs(doc);
8266

83-
// ...and only then break up
67+
// ...and only then break up on sentence boundaries
8468
const newChildren: Paragraph[] = [];
8569
doc.children.forEach((paragraph) => {
8670
let currentParagraph = {
@@ -89,7 +73,7 @@ export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
8973
};
9074
paragraph.children.forEach((token) => {
9175
currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
92-
if (contains_punctuation(token.text)) {
76+
if (containsSentenceEnd(token.text)) {
9377
newChildren.push(currentParagraph);
9478
currentParagraph = {
9579
...paragraph,
@@ -107,6 +91,92 @@ export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
10791
>
10892
Reflow to One Paragraph per Sentence
10993
</MenuItemButton>
94+
95+
<MenuItemButton
96+
onClick={() => {
97+
// this strategy tries to split paragraphs at sentence boundaries, but only if there is a pause between the sentences
98+
// or the paragraphs would become too long.
99+
const initial = 2;
100+
const decay = 0.95;
101+
102+
/**
 * Returns the gap between the token at index `i` and the token after it.
 *
 * @param i - index of the token inside `paragraph.children`
 * @param paragraph - the paragraph holding the tokens
 * @returns the next token's `start` minus this token's `end`, or 0 when either token is
 *          missing or the needed `end` / `start` value is undefined
 */
const getPause = (i: number, paragraph: Paragraph) => {
  const endOfCurrent = paragraph.children[i]?.end;
  const startOfNext = paragraph.children[i + 1]?.start;
  if (startOfNext === undefined || endOfCurrent === undefined) {
    return 0;
  }
  return startOfNext - endOfCurrent;
};
110+
111+
editor.update((doc: Document) => {
112+
mergeSameSpeakerParagraphs(doc);
113+
const newChildren: Paragraph[] = [];
114+
/**
 * Appends `paragraph` to `newChildren`, first breaking it up at commas if it is very long.
 *
 * Paragraphs of up to 100 tokens are pushed unchanged. Longer ones are split after tokens
 * that contain a comma and are followed by a pause at least as long as a threshold taken
 * from the longest comma pauses of the paragraph, provided the piece built so far already
 * holds more than 3 tokens.
 *
 * @param paragraph - the paragraph to append (and possibly split)
 */
const addNewChild = (paragraph: Paragraph) => {
  // if the paragraph is very long and does not contain any sentence ends, we still want to break it up
  if (paragraph.children.length <= 100) {
    newChildren.push(paragraph);
    return;
  }

  // Pauses following a token that contains a comma: these are the only candidate split points.
  const silences = paragraph.children
    .map((token, i) => ({ hasComma: token.text.includes(','), pause: getPause(i, paragraph) }))
    .filter((candidate) => candidate.hasComma)
    .map((candidate) => candidate.pause);
  // Sort numerically, ascending. Without a comparator, sort() orders the numbers by their
  // string representation (10 before 2), which would select the wrong threshold below.
  silences.sort((a, b) => a - b);

  // Take the (thresholdIndex + 1)-th longest comma pause as the cut-off, i.e. allow one
  // more split for every further 100 tokens.
  const thresholdIndex = Math.floor(paragraph.children.length / 100);
  // If there are too few comma tokens this is undefined; every comparison against it is
  // then false and the paragraph is emitted in one piece.
  const threshold = silences[silences.length - 1 - thresholdIndex];

  let currentParagraph = {
    ...paragraph,
    children: [] as { text: string }[],
  };
  paragraph.children.forEach((token, i) => {
    currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
    if (
      getPause(i, paragraph) >= threshold &&
      token.text.includes(',') &&
      currentParagraph.children.length > 3
    ) {
      newChildren.push(currentParagraph);
      currentParagraph = {
        ...paragraph,
        children: [],
      };
    }
  });
  // Whatever is left after the last split becomes the final piece.
  if (currentParagraph.children.length > 0) {
    newChildren.push(currentParagraph);
  }
};
149+
doc.children.forEach((paragraph) => {
150+
let minPauseBetweenSentences = initial; // this gets reduced with every additional token
151+
let currentParagraph = {
152+
...paragraph,
153+
children: [] as { text: string }[],
154+
};
155+
paragraph.children.forEach((token, i) => {
156+
currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
157+
minPauseBetweenSentences *= decay;
158+
if (
159+
getPause(i, paragraph) >= minPauseBetweenSentences &&
160+
containsSentenceEnd(token.text)
161+
) {
162+
addNewChild(currentParagraph);
163+
minPauseBetweenSentences = initial;
164+
currentParagraph = {
165+
...paragraph,
166+
children: [],
167+
};
168+
}
169+
});
170+
if (currentParagraph.children.length > 0) {
171+
addNewChild(currentParagraph);
172+
}
173+
});
174+
doc.children = newChildren;
175+
});
176+
}}
177+
>
178+
Smart Reflow ✨
179+
</MenuItemButton>
110180
</Popup>
111181
);
112182
}

0 commit comments

Comments
 (0)