1- import { DEFAULT_WORD_GAP_PX , type LineV2 } from '$lib/serialization/schema.js' ;
1+ import {
2+ DEFAULT_WORD_GAP_PX ,
3+ type LinePairGapV2 ,
4+ type LineV2 ,
5+ type PairControlV2 ,
6+ type VisualSettingsV2
7+ } from '$lib/serialization/schema.js' ;
28
3- export type ExampleId = 'simple' | 'transcription' | 'rtl' | 'cjk' ;
9+ export type ExampleId = 'simple' | 'glosses' | 'rtl' | 'tagalog' | 'cjk' ;
410
511/** Token id pair `[upperLineId-index, lowerLineId-index]` connected after the snapshot loads. */
612export type ExampleConnection = readonly [ string , string ] ;
@@ -9,6 +15,16 @@ export interface ExampleEntry {
915 id : ExampleId ;
1016 label : string ;
1117 lines : LineV2 [ ] ;
18+ /** Per-pair connector visibility (e.g., hide connectors between text and its tightly-stacked gloss row). */
19+ pairControls ?: PairControlV2 [ ] ;
20+ /** Per-pair vertical gaps (px); omit a pair to use the default. */
21+ linePairGaps ?: LinePairGapV2 [ ] ;
22+ /**
23+ * Tokenizer / visual setting overrides applied while the example is loaded.
24+ * Token-related fields are reset to defaults first, so customizations from a previous
25+ * example never leak across loads.
26+ */
27+ settings ?: Partial < VisualSettingsV2 > ;
1228 connections : ExampleConnection [ ] ;
1329}
1430
@@ -37,10 +53,12 @@ const noto = (
3753
3854/**
3955 * Curated, opinionated set of preset alignments shown in the “Load example” dropdown.
40- * Each entry is a self-contained project: lines + connections to draw between them.
56+ * Each entry is a self-contained project: lines, optional pair controls / gaps / settings,
57+ * plus the connections to draw between them.
4158 *
42- * Connection ids must reference `lineId-tokenIndex` after whitespace tokenization with the
43- * default visual settings; do not rely on user-customized split/merge characters here.
59+ * Connection ids reference `lineId-tokenIndex` after tokenization with the example’s
60+ * effective settings (the loader resets token settings to defaults before applying
61+ * `example.settings`).
4462 */
4563export const EXAMPLES : readonly ExampleEntry [ ] = [
4664 {
@@ -54,61 +72,123 @@ export const EXAMPLES: readonly ExampleEntry[] = [
5472 ]
5573 } ,
5674 {
57- id : 'transcription' ,
58- label : 'Turkish with IPA (3 lines)' ,
75+ // Turkish interlinear: morpheme glosses → IPA → segmented text → free translation.
76+ // Only `|` splits tokens here so glosses can show a literal hyphen between stem and tag
77+ // (`garden|-|LOC` → garden, -, LOC) while Turkish/IPA use plain `bahçe|de` / `bahtʃe|de`.
78+ id : 'glosses' ,
79+ label : 'Turkish interlinear (IPA + glosses)' ,
5980 lines : [
60- inter ( 'Merhaba dünya' , 's' , 34 ) ,
61- noto ( 'meɾˈhaba dyzˈnja' , 'ipa' , 'Noto Sans' , 28 ) ,
62- inter ( 'Hello world' , 't' , 34 )
81+ inter ( 'child garden|-|LOC play|-|PROG' , 'gl' , 22 ) ,
82+ noto ( 'tʃodʒuk bahtʃe|de ojnu|joɾ' , 'ipa' , 'Noto Sans' , 26 ) ,
83+ inter ( 'Çocuk bahçe|de oynu|yor' , 's' , 36 ) ,
84+ inter ( 'The child is+playing in the garden' , 't' , 30 )
85+ ] ,
86+ settings : { tokenSplitChars : '|' } ,
87+ // Top three rows form one interlinear block: tight vertical spacing, no link lines.
88+ // The free translation sits at a normal distance below, with full link drawing.
89+ pairControls : [
90+ { upperLineId : 'gl' , lowerLineId : 'ipa' , showConnectors : false } ,
91+ { upperLineId : 'ipa' , lowerLineId : 's' , showConnectors : false }
92+ ] ,
93+ linePairGaps : [
94+ { upperLineId : 'gl' , lowerLineId : 'ipa' , gapPx : 16 } ,
95+ { upperLineId : 'ipa' , lowerLineId : 's' , gapPx : 16 }
6396 ] ,
6497 connections : [
65- [ 's-0' , 'ipa-0' ] ,
66- [ 's-1' , 'ipa-1' ] ,
67- [ 'ipa-0' , 't-0' ] ,
68- [ 'ipa-1' , 't-1' ]
98+ // Interlinear: seven gloss tokens vs five IPA/Turkish tokens; hyphen-only gloss
99+ // tokens have no counterpart in orthography/IPA.
100+ [ 'gl-0' , 'ipa-0' ] ,
101+ [ 'gl-1' , 'ipa-1' ] ,
102+ [ 'gl-3' , 'ipa-2' ] ,
103+ [ 'gl-4' , 'ipa-3' ] ,
104+ [ 'gl-6' , 'ipa-4' ] ,
105+ [ 'ipa-0' , 's-0' ] ,
106+ [ 'ipa-1' , 's-1' ] ,
107+ [ 'ipa-2' , 's-2' ] ,
108+ [ 'ipa-3' , 's-3' ] ,
109+ [ 'ipa-4' , 's-4' ] ,
110+ // Segmented Turkish ↔ free translation. English uses the default merge char (`+`)
111+ // so “is playing” is one alignment token while still displaying as two words.
112+ [ 's-0' , 't-1' ] , // Çocuk → child
113+ [ 's-1' , 't-5' ] , // bahçe → garden
114+ [ 's-2' , 't-3' ] , // de → in
115+ [ 's-3' , 't-2' ] , // oynu → is+playing
116+ [ 's-4' , 't-2' ] // yor → is+playing
117+ // `t-0` (The) and `t-4` (the) intentionally unaligned: English-only definiteness.
69118 ]
70119 } ,
71120 {
121+ // Hebrew → Arabic → English. Two right-to-left scripts compared against an LTR
122+ // translation. Hebrew writes the preposition bound to the noun (`בבית`); we mark the
123+ // morpheme boundary with `-` in the editor so it splits under the default tokenizer.
72124 id : 'rtl' ,
73- label : 'Hebrew + Arabic + English (RTL, merged ב+בית / في+البيت)' ,
125+ label : 'Hebrew + Arabic + English (right-to-left)' ,
126+ lines : [
127+ noto ( 'אני גר ב-בית גדול' , 'he' , 'Noto Sans Hebrew' , 36 , true ) ,
128+ noto ( 'أنا أسكن في بيت كبير' , 'ar' , 'Noto Sans Arabic' , 36 , true ) ,
129+ inter ( 'I live in a big house' , 'en' , 30 )
130+ ] ,
131+ connections : [
132+ // Hebrew (5 tokens after the `-` split) ↔ Arabic (5 tokens). Both put the
133+ // adjective after the noun, so the rows are parallel — no crossings.
134+ [ 'he-0' , 'ar-0' ] , // אני ↔ أنا
135+ [ 'he-1' , 'ar-1' ] , // גר ↔ أسكن
136+ [ 'he-2' , 'ar-2' ] , // ב ↔ في
137+ [ 'he-3' , 'ar-3' ] , // בית ↔ بيت
138+ [ 'he-4' , 'ar-4' ] , // גדול ↔ كبير
139+ // Arabic ↔ English. Adjective-noun order flips, so the last two links cross.
140+ [ 'ar-0' , 'en-0' ] , // أنا ↔ I
141+ [ 'ar-1' , 'en-1' ] , // أسكن ↔ live
142+ [ 'ar-2' , 'en-2' ] , // في ↔ in
143+ [ 'ar-3' , 'en-5' ] , // بيت ↔ house (crossing)
144+ [ 'ar-4' , 'en-4' ] // كبير ↔ big (crossing)
145+ // `en-3` (a) intentionally unaligned: Hebrew/Arabic have no indefinite article.
146+ ]
147+ } ,
148+ {
149+ // Tagalog compounds often contain hyphens that should remain inside a word rather than
150+ // becoming alignment boundaries. This example sets `tokenSplitChars` to `.` (absent from both
151+ // lines) so `-` no longer splits, while the predicate-initial sentence aligns to natural English.
152+ id : 'tagalog' ,
153+ label : 'Tagalog compounds (keep hyphens)' ,
74154 lines : [
75- // `+` is the default merge character: bound morphemes stay one alignment token but show
76- // with a space in the preview (e.g. Hebrew inseparable preposition + noun vs. English
77- // “at home” as two words).
78- noto ( 'אני אוהב ב+בית' , 'he' , 'Noto Sans Hebrew' , 34 , true ) ,
79- noto ( 'أنا أحب في+البيت' , 'ar' , 'Noto Sans Arabic' , 34 , true ) ,
80- inter ( 'I love at home' , 'en' , 32 )
155+ noto ( 'Maganda ang bahay-kubo sa tabing-ilog' , 'tl' , 'Noto Sans' , 34 ) ,
156+ inter ( 'The nipa+hut by the river is beautiful' , 'en' , 30 )
81157 ] ,
158+ settings : { tokenSplitChars : '.' } ,
82159 connections : [
83- [ 'he-0' , 'ar-0' ] ,
84- [ 'he-1' , 'ar-1' ] ,
85- [ 'he-2' , 'ar-2' ] ,
86- [ 'ar-0' , 'en-0' ] ,
87- [ 'ar-1' , 'en-1' ] ,
88- [ 'ar-2' , 'en-2' ] ,
89- [ 'ar-2' , 'en-3' ]
160+ [ 'tl-0' , 'en-6' ] , // Maganda → beautiful
161+ [ 'tl-1' , 'en-0' ] , // ang → The
162+ [ 'tl-2' , 'en-1' ] , // bahay-kubo → nipa+hut
163+ [ 'tl-3' , 'en-2' ] , // sa → by
164+ [ 'tl-4' , 'en-4' ] // tabing-ilog → river
90165 ]
91166 } ,
92167 {
168+ // Japanese (SOV) ↔ Chinese (SVO) ↔ English (SVO). Putting two related East-Asian
169+ // languages side by side highlights how the verb travels in alignment, while CJK
170+ // scripts share most content morphemes (今日/今天, 本/书, 読/读).
171+ // Word boundaries are inserted with spaces because neither script uses them
172+ // natively — the alignment tool needs explicit token boundaries to draw links.
93173 id : 'cjk' ,
94- label : 'Japanese + Chinese + English' ,
174+ label : 'Japanese + Chinese + English (SOV ↔ SVO)' ,
95175 lines : [
96- // Word/phrase boundaries marked with spaces — CJK scripts have no native word
97- // separators, and an alignment tool needs explicit token boundaries to draw links.
98- // Horizontal Japanese and Chinese are laid out LTR here (standard typography).
99- noto ( '私は 本を 読む' , 'ja' , 'Noto Sans JP' , 34 ) ,
100- noto ( '我 读 书' , 'zh' , 'Noto Sans SC' , 34 ) ,
101- inter ( 'I read books' , 'en' , 32 )
176+ noto ( '今日 私は 本を 読みました' , 'ja' , 'Noto Sans JP' , 34 ) ,
177+ noto ( '今天 我 读了 书' , 'zh' , 'Noto Sans SC' , 34 ) ,
178+ inter ( 'Today I read a book' , 'en' , 30 )
102179 ] ,
103- // Japanese is SOV (verb last) while Chinese/English are SVO — verb/object swap shows
104- // up as crossing connectors between the Japanese and Chinese rows.
105180 connections : [
106- [ 'ja-0' , 'zh-0' ] ,
107- [ 'ja-1' , 'zh-2' ] ,
108- [ 'ja-2' , 'zh-1' ] ,
109- [ 'zh-0' , 'en-0' ] ,
110- [ 'zh-1' , 'en-1' ] ,
111- [ 'zh-2' , 'en-2' ]
181+ // Japanese ↔ Chinese: the object precedes the verb in Japanese (本を 読みました)
182+ // but follows it in Chinese (读了 书) — the swap shows up as a clean crossing.
183+ [ 'ja-0' , 'zh-0' ] , // 今日 ↔ 今天
184+ [ 'ja-1' , 'zh-1' ] , // 私は ↔ 我
185+ [ 'ja-2' , 'zh-3' ] , // 本を ↔ 书 (crossing)
186+ [ 'ja-3' , 'zh-2' ] , // 読みました ↔ 读了 (crossing)
187+ // Chinese ↔ English: parallel SVO. English “a” has no Chinese counterpart.
188+ [ 'zh-0' , 'en-0' ] , // 今天 ↔ Today
189+ [ 'zh-1' , 'en-1' ] , // 我 ↔ I
190+ [ 'zh-2' , 'en-2' ] , // 读了 ↔ read
191+ [ 'zh-3' , 'en-4' ] // 书 ↔ book
112192 ]
113193 }
114194] as const ;
0 commit comments