tinygodsdev
diff --git a/‎bitext/src/lib/components/editor/LineCard.svelte‎
Lines changed: 36 additions & 4 deletions b/‎bitext/src/lib/components/editor/LineCard.svelte‎
Lines changed: 36 additions & 4 deletions
diff --git a/‎bitext/src/lib/components/share/ShareDialog.svelte‎
Lines changed: 59 additions & 1 deletion b/‎bitext/src/lib/components/share/ShareDialog.svelte‎
Lines changed: 59 additions & 1 deletion
diff --git a/‎bitext/src/lib/serialization/schema.ts‎
Lines changed: 1 addition & 1 deletion b/‎bitext/src/lib/serialization/schema.ts‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bitext/src/lib/state/examples.ts‎
Lines changed: 123 additions & 43 deletions b/‎bitext/src/lib/state/examples.ts‎
Lines changed: 123 additions & 43 deletions
@@ -1,8 +1,11 @@
 <script lang="ts">
+	import { TrashBinOutline } from 'flowbite-svelte-icons';
 	import type { LineV2 } from '$lib/serialization/schema.js';
 	import { projectStore } from '$lib/state/project.svelte.js';
 	import { ButtonGroup, Input, InputAddon } from 'flowbite-svelte';
 
+	const addonClass = 'border-gray-300! bg-gray-50! px-2! dark:border-gray-600! dark:bg-gray-700!';
+
 	let {
 		line,
 		index
@@ -31,6 +34,25 @@
 	/** Flips this line between left-to-right and right-to-left via the project store. */
 	function toggleLineDir() {
 		const rtl = !line.rtl;
 		projectStore.updateLineStyle(line.id, { rtl });
 	}
+
+	/**
+	 * Reports whether any connection in the project store touches this line.
+	 * Connection endpoints are matched by the `<line.id>-` token id prefix.
+	 */
+	function lineHasAnyConnection(): boolean {
+		const tokenPrefix = `${line.id}-`;
+		return projectStore.connections.some(
+			({ upperTokenId, lowerTokenId }) =>
+				upperTokenId.startsWith(tokenPrefix) || lowerTokenId.startsWith(tokenPrefix)
+		);
+	}
+
+	/**
+	 * Decides whether removal of this line may proceed.
+	 * Returns true straight away when the line has no connections or when no `window`
+	 * exists to show a dialog; otherwise returns the user's answer to a confirm prompt.
+	 */
+	function confirmRemove(): boolean {
+		if (!lineHasAnyConnection() || typeof window === 'undefined') {
+			return true;
+		}
+		return window.confirm(
+			'This line has connections. Removing it will delete those links. Continue?'
+		);
+	}
+
+	/** Click handler for the trash button: removes this line once `confirmRemove()` allows it. */
+	function removeThisLine() {
+		if (confirmRemove()) {
+			projectStore.removeLine(line.id);
+		}
+	}
 </script>
 
 <div
@@ -56,8 +78,8 @@
 		</span>
 	</div>
 
+	<label class="sr-only" for="line-{line.id}">Line {index + 1} text</label>
 	<ButtonGroup class="w-full min-w-32 flex-1 basis-48 sm:basis-auto">
-		<label class="sr-only" for="line-{line.id}">Line {index + 1} text</label>
 		<Input
 			id="line-{line.id}"
 			type="text"
@@ -68,9 +90,7 @@
 			oninput={(e) =>
 				projectStore.setLineText(line.id, (e.currentTarget as HTMLInputElement).value)}
 		/>
-		<InputAddon
-			class="rounded-e-none! border-gray-300! bg-gray-50! px-2! dark:border-gray-600! dark:bg-gray-700!"
-		>
+		<InputAddon class={addonClass}>
 			<button
 				type="button"
 				class="min-w-9 cursor-pointer select-none border-0 bg-transparent p-0 text-center text-[10px] font-medium tracking-wide text-gray-600 uppercase hover:text-gray-900 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-primary-500 focus-visible:ring-offset-1 dark:text-gray-400 dark:hover:text-gray-100 dark:focus-visible:ring-primary-400 dark:focus-visible:ring-offset-gray-800"
@@ -82,5 +102,17 @@
 				{line.rtl ? 'RTL' : 'LTR'}
 			</button>
 		</InputAddon>
+		<InputAddon class="{addonClass} rounded-e-none!">
+			<button
+				type="button"
+				class="flex cursor-pointer items-center justify-center border-0 bg-transparent p-0 text-gray-500 hover:text-red-600 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-primary-500 focus-visible:ring-offset-1 disabled:cursor-not-allowed disabled:opacity-40 disabled:hover:text-gray-500 dark:text-gray-400 dark:hover:text-red-400 dark:focus-visible:ring-primary-400 dark:focus-visible:ring-offset-gray-800 dark:disabled:hover:text-gray-400"
+				title="Remove line"
+				aria-label="Remove line"
+				disabled={projectStore.lines.length <= 2}
+				onclick={removeThisLine}
+			>
+				<TrashBinOutline class="h-4 w-4 shrink-0" aria-hidden="true" />
+			</button>
+		</InputAddon>
 	</ButtonGroup>
 </div>
@@ -1,8 +1,14 @@
 <script lang="ts">
+	import { browser } from '$app/environment';
 	import { Button, Modal } from 'flowbite-svelte';
 	import { ALIGNER_SITE_HOST } from '$lib/brand.js';
 	import { encodeState } from '$lib/serialization/encode.js';
-	import { SCHEMA_VERSION, type AppStateV2 } from '$lib/serialization/schema.js';
+	import {
+		SCHEMA_VERSION,
+		defaultVisualSettingsV2,
+		type AppStateV2,
+		type VisualSettingsV2
+	} from '$lib/serialization/schema.js';
 	import { projectStore } from '$lib/state/project.svelte.js';
 	import { settingsStore } from '$lib/state/settings.svelte.js';
 	import { getShareUrl } from '$lib/share/url.js';
@@ -13,6 +19,7 @@
 	let qrSrc = $state<string | null>(null);
 	let qrErr = $state<string | null>(null);
 	let qrLoading = $state(false);
+	let dataObjectCopied = $state(false);
 
 	/** Called from parent via `bind:this` */
 	export function open() {
@@ -85,6 +92,49 @@
 		a.download = 'alignment-share-qr.png';
 		a.click();
 	}
+
+	/**
+	 * Collects the visual settings whose current value is not strictly equal (`!==`)
+	 * to the corresponding default.
+	 *
+	 * @returns the differing entries, or `undefined` when every setting equals its default.
+	 */
+	function visualSettingsDiff(): Partial<VisualSettingsV2> | undefined {
+		const current = settingsStore.settings;
+		const defaults = defaultVisualSettingsV2();
+		const overrides: Record<string, unknown> = {};
+		for (const key of keysOfVisualSettings()) {
+			if (current[key] !== defaults[key]) {
+				overrides[key] = current[key];
+			}
+		}
+		if (Object.keys(overrides).length === 0) return undefined;
+		return overrides as Partial<VisualSettingsV2>;
+	}
+
+	/** Lists the setting names, taken from the keys of a freshly built defaults object. */
+	function keysOfVisualSettings(): (keyof VisualSettingsV2)[] {
+		const defaults = defaultVisualSettingsV2();
+		return Object.keys(defaults) as (keyof VisualSettingsV2)[];
+	}
+
+	/**
+	 * Builds a plain object shaped like `ExampleEntry` in `src/lib/state/examples.ts`
+	 * from the current project snapshot; `id` and `label` are placeholders.
+	 *
+	 * `pairControls` and `linePairGaps` are included only when non-empty, and `settings`
+	 * only when at least one visual setting differs from its default.
+	 */
+	function buildExampleDataObject(): Record<string, unknown> {
+		const snapshot = projectStore.getSnapshot();
+		const candidate: Record<string, unknown> = {
+			format: 'bitext-example-candidate-v1',
+			id: '<ExampleId>',
+			label: '<Example label>',
+			// Copy each line and its nested font object so the result shares no references.
+			lines: snapshot.lines.map((line) => ({ ...line, font: { ...line.font } })),
+			connections: snapshot.connections.map(({ upperTokenId, lowerTokenId }) => [
+				upperTokenId,
+				lowerTokenId
+			]),
+			...(snapshot.pairControls.length > 0 && {
+				pairControls: snapshot.pairControls.map((control) => ({ ...control }))
+			}),
+			...(snapshot.linePairGaps.length > 0 && {
+				linePairGaps: snapshot.linePairGaps.map((gap) => ({ ...gap }))
+			})
+		};
+		const overrides = visualSettingsDiff();
+		return overrides ? { ...candidate, settings: overrides } : candidate;
+	}
+
+	/**
+	 * Copies the example data object, serialized as tab-indented JSON, to the clipboard
+	 * and shows the "Copied!" label for two seconds. Does nothing outside the browser.
+	 *
+	 * Clipboard access can fail (permission denied, unfocused document) and
+	 * `navigator.clipboard` is undefined in insecure contexts. Since this runs as a click
+	 * handler, such failures are caught here instead of surfacing as unhandled rejections,
+	 * and the "Copied!" label is not shown.
+	 */
+	async function copyDataObject() {
+		if (!browser) return;
+		const text = JSON.stringify(buildExampleDataObject(), null, '\t');
+		try {
+			await navigator.clipboard.writeText(text);
+		} catch (err: unknown) {
+			console.error('Could not copy the data object to the clipboard', err);
+			return;
+		}
+		dataObjectCopied = true;
+		setTimeout(() => (dataObjectCopied = false), 2000);
+	}
 </script>
 
 <Modal bind:open={modalOpen} title="Share" size="md">
@@ -127,6 +177,14 @@
 					Download QR (PNG)
 				</Button>
 			{/if}
+			<Button
+				color="alternative"
+				size="sm"
+				class="shrink-0 border border-gray-200 bg-gray-50 text-gray-500 hover:bg-gray-100 dark:border-gray-600 dark:bg-gray-800/80 dark:text-gray-400 dark:hover:bg-gray-800"
+				onclick={copyDataObject}
+			>
+				{dataObjectCopied ? 'Copied!' : 'Data object'}
+			</Button>
 		</div>
 	</div>
 </Modal>
@@ -35,7 +35,7 @@ export const MAX_TEXT_SIZE_PX = 64;
 export const DEFAULT_WORD_GAP_PX = 14;
 export const MIN_WORD_GAP_PX = 0;
 export const MAX_WORD_GAP_PX = 56;
-export const DEFAULT_TOKEN_SPLIT_CHARS = '.-';
+export const DEFAULT_TOKEN_SPLIT_CHARS = '.-|';
 /** Default join character for new projects; omits from compact when equal. */
 export const DEFAULT_TOKEN_MERGE_CHAR = '+';
 
 
@@ -1,6 +1,12 @@
-import { DEFAULT_WORD_GAP_PX, type LineV2 } from '$lib/serialization/schema.js';
+import {
+	DEFAULT_WORD_GAP_PX,
+	type LinePairGapV2,
+	type LineV2,
+	type PairControlV2,
+	type VisualSettingsV2
+} from '$lib/serialization/schema.js';
 
-export type ExampleId = 'simple' | 'transcription' | 'rtl' | 'cjk';
+export type ExampleId = 'simple' | 'glosses' | 'rtl' | 'tagalog' | 'cjk';
 
 /** Token id pair `[upperLineId-index, lowerLineId-index]` connected after the snapshot loads. */
 export type ExampleConnection = readonly [string, string];
@@ -9,6 +15,16 @@ export interface ExampleEntry {
 	id: ExampleId;
 	label: string;
 	lines: LineV2[];
+	/** Per-pair connector visibility (e.g., hide connectors between text and its tightly-stacked gloss row). */
+	pairControls?: PairControlV2[];
+	/** Per-pair vertical gaps (px); omit a pair to use the default. */
+	linePairGaps?: LinePairGapV2[];
+	/**
+	 * Tokenizer / visual setting overrides applied while the example is loaded.
+	 * Token-related fields are reset to defaults first, so customizations from a previous
+	 * example never leak across loads.
+	 */
+	settings?: Partial<VisualSettingsV2>;
 	connections: ExampleConnection[];
 }
 
@@ -37,10 +53,12 @@ const noto = (
 
 /**
  * Curated, opinionated set of preset alignments shown in the “Load example” dropdown.
- * Each entry is a self-contained project: lines + connections to draw between them.
+ * Each entry is a self-contained project: lines, optional pair controls / gaps / settings,
+ * plus the connections to draw between them.
  *
- * Connection ids must reference `lineId-tokenIndex` after whitespace tokenization with the
- * default visual settings; do not rely on user-customized split/merge characters here.
+ * Connection ids reference `lineId-tokenIndex` after tokenization with the example’s
+ * effective settings (the loader resets token settings to defaults before applying
+ * `example.settings`).
  */
 export const EXAMPLES: readonly ExampleEntry[] = [
 	{
@@ -54,61 +72,123 @@ export const EXAMPLES: readonly ExampleEntry[] = [
 		]
 	},
 	{
-		id: 'transcription',
-		label: 'Turkish with IPA (3 lines)',
+		// Turkish interlinear: morpheme glosses → IPA → segmented text → free translation.
+		// Only `|` splits tokens here so glosses can show a literal hyphen between stem and tag
+		// (`garden|-|LOC` → garden, -, LOC) while Turkish/IPA use plain `bahçe|de` / `bahtʃe|de`.
+		id: 'glosses',
+		label: 'Turkish interlinear (IPA + glosses)',
 		lines: [
-			inter('Merhaba dünya', 's', 34),
-			noto('meɾˈhaba dyzˈnja', 'ipa', 'Noto Sans', 28),
-			inter('Hello world', 't', 34)
+			inter('child garden|-|LOC play|-|PROG', 'gl', 22),
+			noto('tʃodʒuk bahtʃe|de ojnu|joɾ', 'ipa', 'Noto Sans', 26),
+			inter('Çocuk bahçe|de oynu|yor', 's', 36),
+			inter('The child is+playing in the garden', 't', 30)
+		],
+		settings: { tokenSplitChars: '|' },
+		// Top three rows form one interlinear block: tight vertical spacing, no link lines.
+		// The free translation sits at a normal distance below, with full link drawing.
+		pairControls: [
+			{ upperLineId: 'gl', lowerLineId: 'ipa', showConnectors: false },
+			{ upperLineId: 'ipa', lowerLineId: 's', showConnectors: false }
+		],
+		linePairGaps: [
+			{ upperLineId: 'gl', lowerLineId: 'ipa', gapPx: 16 },
+			{ upperLineId: 'ipa', lowerLineId: 's', gapPx: 16 }
 		],
 		connections: [
-			['s-0', 'ipa-0'],
-			['s-1', 'ipa-1'],
-			['ipa-0', 't-0'],
-			['ipa-1', 't-1']
+			// Interlinear: seven gloss tokens vs five IPA/Turkish tokens; hyphen-only gloss
+			// tokens have no counterpart in orthography/IPA.
+			['gl-0', 'ipa-0'],
+			['gl-1', 'ipa-1'],
+			['gl-3', 'ipa-2'],
+			['gl-4', 'ipa-3'],
+			['gl-6', 'ipa-4'],
+			['ipa-0', 's-0'],
+			['ipa-1', 's-1'],
+			['ipa-2', 's-2'],
+			['ipa-3', 's-3'],
+			['ipa-4', 's-4'],
+			// Segmented Turkish ↔ free translation. English uses the default merge char (`+`)
+			// so “is playing” is one alignment token while still displaying as two words.
+			['s-0', 't-1'], // Çocuk → child
+			['s-1', 't-5'], // bahçe → garden
+			['s-2', 't-3'], // de → in
+			['s-3', 't-2'], // oynu → is+playing
+			['s-4', 't-2'] // yor → is+playing
+			// `t-0` (The) and `t-4` (the) intentionally unaligned: English-only definiteness.
 		]
 	},
 	{
+		// Hebrew → Arabic → English. Two right-to-left scripts compared against an LTR
+		// translation. Hebrew writes the preposition bound to the noun (`בבית`); we mark the
+		// morpheme boundary with `-` in the editor so it splits under the default tokenizer.
 		id: 'rtl',
-		label: 'Hebrew + Arabic + English (RTL, merged ב+בית / في+البيت)',
+		label: 'Hebrew + Arabic + English (right-to-left)',
+		lines: [
+			noto('אני גר ב-בית גדול', 'he', 'Noto Sans Hebrew', 36, true),
+			noto('أنا أسكن في بيت كبير', 'ar', 'Noto Sans Arabic', 36, true),
+			inter('I live in a big house', 'en', 30)
+		],
+		connections: [
+			// Hebrew (5 tokens after the `-` split) ↔ Arabic (5 tokens). Both put the
+			// adjective after the noun, so the rows are parallel — no crossings.
+			['he-0', 'ar-0'], // אני ↔ أنا
+			['he-1', 'ar-1'], // גר ↔ أسكن
+			['he-2', 'ar-2'], // ב ↔ في
+			['he-3', 'ar-3'], // בית ↔ بيت
+			['he-4', 'ar-4'], // גדול ↔ كبير
+			// Arabic ↔ English. Adjective-noun order flips, so the last two links cross.
+			['ar-0', 'en-0'], // أنا ↔ I
+			['ar-1', 'en-1'], // أسكن ↔ live
+			['ar-2', 'en-2'], // في ↔ in
+			['ar-3', 'en-5'], // بيت ↔ house  (crossing)
+			['ar-4', 'en-4'] //  كبير ↔ big   (crossing)
+			// `en-3` (a) intentionally unaligned: Hebrew/Arabic have no indefinite article.
+		]
+	},
+	{
+		// Tagalog compounds often contain hyphens that should remain inside a word rather than
+		// becoming alignment boundaries. This example disables `-` as a split character while
+		// keeping the predicate-initial Tagalog sentence aligned to a natural English translation.
+		id: 'tagalog',
+		label: 'Tagalog compounds (keep hyphens)',
 		lines: [
-			// `+` is the default merge character: bound morphemes stay one alignment token but show
-			// with a space in the preview (e.g. Hebrew inseparable preposition + noun vs. English
-			// “at home” as two words).
-			noto('אני אוהב ב+בית', 'he', 'Noto Sans Hebrew', 34, true),
-			noto('أنا أحب في+البيت', 'ar', 'Noto Sans Arabic', 34, true),
-			inter('I love at home', 'en', 32)
+			noto('Maganda ang bahay-kubo sa tabing-ilog', 'tl', 'Noto Sans', 34),
+			inter('The nipa+hut by the river is beautiful', 'en', 30)
 		],
+		settings: { tokenSplitChars: '.' },
 		connections: [
-			['he-0', 'ar-0'],
-			['he-1', 'ar-1'],
-			['he-2', 'ar-2'],
-			['ar-0', 'en-0'],
-			['ar-1', 'en-1'],
-			['ar-2', 'en-2'],
-			['ar-2', 'en-3']
+			['tl-0', 'en-6'], // Maganda → beautiful
+			['tl-1', 'en-0'], // ang → The
+			['tl-2', 'en-1'], // bahay-kubo → nipa+hut
+			['tl-3', 'en-2'], // sa → by
+			['tl-4', 'en-4'] // tabing-ilog → river
 		]
 	},
 	{
+		// Japanese (SOV) ↔ Chinese (SVO) ↔ English (SVO). Putting two related East-Asian
+		// languages side by side highlights how the verb travels in alignment, while CJK
+		// scripts share most content morphemes (今日/今天, 本/书, 読/读).
+		// Word boundaries are inserted with spaces because neither script uses them
+		// natively — the alignment tool needs explicit token boundaries to draw links.
 		id: 'cjk',
-		label: 'Japanese + Chinese + English',
+		label: 'Japanese + Chinese + English (SOV ↔ SVO)',
 		lines: [
-			// Word/phrase boundaries marked with spaces — CJK scripts have no native word
-			// separators, and an alignment tool needs explicit token boundaries to draw links.
-			// Horizontal Japanese and Chinese are laid out LTR here (standard typography).
-			noto('私は 本を 読む', 'ja', 'Noto Sans JP', 34),
-			noto('我 读 书', 'zh', 'Noto Sans SC', 34),
-			inter('I read books', 'en', 32)
+			noto('今日 私は 本を 読みました', 'ja', 'Noto Sans JP', 34),
+			noto('今天 我 读了 书', 'zh', 'Noto Sans SC', 34),
+			inter('Today I read a book', 'en', 30)
 		],
-		// Japanese is SOV (verb last) while Chinese/English are SVO — verb/object swap shows
-		// up as crossing connectors between the Japanese and Chinese rows.
 		connections: [
-			['ja-0', 'zh-0'],
-			['ja-1', 'zh-2'],
-			['ja-2', 'zh-1'],
-			['zh-0', 'en-0'],
-			['zh-1', 'en-1'],
-			['zh-2', 'en-2']
+			// Japanese ↔ Chinese: the object precedes the verb in Japanese (本を 読みました)
+			// but follows it in Chinese (读了 书) — the swap shows up as a clean crossing.
+			['ja-0', 'zh-0'], // 今日 ↔ 今天
+			['ja-1', 'zh-1'], // 私は ↔ 我
+			['ja-2', 'zh-3'], // 本を ↔ 书   (crossing)
+			['ja-3', 'zh-2'], // 読みました ↔ 读了 (crossing)
+			// Chinese ↔ English: parallel SVO. English “a” has no Chinese counterpart.
+			['zh-0', 'en-0'], // 今天 ↔ Today
+			['zh-1', 'en-1'], // 我 ↔ I
+			['zh-2', 'en-2'], // 读了 ↔ read
+			['zh-3', 'en-4'] //  书 ↔ book
 		]
 	}
 ] as const;