Skip to content

Commit b8edcd1

Browse files
committed
examples attribution
1 parent 79c5ef2 commit b8edcd1

3 files changed

Lines changed: 49 additions & 19 deletions

File tree

bitext/src/lib/examples/catalog/wikipedia.ts

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
1-
import type { GalleryExampleEntry } from '../types-gallery.js';
1+
import type { GalleryExampleEntry, GallerySourceAttribution } from '../types-gallery.js';
22

3-
const WIKI =
4-
'Illustrative layout based on an example in the Wikipedia article “Interlinear gloss” (Leipzig-style conventions). Open in the editor to adapt lines, export PNG/SVG, or add IPA.';
3+
/** @see https://en.wikipedia.org/wiki/Interlinear_gloss */
4+
export const WIKIPEDIA_INTERLINEAR_GLOSS_SOURCE = {
5+
url: 'https://en.wikipedia.org/wiki/Interlinear_gloss',
6+
title: 'Interlinear gloss'
7+
} satisfies GallerySourceAttribution;
58

69
/** Gallery pages for Wikipedia “Interlinear gloss” article examples. */
7-
export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
10+
const WIKIPEDIA_GALLERY_ENTRIES = [
811
{
912
slug: 'classical-nahuatl-interlinear-gloss',
1013
exampleId: 'wiki-nahuatl',
1114
title: 'Classical Nahuatl interlinear gloss (word-by-word)',
1215
description:
1316
'Vertical interlinear text for Nahuatl: segmented verb morphology (ni-, c-, chihui, -lia) aligned to English “I made my son a house”.',
14-
body: `Languages: Classical Nahuatl → English. Shows the classic Humboldt-style vertical gloss where each morpheme in ni-c-chihui-lia in no-piltzin ce calli sits above an English word or phrase.\n\nWhat it demonstrates: applicative -lia, possessive no-, and how object-language morpheme order differs from English syntax. Useful for Uto-Aztecan fieldwork handouts and typology classes.\n\n${WIKI}`,
17+
body: `Languages: Classical Nahuatl → English. Shows the classic Humboldt-style vertical gloss where each morpheme in ni-c-chihui-lia in no-piltzin ce calli sits above an English word or phrase.\n\nWhat it demonstrates: applicative -lia, possessive no-, and how object-language morpheme order differs from English syntax. Useful for Uto-Aztecan fieldwork handouts and typology classes.`,
1518
imageAlt:
1619
'Nahuatl morpheme segmentation aligned to English gloss words for I made my son a house'
1720
},
@@ -21,7 +24,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
2124
title: 'Nahuatl with Leipzig Glossing Rules abbreviations',
2225
description:
2326
'Same Nahuatl clause with grammatical category labels (1SG.SUBJ, 3SG.OBJ, APPL) instead of full English gloss words.',
24-
body: `Languages: Classical Nahuatl → English translation. The middle line uses standard Leipzig abbreviations rather than literal English morpheme translations — the format linguists use in journals and grammars.\n\nWhat it demonstrates: switching from pedagogical glosses to LGR-style tags while keeping morpheme boundaries hyphen-aligned. Helpful for authors preparing interlinear text for publication.\n\n${WIKI}`,
27+
body: `Languages: Classical Nahuatl → English translation. The middle line uses standard Leipzig abbreviations rather than literal English morpheme translations — the format linguists use in journals and grammars.\n\nWhat it demonstrates: switching from pedagogical glosses to LGR-style tags while keeping morpheme boundaries hyphen-aligned. Helpful for authors preparing interlinear text for publication.`,
2528
imageAlt:
2629
'Nahuatl example with Leipzig glossing abbreviations 1SG SUBJ and APPL aligned to English'
2730
},
@@ -31,7 +34,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
3134
title: 'Taiwanese Minnan interlinear gloss',
3235
description:
3336
'Southern Min (Taiwanese) sentence with morpheme glosses and English: “I have not yet decided when I shall return”.',
34-
body: `Languages: Taiwanese Minnan (Hokkien, POJ romanization) → English. Based on the multi-line example in Wikipedia citing Ko & Tan’s beginner vocabulary — here condensed to gloss, source, and translation rows.\n\nWhat it demonstrates: tone-marked compounds (iáu-boē, koat-tēng), left-aligned word glosses, and a free translation that does not match source word order one-to-one.\n\n${WIKI}`,
37+
body: `Languages: Taiwanese Minnan (Hokkien, POJ romanization) → English. Based on the multi-line example in Wikipedia citing Ko & Tan’s beginner vocabulary — here condensed to gloss, source, and translation rows.\n\nWhat it demonstrates: tone-marked compounds (iáu-boē, koat-tēng), left-aligned word glosses, and a free translation that does not match source word order one-to-one.`,
3538
imageAlt:
3639
'Taiwanese Minnan sentence with interlinear English glosses for decide and return'
3740
},
@@ -41,7 +44,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
4144
title: 'Lezgian morpheme-by-morpheme interlinear gloss',
4245
description:
4346
'Lezgian (Northeast Caucasian) farm sentence — hyphen-aligned morphemes, OBL/GEN case tags, and FUT/NEG inflection.',
44-
body: `Languages: Lezgian → English. The Wikipedia example Gila abur-u-n ferma hamišaluǧ güǧüna amuqʼ-da-č illustrates the Leipzig rule that gloss lines must contain the same number of hyphens as the object line.\n\nWhat it demonstrates: ergative/absolutive-style case stacking on nouns, verb future + negation as separate morphemes, and a long free translation (“Now their farm will not stay behind forever”).\n\n${WIKI}`,
47+
body: `Languages: Lezgian → English. The Wikipedia example Gila abur-u-n ferma hamišaluǧ güǧüna amuqʼ-da-č illustrates the Leipzig rule that gloss lines must contain the same number of hyphens as the object line.\n\nWhat it demonstrates: ergative/absolutive-style case stacking on nouns, verb future + negation as separate morphemes, and a long free translation (“Now their farm will not stay behind forever”).`,
4548
imageAlt:
4649
'Lezgian interlinear gloss with OBL GEN case markers and English translation'
4750
},
@@ -51,7 +54,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
5154
title: 'Turkish infinitive gloss (one-to-many correspondence)',
5255
description:
5356
'Turkish çık-mak with gloss come.out-INF — one morpheme glossed as two English words using Leipzig period notation.',
54-
body: `Languages: Turkish → English. The verb çık-mak is a single token split into stem çık and infinitive -mak; the gloss come.out-INF uses a period to show one-to-many correspondence (standard Leipzig convention).\n\nWhat it demonstrates: minimal two-line interlinear for dictionary-style entries and how Aligner links one object token to multiple translation words.\n\n${WIKI}`,
57+
body: `Languages: Turkish → English. The verb çık-mak is a single token split into stem çık and infinitive -mak; the gloss come.out-INF uses a period to show one-to-many correspondence (standard Leipzig convention).\n\nWhat it demonstrates: minimal two-line interlinear for dictionary-style entries and how Aligner links one object token to multiple translation words.`,
5558
imageAlt: 'Turkish infinitive çık-mak aligned to English gloss come out INF'
5659
},
5760
{
@@ -60,7 +63,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
6063
title: 'Latin interlinear gloss with zero morpheme (ø)',
6164
description:
6265
'puer-ø glossed as boy-NOM — overt ø marks a null nominative case exponent in the object line.',
63-
body: `Languages: Latin → English. When a morpheme has no surface form but appears in the gloss, Leipzig conventions use an overt ø in the object text (puer-ø).\n\nWhat it demonstrates: non-overt category marking in interlinear layouts — common in Latin, Greek, and pro-drop languages when case is null on certain noun classes.\n\n${WIKI}`,
66+
body: `Languages: Latin → English. When a morpheme has no surface form but appears in the gloss, Leipzig conventions use an overt ø in the object text (puer-ø).\n\nWhat it demonstrates: non-overt category marking in interlinear layouts — common in Latin, Greek, and pro-drop languages when case is null on certain noun classes.`,
6467
imageAlt: 'Latin puer with zero morpheme aligned to NOM gloss boy'
6568
},
6669
{
@@ -69,7 +72,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
6972
title: 'Tagalog reduplication in interlinear gloss (~)',
7073
description:
7174
'Tagalog bi~bili glossed IPFV~buy — reduplication marked with tilde per Leipzig Glossing Rules.',
72-
body: `Languages: Tagalog → English. Reduplication for imperfective aspect is written bi~bili with a tilde connecting the copied syllable to the stem, mirrored in the gloss IPFV~buy.\n\nWhat it demonstrates: aspectual reduplication (distinct from the compound-hyphen Tagalog example elsewhere in this gallery). Useful for Austronesian morphology teaching.\n\n${WIKI}`,
75+
body: `Languages: Tagalog → English. Reduplication for imperfective aspect is written bi~bili with a tilde connecting the copied syllable to the stem, mirrored in the gloss IPFV~buy.\n\nWhat it demonstrates: aspectual reduplication (distinct from the compound-hyphen Tagalog example elsewhere in this gallery). Useful for Austronesian morphology teaching.`,
7376
imageAlt: 'Tagalog reduplicated verb bi~bili with IPFV gloss and English is buying'
7477
},
7578
{
@@ -78,7 +81,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
7881
title: 'Turkish case and verb morphology (Odadan hızlı çıktım)',
7982
description:
8083
'Turkish “I left the room quickly” — ablative -dan, comitative -lı, and past 1sg -tım aligned to English.',
81-
body: `Languages: Turkish → English. Wikipedia’s Odadan hızlı çıktım example shows morpheme-aligned glosses for case (ABL, COM) and verbal inflection (PFV, 1sg) on a single clause.\n\nWhat it demonstrates: agglutinative word structure and how English reorders “room”, “quickly”, and “left” relative to Turkish oda-dan hız-lı çık-tı-m.\n\n${WIKI}`,
84+
body: `Languages: Turkish → English. Wikipedia’s Odadan hızlı çıktım example shows morpheme-aligned glosses for case (ABL, COM) and verbal inflection (PFV, 1sg) on a single clause.\n\nWhat it demonstrates: agglutinative word structure and how English reorders “room”, “quickly”, and “left” relative to Turkish oda-dan hız-lı çık-tı-m.`,
8285
imageAlt:
8386
'Turkish sentence with ABL and COM case glosses aligned to I left the room quickly'
8487
},
@@ -88,7 +91,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
8891
title: 'French clitic pronouns in interlinear gloss (Je t’aime)',
8992
description:
9093
'French Je t’aime with morpheme gloss I you love — clitic pronoun t’ aligned separately from the verb.',
91-
body: `Languages: French → English. Clitics are often separated with a double hyphen in Leipzig notation; here t’ is its own token between Je and aime.\n\nWhat it demonstrates: proclitic object pronouns and crossing links when English “I love you” reorder clitic and verb relative to French.\n\n${WIKI}`,
94+
body: `Languages: French → English. Clitics are often separated with a double hyphen in Leipzig notation; here t’ is its own token between Je and aime.\n\nWhat it demonstrates: proclitic object pronouns and crossing links when English “I love you” reorder clitic and verb relative to French.`,
9295
imageAlt: 'French Je t aime with clitic gloss you aligned to English I love you'
9396
},
9497
{
@@ -97,7 +100,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
97100
title: 'Tagalog verbal aspect paradigm (interlinear)',
98101
description:
99102
'Four Tagalog verb forms — sulat, su~sulat, sumulat, sumusulat — with aspect/mood glosses and English equivalents.',
100-
body: `Languages: Tagalog → English. Wikipedia’s affixation section uses Tagalog write paradigms to show contemplative reduplication, agent trigger infixes, and combined forms.\n\nWhat it demonstrates: paradigm-style interlinear rows (multiple forms in one diagram) rather than a single sentence — handy for morphology cheat sheets.\n\n${WIKI}`,
103+
body: `Languages: Tagalog → English. Wikipedia’s affixation section uses Tagalog write paradigms to show contemplative reduplication, agent trigger infixes, and combined forms.\n\nWhat it demonstrates: paradigm-style interlinear rows (multiple forms in one diagram) rather than a single sentence — handy for morphology cheat sheets.`,
101104
imageAlt:
102105
'Tagalog verb paradigm sulat sumulat with aspect glosses and English translations'
103106
},
@@ -107,7 +110,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
107110
title: 'German dative plural with umlaut (unsern Vätern)',
108111
description:
109112
'German unser-n Väter-n glossed our-DAT.PL father\\PL-DAT.PL — syncretism and umlaut marked in Leipzig style.',
110-
body: `Languages: German → English. The plural umlaut in Väter is marked with a backslash in the gloss (father\\PL) when the boundary appears in only one line — another Leipzig punctuation rule.\n\nWhat it demonstrates: dative plural on both adjective and noun, and “to our fathers” as a phrasal English gloss.\n\n${WIKI}`,
113+
body: `Languages: German → English. The plural umlaut in Väter is marked with a backslash in the gloss (father\\PL) when the boundary appears in only one line — another Leipzig punctuation rule.\n\nWhat it demonstrates: dative plural on both adjective and noun, and “to our fathers” as a phrasal English gloss.`,
111114
imageAlt: 'German dative plural Vätern with DAT PL glosses aligned to to our fathers'
112115
},
113116
{
@@ -116,7 +119,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
116119
title: 'Avar interlinear gloss (Caucasian agreement)',
117120
description:
118121
'Avar “We didn’t steal your camel” — ergative agreement, genitive, and negation on a single verb complex.',
119-
body: `Languages: Avar (Northeast Caucasian) → English. From Wikipedia’s automatic-glossing section: mi-s ħumukuli elu-ab-okʼekʼ-asi anu with rich prefixal agreement and negation.\n\nWhat it demonstrates: how one verb token maps to multiple English words (didn’t steal) and non-linear alignment between translation and morpheme order.\n\n${WIKI}`,
122+
body: `Languages: Avar (Northeast Caucasian) → English. From Wikipedia’s automatic-glossing section: mi-s ħumukuli elu-ab-okʼekʼ-asi anu with rich prefixal agreement and negation.\n\nWhat it demonstrates: how one verb token maps to multiple English words (didn’t steal) and non-linear alignment between translation and morpheme order.`,
120123
imageAlt:
121124
'Avar sentence with ergative agreement glosses aligned to English We did not steal your camel'
122125
},
@@ -126,7 +129,7 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
126129
title: 'Lojban interlinear gloss with sumti placeholders',
127130
description:
128131
'Lojban washing sentence with x1–x4 placeholders — logical language sumti aligned to English.',
129-
body: `Languages: Lojban → English. mi lumci le creka le grasu le rirxe (“I wash the grease off the shirt in the river”) uses repeated le for nested sumti; glosses mark discourse referents (shirt=x2, grease=x3, river=x4).\n\nWhat it demonstrates: constructed-language interlinear for teaching Lojban place structure and how DET slots differ from English “the”.\n\n${WIKI}`,
132+
body: `Languages: Lojban → English. mi lumci le creka le grasu le rirxe (“I wash the grease off the shirt in the river”) uses repeated le for nested sumti; glosses mark discourse referents (shirt=x2, grease=x3, river=x4).\n\nWhat it demonstrates: constructed-language interlinear for teaching Lojban place structure and how DET slots differ from English “the”.`,
130133
imageAlt:
131134
'Lojban sentence with sumti placeholder glosses x1 x2 aligned to English wash shirt river'
132135
},
@@ -136,8 +139,15 @@ export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = [
136139
title: 'Russian case and gender agreement (evening run)',
137140
description:
138141
'Russian “In the evening I ran to the store” — instrumental, feminine past, and accusative in one interlinear block.',
139-
body: `Languages: Russian (Latin transliteration) → English. Vecher-om ya pobeja-la v magazin from Wikipedia’s paradigm-extraction example shows instrumental time adverb, gendered past -la, and accusative object.\n\nWhat it demonstrates: inflectional morphology on nouns and verbs and a free translation whose word order differs from the gloss line.\n\n${WIKI}`,
142+
body: `Languages: Russian (Latin transliteration) → English. Vecher-om ya pobeja-la v magazin from Wikipedia’s paradigm-extraction example shows instrumental time adverb, gendered past -la, and accusative object.\n\nWhat it demonstrates: inflectional morphology on nouns and verbs and a free translation whose word order differs from the gloss line.`,
140143
imageAlt:
141144
'Russian interlinear gloss with INS case and PFV PST FEM aligned to English evening ran store'
142145
}
143-
];
146+
] satisfies Omit<GalleryExampleEntry, 'sourceAttribution'>[];
147+
148+
export const WIKIPEDIA_GALLERY: GalleryExampleEntry[] = WIKIPEDIA_GALLERY_ENTRIES.map(
149+
(entry) => ({
150+
...entry,
151+
sourceAttribution: WIKIPEDIA_INTERLINEAR_GLOSS_SOURCE
152+
})
153+
);
bitext/src/lib/examples/types-gallery.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
import type { ExampleId } from '$lib/state/examples.js';
22

3+
export interface GallerySourceAttribution {
4+
url: string;
5+
title: string;
6+
}
7+
38
export interface GalleryExampleEntry {
49
slug: string;
510
exampleId: ExampleId;
611
title: string;
712
description: string;
813
body: string;
914
imageAlt: string;
15+
/** External source for adapted examples (e.g. Wikipedia). */
16+
sourceAttribution?: GallerySourceAttribution;
1017
}

bitext/src/routes/examples/[slug]/+page.svelte

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,19 @@
6262
<p class="max-w-prose text-base leading-relaxed">{paragraph}</p>
6363
{/each}
6464

65+
{#if entry.sourceAttribution}
66+
<p class="mt-6 max-w-prose text-sm leading-relaxed text-gray-600 dark:text-gray-400">
67+
Example layout based on an example in the Wikipedia article
68+
<a
69+
href={entry.sourceAttribution.url}
70+
class={linkClass}
71+
target="_blank"
72+
rel="noopener noreferrer">{entry.sourceAttribution.title}</a
73+
>
74+
(illustrative; Leipzig-style conventions).
75+
</p>
76+
{/if}
77+
6578
<figure class="my-8 m-0">
6679
<div class="overflow-hidden rounded-md border border-gray-200 bg-gray-50 dark:border-gray-700 dark:bg-gray-900/40">
6780
<img

0 commit comments

Comments
 (0)