Skip to content

Commit ca7cb21

Browse files
authored
LT-22203: punct only line; multiple punct; rare no morph (#405)
Change-Id: Ib92c66d6a7db544086f1413e59f04eaaffa1f536
1 parent b6fdac4 commit ca7cb21

7 files changed

Lines changed: 28025 additions & 105 deletions

File tree

DistFiles/Language Explorer/Export Templates/Interlinear/xml2XLingPapCommonConcatMorphemes.xsl

Lines changed: 139 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -66,113 +66,150 @@ Elements to ignore or are handled elsewhere
6666
<!-- <xsl:for-each select="words/word[item/@type='txt' or morphemes][1]/descendant-or-self::item">
6767
-->
6868
<lineGroup>
69-
<xsl:for-each select="words/word[item/@type='txt' or morphemes][1]/descendant-or-self::item | words/word/morphemes/morph[1][not(item)]">
70-
<xsl:variable name="sLang" select="@lang"/>
71-
<xsl:choose>
72-
<xsl:when test="parent::word ">
73-
<xsl:if test="@type='txt' or @type='punct'">
74-
<!-- word -->
75-
<line>
76-
<!-- word -->
77-
<xsl:for-each select="ancestor::words/word/item[@type='txt' and @lang=$sLang]">
78-
<wrd>
79-
<langData>
80-
<xsl:variable name="iBaselineSiblingsCount" select="count(preceding-sibling::item[@type='txt'])"/>
81-
<xsl:call-template name="GetWordLangAttribute"/>
82-
<xsl:if test="$iBaselineSiblingsCount=0">
83-
<!-- prepend any initial punctuation only to the first line -->
84-
<xsl:for-each select="../preceding-sibling::word[1]/item[@type='punct']">
85-
<xsl:choose>
86-
<xsl:when test="count(../preceding-sibling::word)=0">
87-
<!-- it's the first word item -->
88-
<xsl:value-of select="normalize-space(.)"/>
89-
</xsl:when>
90-
<xsl:when test="../preceding-sibling::word[1][item/@type='punct']">
91-
<!-- there are other punct items before it -->
92-
<xsl:variable name="iPreviousTextItem" select="count(../preceding-sibling::word[item/@type='txt'])"/>
69+
<xsl:variable name="iPunct" select="count(words/word[item[@type='punct']])"/>
70+
<xsl:variable name="iWord" select="count(words/word)"/>
71+
<xsl:choose>
72+
<xsl:when test="$iPunct = $iWord">
73+
<!-- every word is punctuation; still create a line using each word's language. The context node here is the element that owns the words child (the same context the iPunct and iWord counts are taken from), so the words are reached through the child axis, not the parent axis. -->
74+
<line>
75+
<xsl:for-each select="words/word">
76+
<wrd lang="{item/@lang}-baseline">
77+
<xsl:value-of select="item"/>
78+
</wrd>
79+
</xsl:for-each>
80+
</line>
81+
</xsl:when>
82+
<xsl:otherwise>
83+
<!-- at least one word is not punctuation-only: use the processing that was in place before punctuation-only lines were handled -->
84+
<xsl:for-each select="words/word[item/@type='txt' or morphemes][1]/descendant-or-self::item | words/word/morphemes/morph[1][not(item)]">
85+
<xsl:variable name="sLang" select="@lang"/>
86+
<xsl:choose>
87+
<xsl:when test="parent::word">
88+
<xsl:if test="@type='txt' or @type='punct'">
89+
<!-- word -->
90+
<line>
91+
<!-- word -->
92+
<xsl:for-each select="ancestor::words/word/item[@type='txt' and @lang=$sLang]">
93+
<wrd>
94+
<langData>
95+
<xsl:variable name="iBaselineSiblingsCount" select="count(preceding-sibling::item[@type='txt'])"/>
96+
<xsl:call-template name="GetWordLangAttribute"/>
97+
<xsl:if test="$iBaselineSiblingsCount=0">
98+
<!-- prepend any initial punctuation only to the first line -->
99+
<xsl:for-each select="../preceding-sibling::word[1]/item[@type='punct']">
93100
<xsl:choose>
94-
<xsl:when test="$iPreviousTextItem=0">
95-
<!-- everything before is punctuation; prepend them all -->
96-
<xsl:for-each select="../preceding-sibling::word[item/@type='punct']">
101+
<xsl:when test="count(../preceding-sibling::word)=0">
102+
<!-- it's the first word item -->
97103
<xsl:value-of select="normalize-space(.)"/>
98-
</xsl:for-each>
99-
</xsl:when>
100-
<xsl:otherwise>
101-
<!-- assume only the last one is preceding punct -->
104+
</xsl:when>
105+
<xsl:when test="../preceding-sibling::word[1][item/@type='punct']">
106+
<!-- there are other punct items before it -->
107+
<xsl:variable name="iPreviousTextItem" select="count(../preceding-sibling::word[item/@type='txt'])"/>
108+
<xsl:choose>
109+
<xsl:when test="$iPreviousTextItem=0">
110+
<!-- everything before is punctuation; prepend them all -->
111+
<xsl:for-each select="../preceding-sibling::word[item/@type='punct']">
112+
<xsl:value-of select="normalize-space(.)"/>
113+
</xsl:for-each>
114+
<!-- include this one, too -->
115+
<xsl:value-of select="normalize-space(.)"/>
116+
</xsl:when>
117+
<xsl:otherwise>
118+
<!-- assume only the last one is preceding punct -->
119+
<xsl:value-of select="normalize-space(.)"/>
120+
</xsl:otherwise>
121+
</xsl:choose>
122+
</xsl:when>
123+
<xsl:when test="contains(.,'(') or contains(.,'[') or contains(.,'{') or contains(.,'“') or contains(.,'‘')">
124+
<!-- there are other preceding word items; look for preceding punctuation N.B. may well need to look for characters, too -->
102125
<xsl:value-of select="normalize-space(.)"/>
126+
</xsl:when>
127+
</xsl:choose>
128+
</xsl:for-each>
129+
</xsl:if>
130+
<!-- output the word -->
131+
<xsl:value-of select="normalize-space(.)"/>
132+
<xsl:if test="$iBaselineSiblingsCount=0">
133+
<!-- append any following punctuation only to the first line -->
134+
<xsl:if test="../following-sibling::word[1]/item[@type='punct']">
135+
<xsl:variable name="iFollowingTextItem" select="count(../following-sibling::word[item/@type='txt'])"/>
136+
<xsl:choose>
137+
<xsl:when test="$iFollowingTextItem=0">
138+
<!-- everything after is punctuation; append them all -->
139+
<xsl:for-each select="../following-sibling::word[item/@type='punct']">
140+
<xsl:value-of select="normalize-space(translate(.,'§',''))"/>
141+
</xsl:for-each>
142+
</xsl:when>
143+
<xsl:otherwise>
144+
<xsl:for-each select="../following-sibling::word[1]/item[@type='punct']">
145+
<xsl:if test="not(contains(.,'(') or contains(.,'[') or contains(.,'{') or contains(.,'“') or contains(.,'‘'))">
146+
<!-- skip any preceding punctuation N.B. may well need to look for characters, too -->
147+
<xsl:value-of select="normalize-space(translate(.,'§',''))"/>
148+
</xsl:if>
149+
</xsl:for-each>
150+
<!-- check for a second consecutive punctuation item -->
151+
<xsl:for-each select="../following-sibling::word[2]/item[@type='punct']">
152+
<xsl:if test="not(contains(.,'(') or contains(.,'[') or contains(.,'{') or contains(.,'“') or contains(.,'‘'))">
153+
<!-- skip any preceding punctuation N.B. may well need to look for characters, too -->
154+
<xsl:value-of select="normalize-space(translate(.,'§',''))"/>
155+
</xsl:if>
156+
</xsl:for-each>
103157
</xsl:otherwise>
104158
</xsl:choose>
105-
</xsl:when>
106-
<xsl:when test="contains(.,'(') or contains(.,'[') or contains(.,'{') or contains(.,'“') or contains(.,'‘')">
107-
<!-- there are other preceding word items; look for preceding punctuation N.B. may well need to look for characters, too -->
108-
<xsl:value-of select="normalize-space(.)"/>
109-
</xsl:when>
110-
</xsl:choose>
111-
</xsl:for-each>
112-
</xsl:if>
113-
<!-- output the word -->
114-
<xsl:value-of select="normalize-space(.)"/>
115-
<xsl:if test="$iBaselineSiblingsCount=0">
116-
<!-- append any following punctuation only to the first line -->
117-
<xsl:if test="../following-sibling::word[1]/item[@type='punct']">
118-
<xsl:variable name="iFollowingTextItem" select="count(../following-sibling::word[item/@type='txt'])"/>
119-
<xsl:choose>
120-
<xsl:when test="$iFollowingTextItem=0">
121-
<!-- everything after is punctuation; append them all -->
122-
<xsl:for-each select="../following-sibling::word[item/@type='punct']">
123-
<xsl:value-of select="normalize-space(translate(.,'§',''))"/>
124-
</xsl:for-each>
125-
</xsl:when>
126-
<xsl:otherwise>
127-
<xsl:for-each select="../following-sibling::word[1]/item[@type='punct']">
128-
<xsl:if test="not(contains(.,'(') or contains(.,'[') or contains(.,'{') or contains(.,'“') or contains(.,'‘'))">
129-
<!-- skip any preceding punctuation N.B. may well need to look for characters, too -->
130-
<xsl:value-of select="normalize-space(translate(.,'§',''))"/>
131-
</xsl:if>
132-
</xsl:for-each>
133-
<!-- check for a second consecutive punctuation item -->
134-
<xsl:for-each select="../following-sibling::word[2]/item[@type='punct']">
135-
<xsl:if test="not(contains(.,'(') or contains(.,'[') or contains(.,'{') or contains(.,'“') or contains(.,'‘'))">
136-
<!-- skip any preceding punctuation N.B. may well need to look for characters, too -->
137-
<xsl:value-of select="normalize-space(translate(.,'§',''))"/>
138-
</xsl:if>
139-
</xsl:for-each>
140-
</xsl:otherwise>
141-
</xsl:choose>
142-
</xsl:if>
143-
</xsl:if>
144-
</langData>
145-
</wrd>
146-
</xsl:for-each>
147-
</line>
148-
</xsl:if>
149-
<!-- word gloss -->
150-
<xsl:call-template name="OutputLineOfWrdElementsFromWord">
151-
<xsl:with-param name="sType" select="'gls'"/>
152-
<xsl:with-param name="sLang" select="$sLang"/>
153-
</xsl:call-template>
154-
<!-- word cat -->
155-
<xsl:call-template name="OutputLineOfWrdElementsFromWord">
156-
<xsl:with-param name="sType" select="'pos'"/>
157-
<xsl:with-param name="sLang" select="$sLang"/>
158-
</xsl:call-template>
159-
</xsl:when>
160-
<xsl:when test="name()='morph'">
161-
<!-- first word does not have an analysis -->
162-
<xsl:for-each select="ancestor::word/following-sibling::word[1]/morphemes/morph/item">
163-
<xsl:variable name="sLang2" select="@lang"/>
164-
<xsl:call-template name="ProcessMorphItem">
165-
<xsl:with-param name="sLang" select="$sLang2"/>
166-
</xsl:call-template>
167-
</xsl:for-each>
168-
</xsl:when>
169-
<xsl:when test="parent::morph[count(preceding-sibling::morph)=0]">
170-
<xsl:call-template name="ProcessMorphItem">
171-
<xsl:with-param name="sLang" select="$sLang"/>
172-
</xsl:call-template>
173-
</xsl:when>
174-
</xsl:choose>
175-
</xsl:for-each>
159+
</xsl:if>
160+
</xsl:if>
161+
</langData>
162+
</wrd>
163+
</xsl:for-each>
164+
</line>
165+
</xsl:if>
166+
<!-- word gloss -->
167+
<xsl:call-template name="OutputLineOfWrdElementsFromWord">
168+
<xsl:with-param name="sType" select="'gls'"/>
169+
<xsl:with-param name="sLang" select="$sLang"/>
170+
</xsl:call-template>
171+
<!-- word cat -->
172+
<xsl:call-template name="OutputLineOfWrdElementsFromWord">
173+
<xsl:with-param name="sType" select="'pos'"/>
174+
<xsl:with-param name="sLang" select="$sLang"/>
175+
</xsl:call-template>
176+
</xsl:when>
177+
<xsl:when test="name()='morph'">
178+
<!-- first word does not have an analysis -->
179+
<xsl:if test="not(ancestor::word[preceding-sibling::word/morphemes/morph/item])">
180+
<!-- avoid some duplications -->
181+
<xsl:variable name="nextWord" select="ancestor::word/following-sibling::word[1]"/>
182+
<xsl:choose>
183+
<xsl:when test="not($nextWord/morphemes)">
184+
<!-- very rare case: the next word does not even have blank analysis items
185+
find the next word which has filled in analysis items -->
186+
<xsl:for-each select="ancestor::word/following-sibling::word[morphemes/morph/item][1]/morphemes/morph/item">
187+
<xsl:variable name="sLang2" select="@lang"/>
188+
<xsl:call-template name="ProcessMorphItem">
189+
<xsl:with-param name="sLang" select="$sLang2"/>
190+
</xsl:call-template>
191+
</xsl:for-each>
192+
</xsl:when>
193+
<xsl:otherwise>
194+
<xsl:for-each select="ancestor::word/following-sibling::word[1]/morphemes/morph/item">
195+
<xsl:variable name="sLang2" select="@lang"/>
196+
<xsl:call-template name="ProcessMorphItem">
197+
<xsl:with-param name="sLang" select="$sLang2"/>
198+
</xsl:call-template>
199+
</xsl:for-each>
200+
</xsl:otherwise>
201+
</xsl:choose>
202+
</xsl:if>
203+
</xsl:when>
204+
<xsl:when test="parent::morph[count(preceding-sibling::morph)=0]">
205+
<xsl:call-template name="ProcessMorphItem">
206+
<xsl:with-param name="sLang" select="$sLang"/>
207+
</xsl:call-template>
208+
</xsl:when>
209+
</xsl:choose>
210+
</xsl:for-each>
211+
</xsl:otherwise>
212+
</xsl:choose>
176213
</lineGroup>
177214
<xsl:for-each select="item">
178215
<xsl:choose>

Src/LexText/Interlinear/ITextDllTests/XLingPaperExporterTests.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,13 @@ public void FixtureSetup()
3131
m_xmlTransform.Load(m_sTransformPath);
3232
}
3333

34+
[TestCase("BruceCoxEmpty")]
3435
[TestCase("Gilaki01")]
3536
[TestCase("HalbiBUD2")]
3637
[TestCase("HalbiCS3")]
3738
[TestCase("HalbiST1")]
3839
[TestCase("Jibiyal2Texts")]
40+
[TestCase("Jibiyal3Text")]
3941
[TestCase("nszEnglishWords")]
4042
[TestCase("SETCorn")]
4143
[TestCase("Urim2Kids")]

0 commit comments

Comments
 (0)