Skip to content

Commit 892ddfe

Browse files
Fix template text extraction for Lang, Native name, and Nihongo templates (#828)
## Problem

Templates like `{{lang|nap|Abbrùzzu}}` and `{{Nihongo2|東京都}}` in Wikipedia infoboxes were not being extracted, resulting in missing text content in DBpedia.

## Root Cause

The `Lang` template was configured to extract parameter 3, but `{{lang}}` takes only 2 positional parameters (the language code and the text). Additionally, the `Native name`, `Nihongo`, and `Nihongo2` templates were not configured.

## Fix

Updated `core/src/main/resources/templatetransform.json`:

- **Lang**: Extract param 2 (was incorrectly param 3)
- **Native name|native_name**: Added - extracts param 2
- **Nihongo2**: Added - extracts param 1
- **Nihongo**: Added - extracts param 2
- **Langx**, **Script**, **Transliteration**: Added - each extracts param 2

## Examples

| Template | Before | After |
|----------|--------|-------|
| `{{lang\|nap\|Abbrùzzu}}` | *(empty)* | Abbrùzzu |
| `{{Nihongo2\|東京都}}` | *(empty)* | 東京都 |
1 parent 9e66903 commit 892ddfe

2 files changed

Lines changed: 60 additions & 1 deletion

File tree

core/src/main/resources/templatetransform.json

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,31 @@
3030
},
3131
"Lang":{
3232
"transformer":"textNode",
33-
"replace": "<br />$(3||)<br />"
33+
"replace": "$(2||)"
34+
},
35+
"Native name|native_name":{
36+
"transformer":"textNode",
37+
"replace": "$(2||)"
38+
},
39+
"Nihongo2":{
40+
"transformer":"textNode",
41+
"replace": "$(1||)"
42+
},
43+
"Nihongo":{
44+
"transformer":"textNode",
45+
"replace": "$(2||)"
46+
},
47+
"Langx":{
48+
"transformer":"textNode",
49+
"replace": "$(2||)"
50+
},
51+
"Script":{
52+
"transformer":"textNode",
53+
"replace": "$(2||)"
54+
},
55+
"Transliteration":{
56+
"transformer":"textNode",
57+
"replace": "$(2||)"
3458
},
3559
"Marriage":{
3660
"transformer":"extractChildren",

core/src/test/scala/org/dbpedia/extraction/wikiparser/TemplateTransformParserTest.scala

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,41 @@ class TemplateTransformParserTest extends FlatSpec with Matchers
5353
parse("en", "{{url|https://www.dji.com DJI.com}}") should be (Some("[https://www.dji.com]"))
5454
}
5555

56+
it should "extract text from {{lang|nap|Abbrùzzu}}" in
57+
{
58+
parse("en", "{{lang|nap|Abbrùzzu}}") should be (Some("Abbrùzzu"))
59+
}
60+
61+
it should "extract text from {{native name|nap|Abbrùzze}}" in
62+
{
63+
parse("en", "{{native name|nap|Abbrùzze}}") should be (Some("Abbrùzze"))
64+
}
65+
66+
it should "extract text from {{Nihongo2|東京都}}" in
67+
{
68+
parse("en", "{{Nihongo2|東京都}}") should be (Some("東京都"))
69+
}
70+
71+
it should "extract text from {{Nihongo|Tokyo|東京|Tōkyō}}" in
72+
{
73+
parse("en", "{{Nihongo|Tokyo|東京|Tōkyō}}") should be (Some("東京"))
74+
}
75+
76+
it should "extract text from {{Script|Arab|أبجدية عربية}}" in
77+
{
78+
parse("en", "{{Script|Arab|أبجدية عربية}}") should be (Some("أبجدية عربية"))
79+
}
80+
81+
it should "extract text from {{Transliteration|ru|Moskva}}" in
82+
{
83+
parse("en", "{{Transliteration|ru|Moskva}}") should be (Some("Moskva"))
84+
}
85+
86+
it should "extract text from {{Langx|ja|東京}}" in
87+
{
88+
parse("en", "{{Langx|ja|東京}}") should be (Some("東京"))
89+
}
90+
5691

5792
private val wikiParser = WikiParser.getInstance()
5893

0 commit comments

Comments
 (0)