Skip to content

Commit 904045e

Browse files
committed
Use slash separator between SFS designation and document position
Change the positional id format from "sfs-2024-123-kap5.2-belopp-1" to "sfs-2024-123/kap5.2-belopp-1". The "/" creates a clearer visual hierarchy: the part before the slash is the law (SFS designation), and the part after the slash is the position within the document. Also added a test for reference table slug resolution.
1 parent a0d6ee5 commit 904045e

3 files changed

Lines changed: 51 additions & 33 deletions

File tree

data/amount-references.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"_comment": "Reference table mapping positional ids to descriptive slugs. Keys starting with _ are ignored.",
3-
"_format": "{ 'sfs-YYYY-NNN-section-type-position': 'descriptive-slug' }",
3+
"_format": "{ 'sfs-YYYY-NNN/section-type-position': 'descriptive-slug' }",
44

55
"_example_tracking_changes": "Multiple SFS entries can map to same slug to track value changes over time",
66

7-
"sfs-2020-100-kap5.2-belopp-1": "tillstandsavgift",
8-
"sfs-2022-456-kap5.2-belopp-1": "tillstandsavgift",
9-
"sfs-2024-123-kap5.2-belopp-1": "tillstandsavgift",
7+
"sfs-2020-100/kap5.2-belopp-1": "tillstandsavgift",
8+
"sfs-2022-456/kap5.2-belopp-1": "tillstandsavgift",
9+
"sfs-2024-123/kap5.2-belopp-1": "tillstandsavgift",
1010

11-
"sfs-2020-100-kap6.1-procent-1": "riksbankens-referensranta",
12-
"sfs-2024-123-kap6.1-procent-1": "riksbankens-referensranta"
11+
"sfs-2020-100/kap6.1-procent-1": "riksbankens-referensranta",
12+
"sfs-2024-123/kap6.1-procent-1": "riksbankens-referensranta"
1313
}

formatters/tag_swedish_amounts.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -126,21 +126,22 @@ def generate_positional_id(sfs_id: Optional[str], section_id: Optional[str], dat
126126
position: 1-based position within the section for this type
127127
128128
Returns:
129-
A positional id like "sfs-2024-123-kap5.2-belopp-1"
129+
A positional id like "sfs-2024-123/kap5.2-belopp-1"
130+
Uses "/" to separate SFS designation from document position.
130131
"""
131-
parts = []
132+
# Build the document position part
133+
position_parts = []
134+
if section_id:
135+
position_parts.append(section_id)
136+
position_parts.append(f"{data_type}-{position}")
137+
position_str = "-".join(position_parts)
132138

133139
if sfs_id:
134140
# Normalize SFS id: "2024:123" -> "sfs-2024-123"
135141
normalized_sfs = "sfs-" + sfs_id.replace(":", "-")
136-
parts.append(normalized_sfs)
137-
138-
if section_id:
139-
parts.append(section_id)
140-
141-
parts.append(f"{data_type}-{position}")
142-
143-
return "-".join(parts)
142+
return f"{normalized_sfs}/{position_str}"
143+
else:
144+
return position_str
144145

145146

146147
def resolve_id(positional_id: str) -> str:

test/test_tag_swedish_amounts.py

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,12 @@ class TestGeneratePositionalId:
9090
def test_with_sfs_and_section(self):
9191
"""Test generating positional id with SFS and section."""
9292
result = generate_positional_id("2024:123", "kap5.2", "belopp", 1)
93-
assert result == "sfs-2024-123-kap5.2-belopp-1"
93+
assert result == "sfs-2024-123/kap5.2-belopp-1"
9494

9595
def test_with_sfs_only(self):
9696
"""Test generating positional id with only SFS."""
9797
result = generate_positional_id("2024:123", None, "belopp", 1)
98-
assert result == "sfs-2024-123-belopp-1"
98+
assert result == "sfs-2024-123/belopp-1"
9999

100100
def test_with_section_only(self):
101101
"""Test generating positional id with only section."""
@@ -110,12 +110,12 @@ def test_without_sfs_or_section(self):
110110
def test_percentage_type(self):
111111
"""Test generating positional id for percentage."""
112112
result = generate_positional_id("2020:100", "kap1.5", "procent", 2)
113-
assert result == "sfs-2020-100-kap1.5-procent-2"
113+
assert result == "sfs-2020-100/kap1.5-procent-2"
114114

115115
def test_multiple_positions(self):
116116
"""Test generating positional id with higher position."""
117117
result = generate_positional_id("2024:123", "kap5.2", "belopp", 3)
118-
assert result == "sfs-2024-123-kap5.2-belopp-3"
118+
assert result == "sfs-2024-123/kap5.2-belopp-3"
119119

120120

121121
# ===========================================================================
@@ -338,18 +338,19 @@ def test_simple_positional_id(self):
338338
def test_with_sfs_id(self):
339339
"""Test positional id with sfs_id parameter."""
340340
result = tag_swedish_amounts("Avgiften är 500 kronor.", sfs_id="2024:123")
341-
assert 'id="sfs-2024-123-belopp-1"' in result
341+
assert 'id="sfs-2024-123/belopp-1"' in result
342342

343343
def test_with_sfs_and_section(self):
344344
"""Test positional id with both sfs_id and section_id."""
345-
result = tag_swedish_amounts("Avgiften är 500 kronor.", sfs_id="2024:123", section_id="kap5.2")
346-
assert 'id="sfs-2024-123-kap5.2-belopp-1"' in result
345+
# Use SFS id not in reference table to test positional id format
346+
result = tag_swedish_amounts("Avgiften är 500 kronor.", sfs_id="2099:999", section_id="kap9.9")
347+
assert 'id="sfs-2099-999/kap9.9-belopp-1"' in result
347348

348349
def test_multiple_amounts_incrementing(self):
349350
"""Test that multiple amounts get incrementing positions."""
350351
result = tag_swedish_amounts("Första 500 kr och andra 1000 kr.", sfs_id="2024:123", section_id="kap1.1")
351-
assert 'id="sfs-2024-123-kap1.1-belopp-1"' in result
352-
assert 'id="sfs-2024-123-kap1.1-belopp-2"' in result
352+
assert 'id="sfs-2024-123/kap1.1-belopp-1"' in result
353+
assert 'id="sfs-2024-123/kap1.1-belopp-2"' in result
353354

354355
def test_section_tag_resets_counter(self):
355356
"""Test that section tags reset the counter."""
@@ -360,33 +361,49 @@ def test_section_tag_resets_counter(self):
360361
Belopp 200 kronor.
361362
</section>'''
362363
result = tag_swedish_amounts(text, sfs_id="2024:123")
363-
assert 'id="sfs-2024-123-kap1.1-belopp-1"' in result
364-
assert 'id="sfs-2024-123-kap1.2-belopp-1"' in result
364+
assert 'id="sfs-2024-123/kap1.1-belopp-1"' in result
365+
assert 'id="sfs-2024-123/kap1.2-belopp-1"' in result
365366

366367
def test_article_tag_extracts_sfs(self):
367368
"""Test that article tags extract SFS id from selex:id."""
368369
text = '''<article selex:id="lag-2024-123">
369370
Avgiften är 500 kronor.
370371
</article>'''
371372
result = tag_swedish_amounts(text)
372-
assert 'id="sfs-2024-123-belopp-1"' in result
373+
assert 'id="sfs-2024-123/belopp-1"' in result
373374

374375
def test_percentage_positional_id(self):
375376
"""Test positional id for percentages."""
376377
result = tag_swedish_amounts("Räntan är 5 procent.", sfs_id="2024:123", section_id="kap2.3")
377-
assert 'id="sfs-2024-123-kap2.3-procent-1"' in result
378+
assert 'id="sfs-2024-123/kap2.3-procent-1"' in result
378379

379380
def test_same_slug_different_sfs(self):
380381
"""Test that same position in different SFS gives different positional ids."""
381-
result1 = tag_swedish_amounts("Avgiften är 500 kronor.", sfs_id="2020:100", section_id="kap5.2")
382-
result2 = tag_swedish_amounts("Avgiften är 1000 kronor.", sfs_id="2024:123", section_id="kap5.2")
382+
# Use SFS ids not in reference table
383+
result1 = tag_swedish_amounts("Avgiften är 500 kronor.", sfs_id="2098:100", section_id="kap9.9")
384+
result2 = tag_swedish_amounts("Avgiften är 1000 kronor.", sfs_id="2099:123", section_id="kap9.9")
383385
# Different SFS gives different positional ids
384-
assert 'id="sfs-2020-100-kap5.2-belopp-1"' in result1
385-
assert 'id="sfs-2024-123-kap5.2-belopp-1"' in result2
386+
assert 'id="sfs-2098-100/kap9.9-belopp-1"' in result1
387+
assert 'id="sfs-2099-123/kap9.9-belopp-1"' in result2
386388
# But values are different
387389
assert 'value="500"' in result1
388390
assert 'value="1000"' in result2
389391

392+
def test_reference_table_resolves_slug(self):
393+
"""Test that reference table resolves positional id to slug."""
394+
# These SFS ids ARE in the reference table
395+
import formatters.tag_swedish_amounts as module
396+
module._reference_table = None # Reset cache
397+
398+
result1 = tag_swedish_amounts("Avgiften är 500 kronor.", sfs_id="2020:100", section_id="kap5.2")
399+
result2 = tag_swedish_amounts("Avgiften är 750 kronor.", sfs_id="2024:123", section_id="kap5.2")
400+
# Both resolve to same slug
401+
assert 'id="tillstandsavgift"' in result1
402+
assert 'id="tillstandsavgift"' in result2
403+
# But values are different - tracking the change!
404+
assert 'value="500"' in result1
405+
assert 'value="750"' in result2
406+
390407

391408
# ===========================================================================
392409
# tag_swedish_amounts Tests - Edge cases

0 commit comments

Comments
 (0)