Skip to content

Commit b5a5a4e

Browse files
committed
Simplify data tag id to only contain descriptive identifier
Remove the numeric value and unit from the id attribute, keeping only the context-derived identifier (e.g., "avgift", "ranta", "moms"). This allows tracking the same data point across law amendments, since the id stays constant while only the value changes. Before: id="avgift-1500-kr". After: id="avgift".
1 parent 616e86c commit b5a5a4e

2 files changed

Lines changed: 46 additions & 66 deletions

File tree

formatters/tag_swedish_amounts.py

Lines changed: 16 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -73,60 +73,41 @@ def normalize_number(num_str: str) -> str:
7373
return normalized
7474

7575

76-
def generate_amount_slug(value: str, multiplier: Optional[str], currency: str, context: str) -> str:
76+
def generate_amount_slug(context: str) -> str:
7777
"""
7878
Generate a descriptive slug for an amount based on context.
7979
80+
The slug identifies what the amount represents, not its value.
81+
This allows tracking changes across law amendments.
82+
8083
Args:
81-
value: The numeric value (normalized)
82-
multiplier: Optional multiplier like "miljoner", "miljarder", "tusen"
83-
currency: The currency unit used
8484
context: Surrounding text for context extraction
8585
8686
Returns:
87-
A slug like "belopp-1000000-kr" or "avgift-500-kr"
87+
A slug like "avgift" or "bidrag" that identifies the amount
8888
"""
89-
# Try to extract a descriptive word from context
89+
# Extract a descriptive word from context
9090
prefix = _extract_context_word(context)
9191

92-
# Format the value with multiplier
93-
if multiplier:
94-
multiplier_lower = multiplier.lower()
95-
if 'miljard' in multiplier_lower:
96-
suffix = 'mdkr'
97-
elif 'miljon' in multiplier_lower:
98-
suffix = 'mkr'
99-
elif 'tusen' in multiplier_lower:
100-
suffix = 'tkr'
101-
else:
102-
suffix = 'kr'
103-
else:
104-
suffix = 'kr'
105-
106-
# Create slug
107-
slug_parts = [prefix, value, suffix]
108-
slug = '-'.join(filter(None, slug_parts))
109-
110-
return _slugify(slug)
92+
return _slugify(prefix)
11193

11294

113-
def generate_percentage_slug(value: str, context: str) -> str:
95+
def generate_percentage_slug(context: str) -> str:
11496
"""
11597
Generate a descriptive slug for a percentage based on context.
11698
99+
The slug identifies what the percentage represents, not its value.
100+
This allows tracking changes across law amendments.
101+
117102
Args:
118-
value: The numeric value (normalized)
119103
context: Surrounding text for context extraction
120104
121105
Returns:
122-
A slug like "ranta-5-procent" or "andel-25-procent"
106+
A slug like "ranta" or "moms" that identifies the percentage
123107
"""
124108
prefix = _extract_context_word(context)
125109

126-
slug_parts = [prefix, value, 'procent']
127-
slug = '-'.join(filter(None, slug_parts))
128-
129-
return _slugify(slug)
110+
return _slugify(prefix)
130111

131112

132113
def _extract_context_word(context: str) -> str:
@@ -316,15 +297,13 @@ def _tag_amounts_in_line(line: str) -> str:
316297
def replace_amount_with_multiplier(match):
317298
full_match = match.group(0)
318299
number = match.group(1)
319-
multiplier = match.group(3)
320-
currency = match.group(4)
321300

322301
# Get context (text before match)
323302
start_pos = match.start()
324303
context = line[:start_pos]
325304

326305
normalized_value = normalize_number(number)
327-
slug = generate_amount_slug(normalized_value, multiplier, currency, context)
306+
slug = generate_amount_slug(context)
328307

329308
return f'<data id="{slug}" type="amount" value="{normalized_value}">{full_match}</data>'
330309

@@ -338,12 +317,11 @@ def replace_simple_amount(match):
338317
return full_match
339318

340319
number = match.group(1)
341-
currency = match.group(3)
342320

343321
context = line[:start_pos]
344322

345323
normalized_value = normalize_number(number)
346-
slug = generate_amount_slug(normalized_value, None, currency, context)
324+
slug = generate_amount_slug(context)
347325

348326
return f'<data id="{slug}" type="amount" value="{normalized_value}">{full_match}</data>'
349327

@@ -377,7 +355,7 @@ def replace_percentage(match):
377355
context = line[:start_pos]
378356

379357
normalized_value = normalize_number(number)
380-
slug = generate_percentage_slug(normalized_value, context)
358+
slug = generate_percentage_slug(context)
381359

382360
return f'<data id="{slug}" type="percentage" value="{normalized_value}">{full_match}</data>'
383361

test/test_tag_swedish_amounts.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -121,27 +121,23 @@ class TestGenerateAmountSlug:
121121

122122
def test_simple_amount(self):
123123
"""Test generating slug for simple amount."""
124-
slug = generate_amount_slug("1000", None, "kronor", "En avgift på ")
125-
assert "1000" in slug
126-
assert "kr" in slug
124+
slug = generate_amount_slug("En avgift på ")
125+
assert slug == "avgift"
127126

128-
def test_amount_with_miljoner(self):
129-
"""Test generating slug for amount with 'miljoner'."""
130-
slug = generate_amount_slug("5", "miljoner", "kronor", "Kapitalet är ")
131-
assert "5" in slug
132-
assert "mkr" in slug
127+
def test_amount_with_context(self):
128+
"""Test generating slug extracts context word."""
129+
slug = generate_amount_slug("Kapitalet är ")
130+
assert slug == "kapital"
133131

134-
def test_amount_with_miljarder(self):
135-
"""Test generating slug for amount with 'miljarder'."""
136-
slug = generate_amount_slug("2", "miljarder", "kronor", "Omsättningen är ")
137-
assert "2" in slug
138-
assert "mdkr" in slug
132+
def test_amount_with_omsattning(self):
133+
"""Test generating slug for 'omsättning'."""
134+
slug = generate_amount_slug("Omsättningen är ")
135+
assert slug == "omsattning"
139136

140-
def test_amount_with_tusen(self):
141-
"""Test generating slug for amount with 'tusen'."""
142-
slug = generate_amount_slug("50", "tusen", "kronor", "Priset är ")
143-
assert "50" in slug
144-
assert "tkr" in slug
137+
def test_amount_with_pris(self):
138+
"""Test generating slug for 'pris'."""
139+
slug = generate_amount_slug("Priset är ")
140+
assert slug == "pris"
145141

146142

147143
# ===========================================================================
@@ -154,15 +150,13 @@ class TestGeneratePercentageSlug:
154150

155151
def test_simple_percentage(self):
156152
"""Test generating slug for simple percentage."""
157-
slug = generate_percentage_slug("5", "Räntan är ")
158-
assert "5" in slug
159-
assert "procent" in slug
153+
slug = generate_percentage_slug("Räntan är ")
154+
assert slug == "ranta"
160155

161156
def test_percentage_with_context(self):
162157
"""Test generating slug with context extraction."""
163-
slug = generate_percentage_slug("25", "Momsen är ")
164-
assert "25" in slug
165-
assert "procent" in slug
158+
slug = generate_percentage_slug("Momsen är ")
159+
assert slug == "moms"
166160

167161

168162
# ===========================================================================
@@ -361,14 +355,22 @@ class TestTagSwedishAmountsContextSlugs:
361355
def test_avgift_context(self):
362356
"""Test slug generation with 'avgift' context."""
363357
result = tag_swedish_amounts("Avgiften är 500 kronor.")
364-
assert 'id="avgift-500-kr"' in result
358+
assert 'id="avgift"' in result
365359

366360
def test_ranta_context_percentage(self):
367361
"""Test slug generation with 'ränta' context for percentage."""
368362
result = tag_swedish_amounts("Räntan är 5 procent.")
369-
# Ränta should be extracted and slugified
370-
assert 'id="' in result
371-
assert 'procent"' in result
363+
assert 'id="ranta"' in result
364+
365+
def test_same_id_different_values(self):
366+
"""Test that same context gives same id regardless of value."""
367+
result1 = tag_swedish_amounts("Avgiften är 500 kronor.")
368+
result2 = tag_swedish_amounts("Avgiften är 1000 kronor.")
369+
# Both should have id="avgift" but different values
370+
assert 'id="avgift"' in result1
371+
assert 'id="avgift"' in result2
372+
assert 'value="500"' in result1
373+
assert 'value="1000"' in result2
372374

373375

374376
# ===========================================================================

0 commit comments

Comments
 (0)