@@ -115,22 +115,32 @@ def load_reference_table() -> Dict[str, str]:
115115 return _reference_table
116116
117117
def generate_positional_id(
    sfs_id: Optional[str],
    section_id: Optional[str],
    data_type: str,
    position: int,
) -> str:
    """
    Generate a positional id for a data element.

    The id is assembled from up to three hyphen-joined parts, in this order:
    the normalized SFS designation, the section id, and "<data_type>-<position>".
    The first two parts are left out when the corresponding argument is
    None or an empty string.

    Args:
        sfs_id: The SFS designation (e.g., "2024:123") or None
        section_id: The section id (e.g., "kap5.2") or None
        data_type: "belopp" for amounts, "procent" for percentages
        position: 1-based position within the section for this type

    Returns:
        A positional id such as:
          - "sfs-2024-123-kap5.2-belopp-1" (SFS and section known)
          - "sfs-2024-123-belopp-1"        (no section)
          - "kap5.2-belopp-1"              (no SFS)
          - "belopp-1"                     (neither)
    """
    parts = []

    if sfs_id:
        # Normalize SFS id: "2024:123" -> "sfs-2024-123". The colon is replaced
        # so that the hyphen is the only separator used inside the id.
        parts.append("sfs-" + sfs_id.replace(":", "-"))

    if section_id:
        parts.append(section_id)

    # The type/position suffix is always present, so the id is never empty.
    parts.append(f"{data_type}-{position}")

    return "-".join(parts)
134144
135145
136146def resolve_id (positional_id : str ) -> str :
@@ -178,34 +188,40 @@ def _slugify(text: str) -> str:
178188 return text
179189
180190
181- def tag_swedish_amounts (text : str , section_id : Optional [str ] = None ) -> str :
191+ def tag_swedish_amounts (text : str , sfs_id : Optional [ str ] = None , section_id : Optional [str ] = None ) -> str :
182192 """
183193 Tag Swedish monetary amounts and percentages in text with <data> elements.
184194
185195 Processes text line by line, skipping markdown headers.
186196 Each amount/percentage is wrapped with a <data> tag containing:
187- - id: positional id (e.g., "kap5.2-belopp-1") or resolved slug from reference table
197+ - id: positional id or resolved slug from reference table
188198 - type: "amount" or "percentage"
189199 - value: normalized numeric value
190200
191201 Args:
192202 text: The text to process
203+ sfs_id: Optional SFS designation (e.g., "2024:123") for generating positional ids
193204 section_id: Optional section id for generating positional ids (e.g., "kap5.2")
194205
195206 Returns:
196207 Text with amounts and percentages wrapped in <data> tags
197208
198209 Example:
199- Input: "Avgiften är 1 000 kronor per år ." with section_id="kap5.2"
200- Output: '<data id="kap5.2-belopp-1" type="amount" value="1000">1 000 kronor </data>'
210+ Input: "Avgiften är 1 000 kronor." with sfs_id="2024:123", section_id="kap5.2"
211+ Output: '<data id="sfs-2024-123-kap5.2-belopp-1" type="amount" value="1000">...</data>'
201212
202- With reference table {"kap5.2-belopp-1": "tillstandsavgift"}:
203- Output: '<data id="tillstandsavgift" type="amount" value="1000">1 000 kronor</data>'
213+ With reference table {"sfs-2024-123-kap5.2-belopp-1": "tillstandsavgift"}:
214+ Output: '<data id="tillstandsavgift" type="amount" value="1000">...</data>'
215+
216+ Multiple SFS entries can map to the same slug to track changes over time:
217+ {"sfs-2020-100-kap5.2-belopp-1": "tillstandsavgift",
218+ "sfs-2024-123-kap5.2-belopp-1": "tillstandsavgift"}
204219 """
205220 lines = text .split ('\n ' )
206221 processed_lines = []
207222
208- # Track current section and counters
223+ # Track current SFS, section and counters
224+ current_sfs = sfs_id
209225 current_section = section_id
210226 amount_counter = 0
211227 percentage_counter = 0
@@ -216,6 +232,17 @@ def tag_swedish_amounts(text: str, section_id: Optional[str] = None) -> str:
216232 processed_lines .append (line )
217233 continue
218234
235+ # Check for article tags to extract SFS id
236+ article_match = re .match (r'^\s*<article[^>]*\bselex:id=["\']([^"\']+)["\']' , line )
237+ if article_match :
238+ # Extract SFS id from selex:id like "lag-2024-123" -> "2024:123"
239+ selex_id = article_match .group (1 )
240+ sfs_match = re .search (r'(\d{4})-(\d+)' , selex_id )
241+ if sfs_match :
242+ current_sfs = f"{ sfs_match .group (1 )} :{ sfs_match .group (2 )} "
243+ processed_lines .append (line )
244+ continue
245+
219246 # Check for section tags to extract section id
220247 section_match = re .match (r'^\s*<section[^>]*\bid=["\']([^"\']+)["\']' , line )
221248 if section_match :
@@ -232,12 +259,12 @@ def tag_swedish_amounts(text: str, section_id: Optional[str] = None) -> str:
232259
233260 # Process amounts and percentages with counters
234261 processed_line , new_amount_count = _tag_amounts_in_line (
235- line , current_section , amount_counter
262+ line , current_sfs , current_section , amount_counter
236263 )
237264 amount_counter = new_amount_count
238265
239266 processed_line , new_percentage_count = _tag_percentages_in_line (
240- processed_line , current_section , percentage_counter
267+ processed_line , current_sfs , current_section , percentage_counter
241268 )
242269 percentage_counter = new_percentage_count
243270
@@ -248,6 +275,7 @@ def tag_swedish_amounts(text: str, section_id: Optional[str] = None) -> str:
248275
249276def _tag_amounts_in_line (
250277 line : str ,
278+ sfs_id : Optional [str ],
251279 section_id : Optional [str ],
252280 counter : int
253281) -> tuple [str , int ]:
@@ -256,6 +284,7 @@ def _tag_amounts_in_line(
256284
257285 Args:
258286 line: A single line of text
287+ sfs_id: Current SFS designation for positional ids
259288 section_id: Current section id for positional ids
260289 counter: Current count of amounts in this section
261290
@@ -271,7 +300,7 @@ def replace_amount_with_multiplier(match):
271300 number = match .group (1 )
272301
273302 current_counter += 1
274- positional_id = generate_positional_id (section_id , "belopp" , current_counter )
303+ positional_id = generate_positional_id (sfs_id , section_id , "belopp" , current_counter )
275304 resolved_id = resolve_id (positional_id )
276305
277306 normalized_value = normalize_number (number )
@@ -291,7 +320,7 @@ def replace_simple_amount(match):
291320 number = match .group (1 )
292321
293322 current_counter += 1
294- positional_id = generate_positional_id (section_id , "belopp" , current_counter )
323+ positional_id = generate_positional_id (sfs_id , section_id , "belopp" , current_counter )
295324 resolved_id = resolve_id (positional_id )
296325
297326 normalized_value = normalize_number (number )
@@ -307,6 +336,7 @@ def replace_simple_amount(match):
307336
308337def _tag_percentages_in_line (
309338 line : str ,
339+ sfs_id : Optional [str ],
310340 section_id : Optional [str ],
311341 counter : int
312342) -> tuple [str , int ]:
@@ -315,6 +345,7 @@ def _tag_percentages_in_line(
315345
316346 Args:
317347 line: A single line of text
348+ sfs_id: Current SFS designation for positional ids
318349 section_id: Current section id for positional ids
319350 counter: Current count of percentages in this section
320351
@@ -335,7 +366,7 @@ def replace_percentage(match):
335366 number = match .group (1 )
336367
337368 current_counter += 1
338- positional_id = generate_positional_id (section_id , "procent" , current_counter )
369+ positional_id = generate_positional_id (sfs_id , section_id , "procent" , current_counter )
339370 resolved_id = resolve_id (positional_id )
340371
341372 normalized_value = normalize_number (number )
0 commit comments