Skip to content

Commit cfa0e42

Browse files
authored
Add index.html generation for HTML export (#37)
## Summary

- Add `generate_index_html()` function to create a minimalist root index page
- Move CSS and JS files to root directory with absolute paths (`/styles.css`, `/selex-init.js`)
- Display SE-Lex branding with link to GitHub repository
- List 10 most recently added SFS documents with links

## Changes

- New function in `exporters/html/html_export.py` to generate index.html
- Update `sfs_processor.py` to call index generation after HTML export
- Update all HTML documents to use absolute paths for CSS/JS resources

The index page provides a clean entry point to the HTML export.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
1 parent 386dd42 commit cfa0e42

2 files changed

Lines changed: 190 additions & 16 deletions

File tree

exporters/html/html_export.py

Lines changed: 181 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,28 +146,28 @@ def generate_css_file(css_dir: Path) -> None:
146146

147147
def generate_js_file(js_dir: Path) -> None:
148148
"""Generate the shared JavaScript file for HTML documents.
149-
149+
150150
Creates a selex-init.js file in the specified directory by copying
151151
the source JavaScript file from the html exporter directory.
152-
152+
153153
Args:
154154
js_dir: Directory where the JS file should be created
155155
"""
156156
import shutil
157-
157+
158158
js_file_path = js_dir / "selex-init.js"
159-
159+
160160
# Only generate if it doesn't exist to avoid regenerating on every document
161161
if js_file_path.exists():
162162
return
163-
163+
164164
# Get the source JS file path (same directory as this module)
165165
source_js_path = Path(__file__).parent / "selex-init.js"
166-
166+
167167
if not source_js_path.exists():
168168
print(f"Warning: Source JS file not found: {source_js_path}")
169169
return
170-
170+
171171
# Copy the JS file
172172
try:
173173
shutil.copy2(source_js_path, js_file_path)
@@ -176,6 +176,176 @@ def generate_js_file(js_dir: Path) -> None:
176176
print(f"Error copying JS file: {e}")
177177

178178

179+
def _extract_document_title(index_file: Path, fallback: str) -> str:
    """Return the plain-text title found in an exported document page.

    Searches the first 5000 characters of ``index_file`` for an ``<h1>``
    element whose content is plain text. The captured text comes from HTML
    markup, so character references are decoded here; the caller escapes the
    value again when it writes it into the index page.

    Args:
        index_file: Path to the document's index.html
        fallback: Value returned when the file cannot be read or no usable
            title is found

    Returns:
        str: The decoded title, or ``fallback``
    """
    try:
        with open(index_file, 'r', encoding='utf-8') as f:
            head = f.read(5000)  # Read first 5000 chars to find title
    except (OSError, UnicodeError):
        # Best effort: an unreadable page still gets listed under its beteckning
        return fallback

    title_match = re.search(r'<h1[^>]*>([^<]+)</h1>', head)
    if not title_match:
        return fallback

    # Decode entities so that html.escape() at render time does not turn
    # "&amp;" into "&amp;amp;"
    title = html.unescape(title_match.group(1)).strip()
    return title or fallback


def _collect_sfs_documents(eli_sfs_dir: Path) -> list:
    """Collect one metadata dict per exported document below ``eli_sfs_dir``.

    Expects the layout ``<year>/<lopnummer>/index.html``. Entries that are not
    directories, year folders whose name is not a decimal number and document
    folders without an index.html are skipped.

    Args:
        eli_sfs_dir: The ``eli/sfs`` directory of the HTML export

    Returns:
        list: Dicts with the keys ``beteckning``, ``title``, ``year``,
        ``lopnummer``, ``lopnummer_numeric`` and ``path`` (URL-quoted,
        relative to the output root)
    """
    from urllib.parse import quote

    documents = []
    for year_dir in eli_sfs_dir.iterdir():
        # isdecimal() rather than isdigit(): int() below rejects digit-like
        # characters such as superscripts that isdigit() lets through
        if not year_dir.is_dir() or not year_dir.name.isdecimal():
            continue

        for doc_dir in year_dir.iterdir():
            if not doc_dir.is_dir():
                continue

            index_file = doc_dir / "index.html"
            if not index_file.exists():
                continue

            # Extract beteckning from directory structure
            year = year_dir.name
            lopnummer = doc_dir.name
            beteckning = f"{year}:{lopnummer}"

            # Leading numeric part of lopnummer for sorting (handles cases like "82 s.1")
            numeric_match = re.match(r'(\d+)', lopnummer)
            lopnummer_numeric = int(numeric_match.group(1)) if numeric_match else 0

            documents.append({
                'beteckning': beteckning,
                'title': _extract_document_title(index_file, beteckning),
                'year': int(year),
                'lopnummer': lopnummer,
                'lopnummer_numeric': lopnummer_numeric,
                # Directory names may contain spaces, so quote each URL segment
                'path': f"eli/sfs/{quote(year)}/{quote(lopnummer)}"
            })

    return documents


def _render_index_page(recent_docs: list) -> str:
    """Build the complete index.html markup for the given documents.

    Args:
        recent_docs: Document dicts as produced by ``_collect_sfs_documents``,
            already sorted and truncated

    Returns:
        str: The full HTML page
    """
    page_head = """<!DOCTYPE html>
<html lang="sv">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>SE-Lex</title>
    <link rel="stylesheet" href="styles.css">
    <style>
        header {
            text-align: center;
            margin-bottom: 40px;
        }

        h1 {
            font-size: 2.5em;
            margin-bottom: 20px;
        }

        .logo {
            color: var(--selex-yellow);
            font-weight: bold;
        }

        .github-link {
            display: inline-block;
            margin: 20px 0 40px;
            padding: 12px 24px;
            background-color: var(--selex-dark-blue);
            color: white;
            text-decoration: none;
            border-radius: 5px;
            transition: background-color 0.3s;
        }

        .document-list {
            background: white;
            border-radius: 8px;
            padding: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }

        .document-item {
            padding: 12px 0;
            border-bottom: 1px solid #eee;
        }

        .document-item:last-child {
            border-bottom: none;
        }

        .document-link {
            color: var(--selex-light-blue);
            text-decoration: none;
            font-weight: 500;
        }

        .document-link:hover {
            text-decoration: underline;
        }

        .beteckning {
            display: inline-block;
            min-width: 100px;
            font-family: 'Courier New', monospace;
            color: #666;
            margin-right: 10px;
        }
    </style>
</head>
<body>
    <header>
        <h1><span class="logo">SE-Lex</span></h1>
        <a href="https://github.com/se-lex" class="github-link">Mer information & källkod på GitHub-repo</a>
    </header>

    <main>
        <h2>Nyligen tillagda författningar</h2>
        <div class="document-list">
"""

    # One entry per document; every interpolated value is escaped for HTML
    items = "".join(
        f"""            <div class="document-item">
                <span class="beteckning">{html.escape(doc['beteckning'])}</span>
                <a href="{html.escape(doc['path'])}" class="document-link">{html.escape(doc['title'])}</a>
            </div>
"""
        for doc in recent_docs
    )

    page_tail = """        </div>
    </main>
</body>
</html>"""

    return page_head + items + page_tail


def generate_index_html(output_path: Path, num_recent: int = 10) -> None:
    """Generate an index.html file in the root directory with links to recent documents.

    Creates a minimalist index page with:
    - Link to https://github.com/se-lex
    - Links to the most recently added SFS documents

    "Most recent" is decided by sorting on year, then on the leading numeric
    part of the löpnummer, then on the full löpnummer string, all descending.
    If ``output_path`` has no ``eli/sfs`` directory a warning is printed and
    nothing is written.

    Args:
        output_path: Base output path (where eli/sfs structure exists)
        num_recent: Number of recent documents to include (default: 10);
            zero or a negative value produces an empty list
    """
    # Find all SFS documents in the ELI structure
    eli_sfs_dir = output_path / "eli" / "sfs"
    if not eli_sfs_dir.is_dir():
        print("Warning: No ELI/SFS directory found, skipping index.html generation")
        return

    documents = _collect_sfs_documents(eli_sfs_dir)

    # Most recent first; the raw lopnummer breaks ties (e.g. "82" vs "82 s.1")
    # so the result does not depend on directory iteration order
    documents.sort(
        key=lambda doc: (doc['year'], doc['lopnummer_numeric'], doc['lopnummer']),
        reverse=True,
    )

    # Clamp so a negative count cannot slice from the end of the list
    recent_docs = documents[:max(num_recent, 0)]

    html_content = _render_index_page(recent_docs)

    # Imported at the point of use so the early return above needs no project import
    from sfs_processor import save_to_disk

    # Save the index.html file
    index_path = output_path / "index.html"
    save_to_disk(index_path, html_content)
    print(f"Generated index.html: {index_path}")
347+
348+
179349

180350
def convert_to_html(data: Dict[str, Any], apply_amendments: bool = False, up_to_amendment: int = None) -> str:
181351
"""Convert JSON data to HTML format with ELI structure.
@@ -524,7 +694,7 @@ def extract_content_from_html(html_content: str) -> str:
524694
drawerEnabled: false,
525695
}};
526696
</script>
527-
<script src="../../selex-init.js" defer></script>
697+
<script src="/selex-init.js" defer></script>
528698
<style>{get_common_styles()}
529699
{get_amendment_styles()}
530700
</style>
@@ -579,7 +749,7 @@ def extract_content_from_html(html_content: str) -> str:
579749
return combined_html
580750

581751

582-
def create_html_head(title: str, beteckning: str, additional_styles: str = "", additional_scripts: str = "", use_external_css: bool = True, css_relative_path: str = "../../styles.css", js_relative_path: str = "../../selex-init.js") -> str:
752+
def create_html_head(title: str, beteckning: str, additional_styles: str = "", additional_scripts: str = "", use_external_css: bool = True, css_relative_path: str = "/styles.css", js_relative_path: str = "/selex-init.js") -> str:
583753
"""Create HTML head section with navbar integration.
584754
585755
Args:
@@ -588,8 +758,8 @@ def create_html_head(title: str, beteckning: str, additional_styles: str = "", a
588758
additional_styles: Additional CSS styles to include inline
589759
additional_scripts: Additional JavaScript to include
590760
use_external_css: Whether to use external CSS file (default: True)
591-
css_relative_path: Relative path to CSS file from HTML document (default: "../../styles.css")
592-
js_relative_path: Relative path to JS file from HTML document (default: "../../selex-init.js")
761+
css_relative_path: Path to CSS file from HTML document (default: "/styles.css")
762+
js_relative_path: Path to JS file from HTML document (default: "/selex-init.js")
593763
594764
Returns:
595765
str: Complete HTML head section

sfs_processor.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -672,10 +672,9 @@ def main():
672672
# Generate CSS and JS files once for HTML/HTMLDIFF formats
673673
if "html" in output_modes or "htmldiff" in output_modes:
674674
from exporters.html.html_export import generate_css_file, generate_js_file
675-
css_js_dir = output_dir / "eli" / "sfs"
676-
css_js_dir.mkdir(parents=True, exist_ok=True)
677-
generate_css_file(css_js_dir)
678-
generate_js_file(css_js_dir)
675+
# Place CSS and JS in root for easier access
676+
generate_css_file(output_dir)
677+
generate_js_file(output_dir)
679678

680679
# Handle vector mode with batch processing
681680
if "vector" in output_modes:
@@ -731,7 +730,12 @@ def main():
731730

732731
# Use make_document to create documents in specified formats
733732
make_document(data, output_dir, output_modes, args.year_folder, args.verbose, False, args.predocs_fetch, args.apply_links, args.target_date)
734-
733+
734+
# Generate index.html after all documents have been processed
735+
if "html" in output_modes or "htmldiff" in output_modes:
736+
from exporters.html.html_export import generate_index_html
737+
generate_index_html(output_dir, num_recent=10)
738+
735739
print(f"\nBearbetning klar! {len(json_files)} filer sparade i {output_dir} i format: {', '.join(output_modes)}")
736740

737741

0 commit comments

Comments
 (0)