Skip to content

Commit 3f72c90

Browse files
committed
Enhance Git processing: implement batch processing for git commits etc.
1 parent 7ab68d0 commit 3f72c90

5 files changed

Lines changed: 284 additions & 99 deletions

File tree

.github/workflows/fetch-sfs-workflow.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ jobs:
4242
env:
4343
PYTHONPATH: ${{ github.workspace }}
4444

45-
- name: Process JSON files to Markdown files
45+
- name: Process JSON files to Markdown files with Selex tags
4646
run: |
47-
python sfs_processor.py --input sfs_json --output SFS --formats md,git
47+
python sfs_processor.py --input sfs_json --output SFS --formats md-markers
4848
env:
4949
PYTHONPATH: ${{ github.workspace }}
5050

exporters/git/__init__.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,23 @@
11
"""Git export functionality for SFS documents."""
22

33
from .git_utils import (
4-
ensure_git_branch_for_commits,
4+
prepare_git_branch,
55
restore_original_branch,
66
remove_all_commits_on_branch,
77
get_target_repository,
88
configure_git_remote,
9-
push_to_target_repository
9+
push_to_target_repository,
10+
clone_target_repository_to_temp
1011
)
1112
from .generate_init_commit_for_doc import generate_init_commit_for_document
1213

1314
__all__ = [
14-
'ensure_git_branch_for_commits',
15+
'prepare_git_branch',
1516
'restore_original_branch',
1617
'remove_all_commits_on_branch',
1718
'get_target_repository',
1819
'configure_git_remote',
1920
'push_to_target_repository',
21+
'clone_target_repository_to_temp',
2022
'generate_init_commit_for_document'
2123
]

exporters/git/generate_init_commit_for_doc.py

Lines changed: 78 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -2,96 +2,108 @@
22
"""
33
Module for generating initial Git commits for SFS documents.
44
5-
This module handles the complete git workflow for creating initial commits
6-
for SFS documents, including branch management and cleanup.
5+
This module handles creating commits for SFS documents without managing
6+
the overall git workflow (branching, pushing, etc).
77
"""
88

9+
import os
10+
import re
11+
import subprocess
912
from pathlib import Path
10-
from typing import Optional
1113

12-
from exporters.git import ensure_git_branch_for_commits, restore_original_branch
13-
from exporters.git.generate_commits import create_init_git_commit
14+
from exporters.git.git_utils import GIT_TIMEOUT
1415
from util.file_utils import save_to_disk
1516
from formatters.format_sfs_text import clean_selex_tags
16-
from util.datetime_utils import format_datetime
17+
from util.datetime_utils import format_datetime, format_datetime_for_git
1718

1819

1920
def generate_init_commit_for_document(
    data: dict,
    output_file: Path,
    markdown_content: str,
    verbose: bool = False
) -> str:
    """
    Generate the initial git commit for a single SFS document.

    The cleaned markdown (selex tags removed) is always saved to
    ``output_file``. When the document has an issue date
    (``fulltext.utfardadDateTime``), the same content is also written below
    the current working directory, staged and committed with that date as
    both author and committer date.

    The function assumes the process is already inside a git repository and
    on the correct branch; it does not create branches, switch branches or
    push.

    Args:
        data: JSON data containing document information
        output_file: Path to the output markdown file (for local reference)
        markdown_content: The markdown content to commit and save
        verbose: Enable verbose output

    Returns:
        str: The final markdown content (cleaned, without selex tags)

    Raises:
        ValueError: If 'beteckning' or 'rubrik' is missing or empty.
        subprocess.CalledProcessError: If ``git add`` or ``git commit`` exits
            with a non-zero status.
        subprocess.TimeoutExpired: If a git command exceeds GIT_TIMEOUT.
    """
    # Extract document metadata
    beteckning = data.get('beteckning')
    if not beteckning:
        raise ValueError("Beteckning saknas i dokumentdata")

    rubrik = data.get('rubrik')
    if not rubrik:
        raise ValueError("Rubrik saknas i dokumentdata")

    # `or {}` also covers an explicit JSON null, which a plain
    # data.get(key, {}) would pass through as None and crash on .get().
    fulltext_data = data.get('fulltext') or {}
    utfardad_datum = format_datetime(fulltext_data.get('utfardadDateTime'))

    # Clean the selex tags once; the same text is saved locally, committed
    # and returned.
    final_content = clean_selex_tags(markdown_content)

    # Save file locally for reference
    save_to_disk(output_file, final_content)
    print(f"Skapade dokument: {output_file}")

    # Without an issue date there is nothing to date the commit with.
    if not utfardad_datum:
        if verbose:
            print(f"Hoppade över git-commit (inget utfärdandedatum): {beteckning}")
        return final_content

    # Group files in the repository by the year found in the beteckning
    # (the four digits before the colon); fall back to the repository root.
    year_match = re.search(r'(\d{4}):', beteckning)
    if year_match:
        relative_path = Path(year_match.group(1)) / output_file.name
    else:
        relative_path = Path(output_file.name)

    # Create directory structure if needed and write the cleaned content
    target_file = Path.cwd() / relative_path
    target_file.parent.mkdir(parents=True, exist_ok=True)
    target_file.write_text(final_content, encoding='utf-8')

    # Stage the file; `--` keeps a file name starting with '-' from being
    # parsed as a git option.
    subprocess.run(['git', 'add', '--', str(relative_path)],
                   check=True, capture_output=True, timeout=GIT_TIMEOUT)

    # Prepare commit message, adding förarbeten if available
    commit_message = rubrik
    register_data = data.get('register') or {}
    predocs = register_data.get('forarbeten')
    if predocs:
        commit_message += (f"\n\nHar tillkommit i Svensk författningssamling "
                           f"efter dessa förarbeten: {predocs}")

    # Format date for git
    commit_date = format_datetime_for_git(utfardad_datum)

    # Create commit with the issue date as both author and committer date.
    # NOTE(review): `git commit` exits non-zero when nothing is staged (for
    # example when the file is unchanged), which surfaces here as
    # CalledProcessError - confirm that callers expect this.
    env = {**os.environ, 'GIT_AUTHOR_DATE': commit_date, 'GIT_COMMITTER_DATE': commit_date}
    subprocess.run([
        'git', 'commit', '-m', commit_message
    ], check=True, capture_output=True, env=env, timeout=GIT_TIMEOUT)

    if verbose:
        print(f"Git-commit skapad: '{commit_message}' daterad {commit_date}")

    return final_content

exporters/git/git_utils.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import subprocess
55
from datetime import datetime
6+
from pathlib import Path
67
from urllib.parse import urlparse
78

89
# Git main branch name
@@ -15,7 +16,7 @@
1516
DEFAULT_TARGET_REPO = "https://github.com/se-lex/sfs.git"
1617

1718

18-
def ensure_git_branch_for_commits(git_branch, remove_all_commits_first=True, verbose=False):
19+
def prepare_git_branch(git_branch, remove_all_commits_first=True, verbose=False):
1920
"""
2021
Ensures that git commits are made in a different branch than the current one.
2122
Creates a new branch if needed and switches to it.
@@ -229,6 +230,67 @@ def create_authenticated_url(repo_url: str, pat_token: str) -> str:
229230
return repo_url
230231

231232

233+
def clone_target_repository_to_temp(verbose: bool = False) -> tuple[Path, str]:
    """
    Clone the target repository into a fresh temporary directory.

    The repository URL comes from get_target_repository(). If a personal
    access token is found in the GIT_GITHUB_PAT environment variable
    (optionally loaded from a .env file when python-dotenv is installed),
    the clone uses the URL returned by create_authenticated_url().

    Error messages are redacted before printing: subprocess exceptions
    embed the full command line in their string form, which would otherwise
    write the token-bearing clone URL to the logs.

    Args:
        verbose: Enable verbose output

    Returns:
        tuple[Path, str]: (repo_directory_path, original_cwd) on success, or
        (None, None) if failed. The working directory is not changed here;
        original_cwd is simply os.getcwd() at the time of the call. On
        success the temporary directory is left in place for the caller; on
        failure it is removed.
    """
    import shutil
    import tempfile

    # (secret, replacement) pairs that are scrubbed from anything printed.
    redactions = []
    temp_dir = None

    def _redact(text: str) -> str:
        for secret, replacement in redactions:
            text = text.replace(secret, replacement)
        return text

    def _cleanup() -> None:
        # Do not leave a half-cloned temporary directory behind on failure.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)

    try:
        # Get repository URL and PAT token
        repo_url = get_target_repository()
        pat_token = os.getenv('GIT_GITHUB_PAT')

        # Try to load PAT from .env file if not in environment
        if not pat_token:
            try:
                from dotenv import load_dotenv
                load_dotenv()
                pat_token = os.getenv('GIT_GITHUB_PAT')
            except ImportError:
                pass  # dotenv not available

        # Create authenticated URL if PAT is available
        if pat_token:
            auth_url = create_authenticated_url(repo_url, pat_token)
            if auth_url and auth_url != repo_url:
                # Replacing the whole URL also covers a token that was
                # encoded or otherwise transformed when embedded.
                redactions.append((auth_url, repo_url))
            redactions.append((pat_token, '***'))
        else:
            auth_url = repo_url
            if verbose:
                # The URL is still HTTPS; it is unauthenticated rather than
                # unencrypted, so the warning says so.
                print("Varning: Ingen PAT token hittades, använder oautentiserad URL")

        # Create temporary directory for cloning
        temp_dir = tempfile.mkdtemp()
        repo_dir = Path(temp_dir) / "target_repo"

        if verbose:
            print(f"Klonar {repo_url} till temporär katalog...")

        # Clone the repository
        subprocess.run([
            'git', 'clone', auth_url, str(repo_dir)
        ], check=True, capture_output=True, timeout=GIT_TIMEOUT)

        # Remember original directory
        original_cwd = os.getcwd()

        return repo_dir, original_cwd

    except subprocess.CalledProcessError as e:
        print(_redact(f"Fel vid kloning av target repository: {e}"))
        if e.stderr:
            print(_redact(f"Git stderr: {e.stderr.decode('utf-8', errors='replace')}"))
        _cleanup()
        return None, None
    except Exception as e:
        # Includes subprocess.TimeoutExpired, whose message also contains
        # the command line.
        print(_redact(f"Oväntat fel vid kloning av target repository: {e}"))
        _cleanup()
        return None, None
293+
232294
def push_to_target_repository(branch_name: str, remote_name: str = 'target', verbose: bool = False) -> bool:
233295
"""
234296
Push the specified branch to the target repository.

0 commit comments

Comments
 (0)