Skip to content

Commit 0130107

Browse files
committed
Enhance datetime formatting for Git: clamp dates earlier than 1980 to a minimum of 1980-01-01T00:00:00
1 parent eccfedb commit 0130107

3 files changed

Lines changed: 13 additions & 4 deletions

File tree

exporters/git/batch_processor.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from exporters.git.git_utils import checkout_branch, push_to_target_repository
1616

1717

18-
def process_files_with_git_batch(json_files, output_dir, verbose, predocs, batch_size=10):
18+
def process_files_with_git_batch(json_files, output_dir, verbose, predocs, batch_size=100):
1919
"""Process files with git batch workflow, using same branch but pushing after each batch."""
2020
# Clone target repository once for all batches
2121
repo_dir, original_cwd = clone_target_repository_to_temp(verbose=verbose)
@@ -66,7 +66,8 @@ def process_files_with_git_batch(json_files, output_dir, verbose, predocs, batch
6666
print(f"Misslyckades med att pusha till target repository")
6767

6868
except Exception as e:
69-
print(f"Oväntat fel vid git batch processing: {e}")
69+
print(f"Fel vid git batch processing: {e}")
70+
raise # Re-raise the exception so temporal processing errors are visible
7071
finally:
7172
# Always change back to original directory
7273
os.chdir(original_cwd)

temporal/apply_temporal.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
relativt till ett angivet target datum.
77
88
Regler:
9-
1. Sektioner med selex:status="upphavd" tas bort helt
9+
1. Sektioner med selex:status="upphavd" eller "gammal upphord" tas bort helt
1010
2. Sektioner med selex:status="ikraft" och selex:ikraft_datum > target_date tas bort helt
1111
3. Sektioner med selex:status="ikraft" och selex:ikraft_datum <= target_date får sina temporal attribut borttagna
1212
4. Sektioner med selex:upphor_datum som är <= target_date tas bort helt
@@ -75,7 +75,7 @@ def apply_temporal(markdown_text: str, target_date: str, verbose: bool = False)
7575
# Kontrollera status-attribut
7676
if status_match:
7777
status_value = status_match.group(1)
78-
if "upphavd" in status_value:
78+
if "upphavd" in status_value or "upphord" in status_value:
7979
should_remove = True
8080
remove_reason = f"status '{status_value}'"
8181
elif "ikraft" in status_value and ikraft_match:

util/datetime_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,17 @@ def format_datetime_for_git(dt_str: Optional[str]) -> Optional[str]:
3333
else:
3434
# Just date, add midnight time
3535
dt = datetime.fromisoformat(dt_str + 'T00:00:00')
36+
37+
# Git/GitHub has problems with very old dates, use 1980-01-01 as minimum
38+
if dt.year < 1980:
39+
return "1980-01-01T00:00:00"
40+
3641
return dt.strftime('%Y-%m-%dT%H:%M:%S')
3742
except (ValueError, AttributeError):
3843
# Fallback: try to add time to basic date format
3944
if dt_str and len(dt_str) == 10: # YYYY-MM-DD format
45+
year = int(dt_str[:4])
46+
if year < 1980:
47+
return "1980-01-01T00:00:00"
4048
return dt_str + 'T00:00:00'
4149
return dt_str

0 commit comments

Comments
 (0)