66"""
77
88import os
9- import shutil
10- import subprocess
119from datetime import datetime
1210from pathlib import Path
1311import random
1412import json
1513
1614from exporters .git import clone_target_repository_to_temp
17- from exporters .git .git_utils import GIT_TIMEOUT
15+ from exporters .git .git_utils import checkout_branch , push_to_target_repository
16+ from sfs_processor import make_document
1817
1918
def process_files_with_git_batch(json_files, output_dir, verbose, predocs, batch_size=10):
    """Process JSON files into one git branch, pushing after every batch.

    The target repository is cloned once, a uniquely named branch is created
    in the clone, and the files are handed to ``_process_batch_files`` in
    chunks of at most ``batch_size``. The branch is pushed to ``origin``
    after each chunk. The working directory is switched to the clone while
    processing and is always restored afterwards.

    Args:
        json_files: Sized, sliceable sequence of JSON file paths.
        output_dir: Output directory, forwarded to ``_process_batch_files``.
        verbose: Forwarded to the clone, checkout, push and processing steps.
        predocs: Forwarded unchanged to ``_process_batch_files``.
        batch_size: Maximum number of files per batch; must be at least 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        RuntimeError: If the target repository could not be cloned.
    """
    # A step of 0 makes range() raise and a negative step yields no batches
    # at all, so reject both before any git work is started.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    total_files = len(json_files)
    if total_files == 0:
        # Nothing to commit: avoid cloning and pushing an empty branch.
        print("Inga filer att bearbeta")
        return

    # Clone target repository once for all batches
    repo_dir, original_cwd = clone_target_repository_to_temp(verbose=verbose)
    if repo_dir is None:
        raise RuntimeError("Failed to clone target repository")

    # NOTE(review): the cloned directory is not removed by this function;
    # confirm that cleanup is handled elsewhere.
    try:
        # Work inside the cloned repository
        os.chdir(repo_dir)

        # One branch name for the entire operation; the timestamp plus a
        # random suffix keeps concurrent runs from colliding.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        random_suffix = random.randint(1000, 9999)
        unique_branch = f"batch_{timestamp}_{random_suffix}"

        if not checkout_branch(unique_branch, create_if_missing=True, verbose=verbose):
            print(f"Fel: Kunde inte skapa git branch: {unique_branch}")
            return

        if total_files > batch_size:
            print(f"Delar upp {total_files} filer i batcher om {batch_size} filer var")
            batches = [
                json_files[start:start + batch_size]
                for start in range(0, total_files, batch_size)
            ]
            batch_count = len(batches)
            print(f"Skapade {batch_count} batcher")

            # Every batch lands on the same branch; pushing after each one
            # publishes progress even if a later batch fails.
            for i, batch in enumerate(batches, 1):
                print(f"\nBearbetar batch {i}/{batch_count} ({len(batch)} filer)...")
                _process_batch_files(batch, output_dir, verbose, predocs, original_cwd, i, batch_count)

                print(f"Pushar batch {i}/{batch_count} till target repository...")
                if push_to_target_repository(unique_branch, 'origin', verbose):
                    print(f"Batch {i}/{batch_count} pushad till target repository som branch '{unique_branch}'")
                else:
                    print(f"Misslyckades med att pusha batch {i}/{batch_count} till target repository")
        else:
            print(f"Bearbetar {total_files} filer i en enda batch...")
            _process_batch_files(json_files, output_dir, verbose, predocs, original_cwd, 1, 1)

            print(f"Pushar alla {total_files} filer till target repository...")
            if push_to_target_repository(unique_branch, 'origin', verbose):
                print(f"Alla {total_files} filer pushade till target repository som branch '{unique_branch}'")
            else:
                print("Misslyckades med att pusha till target repository")

    except Exception as e:
        # Top-level boundary: report the failure instead of propagating it.
        print(f"Oväntat fel vid git batch processing: {e}")
    finally:
        # Always change back to original directory
        os.chdir(original_cwd)
74+
75+
def _process_batch_files(json_files, output_dir, verbose, predocs, original_cwd, batch_num, total_batches):
    """Load each JSON file and pass its data to ``make_document`` with the git exporter.

    No branch is created here; documents are produced in whatever repository
    and branch are current when the function is called. Relative input and
    output paths are resolved against ``original_cwd``.

    Args:
        json_files: Iterable of JSON file paths, relative to ``original_cwd``
            or absolute.
        output_dir: Output directory; a relative value is resolved against
            ``original_cwd``.
        verbose: Forwarded to ``make_document``; also enables the summary line.
        predocs: Forwarded unchanged to ``make_document``.
        original_cwd: Base directory used to resolve relative paths.
        batch_num: 1-based number of this batch (used in the summary only).
        total_batches: Total number of batches (used in the summary only).
    """
    # Function-scope import kept from the original block.
    from sfs_processor import make_document

    base_dir = Path(original_cwd)

    # Resolve the output directory once; it does not change per file.
    # The whole relative path is kept (not only its last component) so that
    # nested directories such as "out/sfs" are not collapsed to "sfs".
    original_output_dir = Path(output_dir)
    if not original_output_dir.is_absolute():
        original_output_dir = base_dir / original_output_dir

    for json_file in json_files:
        # Joining with an absolute json_file leaves it unchanged.
        abs_json_file = base_dir / json_file
        try:
            with open(abs_json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
            # One unreadable or malformed file must not abort the batch.
            print(f"Fel vid läsning av {abs_json_file}: {e}")
            continue

        make_document(data, original_output_dir, ["git"], True, verbose, True, predocs, True)

    if verbose:
        print(f"Batch {batch_num}/{total_batches} bearbetad ({len(json_files)} filer)")
101+
0 commit comments