@@ -158,6 +158,8 @@ def on_start(self, aggregator_pool: AggregatorPool) -> None:
158158 local_sqlite = self .sqlite_path
159159 if self .sqlite_local_build_path is not None :
160160 local_sqlite = EPath (self .sqlite_local_build_path )
161+ if self .sqlite_path .is_file ():
162+ self .sqlite_path .copy (local_sqlite )
161163 self .writer = SqliteIndexWriter (
162164 local_sqlite ,
163165 enable_sample_tables = self .enable_sample_tables ,
@@ -565,7 +567,9 @@ def prepare_dataset(
565567 remote_sqlite_tmp_dir : Optional [Path ] = None
566568 if not parent_path .is_local ():
567569 if index_sqlite_tmp_path is None :
568- remote_sqlite_tmp_dir = Path (tempfile .mkdtemp (dir = "/tmp" , prefix = "energon-prepare-" ))
570+ remote_sqlite_tmp_dir = Path (
571+ tempfile .mkdtemp (dir = "/tmp" , prefix = "energon-prepare-" )
572+ )
569573 index_sqlite_tmp_path = remote_sqlite_tmp_dir / INDEX_SQLITE_FILENAME
570574 owns_remote_sqlite_tmp = True
571575 else :
@@ -618,7 +622,9 @@ def prepare_dataset(
618622 # Fix permissions if needed
619623 if fix_local_permissions :
620624 try :
621- Path (str (parent_path / MAIN_FOLDER_NAME / INDEX_SQLITE_FILENAME )).chmod (file_perms )
625+ Path (str (parent_path / MAIN_FOLDER_NAME / INDEX_SQLITE_FILENAME )).chmod (
626+ file_perms
627+ )
622628 except OSError :
623629 pass
624630
@@ -703,7 +709,9 @@ def prepare_dataset(
703709 for split_part , split_ratio in split_parts_ratio :
704710 split_total += split_ratio
705711 split_end = int (len (shards ) * split_total )
706- split_shards [split_part ] = [shard .name for shard in shards [split_offset :split_end ]]
712+ split_shards [split_part ] = [
713+ shard .name for shard in shards [split_offset :split_end ]
714+ ]
707715 split_offset = split_end
708716 else :
709717 assert split_parts_patterns is not None , (
@@ -749,7 +757,6 @@ def prepare_dataset(
749757 if owns_remote_sqlite_tmp and remote_sqlite_tmp_dir is not None :
750758 shutil .rmtree (remote_sqlite_tmp_dir , ignore_errors = True )
751759
752-
753760 @classmethod
754761 def add_media_metadata (
755762 cls ,
@@ -781,12 +788,16 @@ def add_media_metadata(
781788 remote_sqlite_tmp_dir : Optional [Path ] = None
782789 if not parent_path .is_local ():
783790 if index_sqlite_tmp_path is None :
784- remote_sqlite_tmp_dir = Path (tempfile .mkdtemp (dir = "/tmp" , prefix = "energon-prepare-media-" ))
791+ remote_sqlite_tmp_dir = Path (
792+ tempfile .mkdtemp (dir = "/tmp" , prefix = "energon-prepare-media-" )
793+ )
785794 index_sqlite_tmp_path = remote_sqlite_tmp_dir / INDEX_SQLITE_FILENAME
786795 owns_remote_sqlite_tmp = True
787796 else :
788797 index_sqlite_tmp_path = None
789798
799+ sqlite_path = parent_path / MAIN_FOLDER_NAME / INDEX_SQLITE_FILENAME
800+
790801 try :
791802 aggregator = SqliteIndexWriterAggregator (
792803 parent_path / MAIN_FOLDER_NAME / INDEX_SQLITE_FILENAME ,
@@ -818,6 +829,18 @@ def add_media_metadata(
818829
819830 pool .process ()
820831
832+ if sqlite_path .is_local ():
833+ try :
834+ meta_dir = (parent_path / MAIN_FOLDER_NAME ).local_path ()
835+ # Inherit the parent dir's mode (including its group permissions), always granting the owner rwx
836+ meta_dir .chmod ((parent_path .local_path ().stat ().st_mode | 0o700 ))
837+ # Inherit the parent's read/write bits (owner always gets rw); the 0o666 mask strips every execute bit
838+ sqlite_path .local_path ().chmod (
839+ (parent_path .local_path ().stat ().st_mode | 0o600 ) & 0o666
840+ )
841+ except OSError :
842+ pass
843+
821844 return aggregator .media_metadata_written
822845 finally :
823846 if owns_remote_sqlite_tmp and remote_sqlite_tmp_dir is not None :
0 commit comments