@@ -128,18 +128,15 @@ def download_content(self, temporary_file_path):
128128 logger = self .logger ,
129129 )
130130 is_zip = zipfile .is_zipfile (temporary_file_path )
131- if is_zip :
132- extracted_file_path = os .path .join (
133- temporary_file_path .split ("." )[0 ], "extracted"
134- )
135- with zipfile .ZipFile (temporary_file_path , "r" ) as zip_ref :
136- zip_ref .extractall (os .path .dirname (extracted_file_path ))
137- # List all files in the extracted directory
138- extracted_files = os .listdir (os .path .dirname (extracted_file_path ))
139- self .logger .info (f"Extracted files: { extracted_files } " )
140131 return file_hash , is_zip
141132
142- def upload_file_to_storage (self , source_file_path , dataset_stable_id ):
133+ def upload_file_to_storage (
134+ self ,
135+ source_file_path ,
136+ dataset_stable_id ,
137+ extracted_files_path ,
138+ public = True ,
139+ ):
143140 """
144141 Uploads a file to the GCP bucket
145142 """
@@ -153,12 +150,12 @@ def upload_file_to_storage(self, source_file_path, dataset_stable_id):
153150 blob = bucket .blob (target_path )
154151 with open (source_file_path , "rb" ) as file :
155152 blob .upload_from_file (file )
156- blob .make_public ()
153+ if public :
154+ blob .make_public ()
157155
158156 base_path , _ = os .path .splitext (source_file_path )
159- extracted_files_path = os .path .join (base_path , "extracted" )
160157 extracted_files : List [Gtfsfile ] = []
161- if not os .path .exists (extracted_files_path ):
158+ if not extracted_files_path or not os .path .exists (extracted_files_path ):
162159 self .logger .warning (
163160 f"Extracted files path { extracted_files_path } does not exist."
164161 )
@@ -170,7 +167,8 @@ def upload_file_to_storage(self, source_file_path, dataset_stable_id):
170167 f"{ self .feed_stable_id } /{ dataset_stable_id } /extracted/{ file_name } "
171168 )
172169 file_blob .upload_from_filename (file_path )
173- file_blob .make_public ()
170+ if public :
171+ file_blob .make_public ()
174172 self .logger .info (
175173 f"Uploaded extracted file { file_name } to { file_blob .public_url } "
176174 )
@@ -183,7 +181,7 @@ def upload_file_to_storage(self, source_file_path, dataset_stable_id):
183181 )
184182 return blob , extracted_files
185183
186- def upload_dataset (self ) -> DatasetFile or None :
184+ def upload_dataset (self , public = True ) -> DatasetFile or None :
187185 """
188186 Uploads a dataset to a GCP bucket as <feed_stable_id>/latest.zip and
189187 <feed_stable_id>/<feed_stable_id>-<upload_datetime>.zip
@@ -203,12 +201,12 @@ def upload_dataset(self) -> DatasetFile or None:
203201 self .logger .info (
204202 f"[{ self .feed_stable_id } ] File hash is { file_sha256_hash } ."
205203 )
206-
207204 if self .latest_hash != file_sha256_hash :
208205 self .logger .info (
209206 f"[{ self .feed_stable_id } ] Dataset has changed (hash { self .latest_hash } "
210207 f"-> { file_sha256_hash } ). Uploading new version."
211208 )
209+ extracted_files_path = self .unzip_files (temp_file_path )
212210 self .logger .info (
213211 f"Creating file { self .feed_stable_id } /latest.zip in bucket { self .bucket_name } "
214212 )
@@ -224,7 +222,10 @@ def upload_dataset(self) -> DatasetFile or None:
224222 f" in bucket { self .bucket_name } "
225223 )
226224 _ , extracted_files = self .upload_file_to_storage (
227- temp_file_path , dataset_stable_id
225+ temp_file_path ,
226+ dataset_stable_id ,
227+ extracted_files_path ,
228+ public = public ,
228229 )
229230
230231 return DatasetFile (
@@ -246,6 +247,18 @@ def upload_dataset(self) -> DatasetFile or None:
246247 os .remove (temp_file_path )
247248 return None
248249
def unzip_files(self, temp_file_path):
    """
    Extracts the zip archive at temp_file_path into an "extracted" directory.

    The target directory is <temp_file_path without its final extension>/extracted
    and is created if it does not exist yet.
    :param temp_file_path: path of the zip file to extract
    :return: path of the directory the archive was extracted into
    """
    # Strip only the final extension. Splitting on the first "." would cut the
    # path short whenever a directory name (or a leading "./") contains a dot,
    # sending the extracted files to an unrelated location.
    base_path, _ = os.path.splitext(temp_file_path)
    extracted_files_path = os.path.join(base_path, "extracted")
    self.logger.info(f"Unzipping files to {extracted_files_path}")
    # Create the directory for extracted files if it does not exist
    os.makedirs(extracted_files_path, exist_ok=True)
    with zipfile.ZipFile(temp_file_path, "r") as zip_ref:
        zip_ref.extractall(path=extracted_files_path)
    # List the top-level entries of the extracted directory (logging only)
    extracted_files = os.listdir(extracted_files_path)
    self.logger.info(f"Extracted files: {extracted_files}")
    return extracted_files_path
261+
249262 def generate_temp_filename (self ):
250263 """
251264 Generates a temporary filename
0 commit comments