@@ -16,7 +16,7 @@ class Ident:
1616 @staticmethod
1717 async def remote_file_type (url : str ) -> str :
1818 #identifies the file type of a remote file by reading a chunk of it
19-
19+
2020 session = await SessionFactory ().grab_session ()
2121 async with session .get (url ) as response :
2222 mime_header = response .headers .get ('Content-Type' , '' ).lower ()
@@ -94,17 +94,17 @@ async def github_download(url: str) -> dict:
9494 #expects a root github repo url
9595
9696 try :
97- # Parse out the raw github content url
97+ #parse out the raw github content url
9898 raw_url = await URLParser .parse_github_url (url )
9999 metadata_url = f"{ raw_url } /metadata.json"
100100
101101 networking_logger .info (f"GitHub Download: Fetching metadata from { metadata_url } " )
102102
103- # Download and parse metadata file
103+ #download and parse metadata file
104104 metadata_content_raw = await DownloadManager .download_file (metadata_url )
105105 metadata_content = json .loads (metadata_content_raw )
106106
107- # Get the list of relevant files from metadata
107+ #get list of relevant files from metadata
108108 relevent_files = metadata_content .get (CoreDatasetMetadata .relevent_files , [])
109109
110110 if not relevent_files :
@@ -113,15 +113,17 @@ async def github_download(url: str) -> dict:
113113
114114 networking_logger .info (f"GitHub Download: Found { len (relevent_files )} files in metadata" )
115115
116- # Remove unnecessary files from the list
116+ #remove unnecessary files from the list (i.e., the metadata file itself and domains)
117117 for file_to_remove in SetDownload .removed_files :
118118 if file_to_remove in relevent_files :
119119 relevent_files .remove (file_to_remove )
120120 networking_logger .debug (f"GitHub Download: Removed { file_to_remove } from download list" )
121121
122- # Download each relevant file
122+ #download each relevant file referenced in metadata
123123 downloaded_count = 0
124+
124125 for file_to_download in relevent_files :
126+
125127 try :
126128 file_url = f"{ raw_url } /{ file_to_download } "
127129 networking_logger .debug (f"GitHub Download: Downloading { file_to_download } from { file_url } " )
@@ -204,4 +206,4 @@ async def _process_dataset_content(content, source_identifier: str, source_type:
204206
205207 except Exception as e :
206208 networking_logger .error (f"Error processing dataset content from { source_identifier } : { str (e )} " )
207- raise
209+ return { "success" : False , "source_identifier" : source_identifier , "source_type" : source_type , "message" : str ( e )}
0 commit comments