Skip to content

Commit 138ddff

Browse files
committed
Refactor: unified error handling in DSDownload class methods
1 parent 91813e4 commit 138ddff

1 file changed

Lines changed: 9 additions & 7 deletions

File tree

edge/helper/setOrchestrator.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ class Ident:
1616
@staticmethod
1717
async def remote_file_type(url: str) -> str:
1818
#identifies the file type of a remote file by reading a chunk of it
19-
19+
2020
session = await SessionFactory().grab_session()
2121
async with session.get(url) as response:
2222
mime_header = response.headers.get('Content-Type', '').lower()
@@ -94,17 +94,17 @@ async def github_download(url: str) -> dict:
9494
#expects a root github repo url
9595

9696
try:
97-
# Parse out the raw github content url
97+
#parse out the raw github content url
9898
raw_url = await URLParser.parse_github_url(url)
9999
metadata_url = f"{raw_url}/metadata.json"
100100

101101
networking_logger.info(f"GitHub Download: Fetching metadata from {metadata_url}")
102102

103-
# Download and parse metadata file
103+
#download and parse metadata file
104104
metadata_content_raw = await DownloadManager.download_file(metadata_url)
105105
metadata_content = json.loads(metadata_content_raw)
106106

107-
# Get the list of relevant files from metadata
107+
#get list of relevant files from metadata
108108
relevent_files = metadata_content.get(CoreDatasetMetadata.relevent_files, [])
109109

110110
if not relevent_files:
@@ -113,15 +113,17 @@ async def github_download(url: str) -> dict:
113113

114114
networking_logger.info(f"GitHub Download: Found {len(relevent_files)} files in metadata")
115115

116-
# Remove unnecessary files from the list
116+
#remove unnecessary files from the list (i.e., metadata itself and domains)
117117
for file_to_remove in SetDownload.removed_files:
118118
if file_to_remove in relevent_files:
119119
relevent_files.remove(file_to_remove)
120120
networking_logger.debug(f"GitHub Download: Removed {file_to_remove} from download list")
121121

122-
# Download each relevant file
122+
#download each relevant file referenced in metadata
123123
downloaded_count = 0
124+
124125
for file_to_download in relevent_files:
126+
125127
try:
126128
file_url = f"{raw_url}/{file_to_download}"
127129
networking_logger.debug(f"GitHub Download: Downloading {file_to_download} from {file_url}")
@@ -204,4 +206,4 @@ async def _process_dataset_content(content, source_identifier: str, source_type:
204206

205207
except Exception as e:
206208
networking_logger.error(f"Error processing dataset content from {source_identifier}: {str(e)}")
207-
raise
209+
return {"success": False, "source_identifier": source_identifier, "source_type": source_type, "message": str(e)}

0 commit comments

Comments
 (0)