Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,23 @@ def download(
:type max_concurrency: int
"""
try:
resolved_destination = Path(destination).resolve()
my_list = list(self.container_client.list_blobs(name_starts_with=starts_with, include="metadata"))
Comment thread
ayushhgarg-work marked this conversation as resolved.
download_size_in_mb = 0
for item in my_list:
blob_name = item.name[len(starts_with) :].lstrip("/") or Path(starts_with).name
target_path = Path(destination, blob_name).resolve()

# Prevent path traversal: ensure target is within the destination directory
try:
target_path.relative_to(resolved_destination)
except ValueError:
module_logger.warning(
Comment thread
ayushhgarg-work marked this conversation as resolved.
"Skipping blob '%s': resolved path is outside the destination directory.",
item.name,
)
continue

if _blob_is_hdi_folder(item):
target_path.mkdir(parents=True, exist_ok=True)
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -399,23 +399,46 @@ def recursive_download(
:type starts_with: str
"""
try:
resolved_destination = Path(destination).resolve()
items = list(client.list_directories_and_files(name_starts_with=starts_with))
files = [item for item in items if not item["is_directory"]]
folders = [item for item in items if item["is_directory"]]

for f in files:
Path(destination).mkdir(parents=True, exist_ok=True)
file_name = f["name"]
local_path = Path(destination, file_name)

# Prevent path traversal: ensure target is within the destination directory
try:
local_path.resolve().relative_to(resolved_destination)
except ValueError:
module_logger.warning(
"Skipping file '%s': resolved path is outside the destination directory.",
file_name,
)
continue

file_client = client.get_file_client(file_name)
file_content = file_client.download_file(max_concurrency=max_concurrency)
local_path = Path(destination, file_name)
with open(local_path, "wb") as file_data:
file_data.write(file_content.readall())

for f in folders:
sub_client = client.get_subdirectory_client(f["name"])
destination = "/".join((destination, f["name"]))
recursive_download(sub_client, destination=destination, max_concurrency=max_concurrency)
sub_destination = str(Path(destination) / f["name"])

# Prevent path traversal: ensure subdirectory is within the destination directory
try:
Path(sub_destination).resolve().relative_to(resolved_destination)
except ValueError:
module_logger.warning(
"Skipping directory '%s': resolved path is outside the destination directory.",
f["name"],
)
continue

recursive_download(sub_client, destination=sub_destination, max_concurrency=max_concurrency)
except Exception as e:
msg = f"Saving fileshare directory with prefix {starts_with} was unsuccessful."
raise MlException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,23 @@ def download(self, starts_with: str, destination: Union[str, os.PathLike] = Path
:type destination: Union[str, os.PathLike]
"""
try:
resolved_destination = Path(destination).resolve()
mylist = self.file_system_client.get_paths(path=starts_with)
download_size_in_mb = 0
for item in mylist:
file_name = item.name[len(starts_with) :].lstrip("/") or Path(starts_with).name
target_path = Path(destination, file_name)

# Prevent path traversal: ensure target is within the destination directory
try:
target_path.resolve().relative_to(resolved_destination)
except ValueError:
module_logger.warning(
"Skipping path '%s': resolved path is outside the destination directory.",
item.name,
)
continue

if item.is_directory:
target_path.mkdir(parents=True, exist_ok=True)
continue
Expand Down