Skip to content
This repository was archived by the owner on Mar 31, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions google/cloud/storage/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
DataCorruptionDynamicParent = Exception


class InvalidPathError(Exception):
"""Raised when the provided path string is malformed."""

pass


class InvalidResponse(InvalidResponseDynamicParent):
"""Error class for responses which are not in the correct state.

Expand Down
44 changes: 34 additions & 10 deletions google/cloud/storage/transfer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

from google.cloud.storage._media.requests.upload import XMLMPUContainer
from google.cloud.storage._media.requests.upload import XMLMPUPart
from google.cloud.storage.exceptions import DataCorruption
from google.cloud.storage.exceptions import DataCorruption, InvalidPathError

TM_DEFAULT_CHUNK_SIZE = 32 * 1024 * 1024
DEFAULT_MAX_WORKERS = 8
Expand Down Expand Up @@ -263,6 +263,8 @@ def upload_many(


def _resolve_path(target_dir, blob_path):
if os.name == "nt" and ":" in blob_path:
raise InvalidPathError(f"{blob_path} cannot be downloaded into {target_dir}")
Comment thread
chandra-siri marked this conversation as resolved.
target_dir = Path(target_dir)
blob_path = Path(blob_path)
# blob_path.anchor will be '/' if `blob_path` is full path else it'll empty.
Expand Down Expand Up @@ -805,43 +807,65 @@ def download_many_to_path(

:raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded.

:rtype: list
:rtype: List[None|Exception|UserWarning]
:returns: A list of results corresponding to, in order, each item in the
input list. If an exception was received, it will be the result
for that operation. Otherwise, the return value from the successful
download method is used (which will be None).
input list. If an exception was received or a download was skipped
(e.g., due to existing file or path traversal), it will be the result
for that operation (as an Exception or UserWarning, respectively).
Otherwise, the result will be None for a successful download.
"""
results = [None] * len(blob_names)
blob_file_pairs = []
indices_to_process = []

for blob_name in blob_names:
for i, blob_name in enumerate(blob_names):
full_blob_name = blob_name_prefix + blob_name
resolved_path = _resolve_path(destination_directory, blob_name)
try:
resolved_path = _resolve_path(destination_directory, blob_name)
except InvalidPathError as e:
msg = f"The blob {blob_name} will **NOT** be downloaded. {e}"
warnings.warn(msg)
results[i] = UserWarning(msg)
Comment thread
chandra-siri marked this conversation as resolved.
continue
if not resolved_path.parent.is_relative_to(
Path(destination_directory).resolve()
):
warnings.warn(
msg = (
f"The blob {blob_name} will **NOT** be downloaded. "
f"The resolved destination_directory - {resolved_path.parent} - is either invalid or "
f"escapes user provided {Path(destination_directory).resolve()} . Please download this file separately using `download_to_filename`"
)
warnings.warn(msg)
results[i] = UserWarning(msg)
continue

resolved_path = str(resolved_path)
if skip_if_exists and os.path.isfile(resolved_path):
msg = f"The blob {blob_name} is skipped because destination file already exists"
results[i] = UserWarning(msg)
continue

if create_directories:
directory, _ = os.path.split(resolved_path)
os.makedirs(directory, exist_ok=True)
blob_file_pairs.append((bucket.blob(full_blob_name), resolved_path))
indices_to_process.append(i)

return download_many(
many_results = download_many(
blob_file_pairs,
download_kwargs=download_kwargs,
deadline=deadline,
raise_exception=raise_exception,
worker_type=worker_type,
max_workers=max_workers,
skip_if_exists=skip_if_exists,
skip_if_exists=False,
)

for meta_index, result in zip(indices_to_process, many_results):
results[meta_index] = result

return results


def download_chunks_concurrently(
blob,
Expand Down
86 changes: 84 additions & 2 deletions tests/system/test_transfer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,9 @@ def test_download_many_to_path_skips_download(
[str(warning.message) for warning in w]
)

# 1 total - 1 skipped = 0 results
assert len(results) == 0
# 1 total - 1 skipped = 1 result (containing Warning)
assert len(results) == 1
assert isinstance(results[0], UserWarning)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -266,6 +267,87 @@ def test_download_many_to_path_downloads_within_dest_dir(
assert downloaded_contents == source_contents



def test_download_many_to_path_mixed_results(
shared_bucket, file_data, blobs_to_delete
):
"""
Test download_many_to_path with successful downloads, skip_if_exists skips, and path traversal skips.
"""
PREFIX = "mixed_results/"
BLOBNAMES = [
"success1.txt",
"success2.txt",
"exists.txt",
"../escape.txt"
]

FILE_BLOB_PAIRS = [
(
file_data["simple"]["path"],
shared_bucket.blob(PREFIX + name),
)
for name in BLOBNAMES
]

results = transfer_manager.upload_many(
FILE_BLOB_PAIRS,
skip_if_exists=True,
deadline=DEADLINE,
)
for result in results:
assert result is None

blobs = list(shared_bucket.list_blobs(prefix=PREFIX))
blobs_to_delete.extend(blobs)
assert len(blobs) == 4

# Actual Test
with tempfile.TemporaryDirectory() as tempdir:
existing_file_path = os.path.join(tempdir, "exists.txt")
with open(existing_file_path, "w") as f:
f.write("already here")

import warnings
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
results = transfer_manager.download_many_to_path(
shared_bucket,
BLOBNAMES,
destination_directory=tempdir,
blob_name_prefix=PREFIX,
deadline=DEADLINE,
create_directories=True,
skip_if_exists=True,
)

assert len(results) == 4

path_traversal_warnings = [
warning
for warning in w
if str(warning.message).startswith("The blob ")
and "will **NOT** be downloaded. The resolved destination_directory"
in str(warning.message)
]
assert len(path_traversal_warnings) == 1, "---".join(
[str(warning.message) for warning in w]
)

assert results[0] is None
assert results[1] is None
assert isinstance(results[2], UserWarning)
assert "skipped because destination file already exists" in str(results[2])
assert isinstance(results[3], UserWarning)
assert "will **NOT** be downloaded" in str(results[3])

assert os.path.exists(os.path.join(tempdir, "success1.txt"))
assert os.path.exists(os.path.join(tempdir, "success2.txt"))

with open(existing_file_path, "r") as f:
assert f.read() == "already here"


def test_download_many(listable_bucket):
blobs = list(listable_bucket.list_blobs())
with tempfile.TemporaryDirectory() as tempdir:
Expand Down
Loading