Skip to content
This repository was archived by the owner on Mar 31, 2026. It is now read-only.

Commit 700fec3

Browse files
authored
fix(transfer_manager): Prevent path traversal in download_many_to_path (#1768)
fix(transfer_manager): Prevent path traversal in download_many_to_path This PR addresses a security vulnerability where `download_many_to_path` could be exploited to write files outside the intended destination directory. The fix ensures that the resolved path for each blob download remains within the bounds of the user-provided `destination_directory`. If a blob name would result in a path outside this directory (e.g., by using `../`), a warning is issued, and that specific blob download is skipped. This prevents directory traversal attacks. Absolute paths in blob names (e.g., `/etc/passwd`) are now treated as relative to the `destination_directory`, so `/etc/passwd` will be downloaded to `destination_directory/etc/passwd`. See b/449616593 for more details. BREAKING CHANGE: Blobs that would resolve to a path outside the `destination_directory` are no longer downloaded. While this is a security fix, users relying on the previous behavior to write files outside the target directory will see a change.
1 parent 8dd0a80 commit 700fec3

File tree

3 files changed

+351
-16
lines changed

3 files changed

+351
-16
lines changed

google/cloud/storage/transfer_manager.py

Lines changed: 86 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import struct
2626
import base64
2727
import functools
28+
from pathlib import Path
2829

2930
from google.api_core import exceptions
3031
from google.cloud.storage import Client
@@ -231,9 +232,11 @@ def upload_many(
231232
executor.submit(
232233
_call_method_on_maybe_pickled_blob,
233234
_pickle_client(blob) if needs_pickling else blob,
234-
"_handle_filename_and_upload"
235-
if isinstance(path_or_file, str)
236-
else "_prep_and_do_upload",
235+
(
236+
"_handle_filename_and_upload"
237+
if isinstance(path_or_file, str)
238+
else "_prep_and_do_upload"
239+
),
237240
path_or_file,
238241
**upload_kwargs,
239242
)
@@ -259,6 +262,16 @@ def upload_many(
259262
return results
260263

261264

265+
def _resolve_path(target_dir, blob_path):
266+
target_dir = Path(target_dir)
267+
blob_path = Path(blob_path)
268+
# blob_path.anchor will be '/' if `blob_path` is full path else it'll empty.
269+
# This is useful to concatnate target_dir = /local/target , and blob_path =
270+
# /usr/local/mybin into /local/target/usr/local/mybin
271+
concatenated_path = target_dir / blob_path.relative_to(blob_path.anchor)
272+
return concatenated_path.resolve()
273+
274+
262275
@_deprecate_threads_param
263276
def download_many(
264277
blob_file_pairs,
@@ -384,9 +397,11 @@ def download_many(
384397
executor.submit(
385398
_call_method_on_maybe_pickled_blob,
386399
_pickle_client(blob) if needs_pickling else blob,
387-
"_handle_filename_and_download"
388-
if isinstance(path_or_file, str)
389-
else "_prep_and_do_download",
400+
(
401+
"_handle_filename_and_download"
402+
if isinstance(path_or_file, str)
403+
else "_prep_and_do_download"
404+
),
390405
path_or_file,
391406
**download_kwargs,
392407
)
@@ -618,7 +633,8 @@ def download_many_to_path(
618633
"""Download many files concurrently by their blob names.
619634
620635
The destination files are automatically created, with paths based on the
621-
source blob_names and the destination_directory.
636+
source `blob_names` and the `destination_directory`.
637+
622638
623639
The destination files are not automatically deleted if their downloads fail,
624640
so please check the return value of this function for any exceptions, or
@@ -629,6 +645,50 @@ def download_many_to_path(
629645
"images/icon.jpg" will be downloaded to a file named
630646
"/home/myuser/icon.jpg".
631647
648+
649+
Note1: if the path after combining `blob_name` and `destination_directory`
650+
resolves outside `destination_directory` a warning will be issued and the
651+
that particular blob will NOT be downloaded. This may happen in scenarios
652+
where `blob_name` contains "../"
653+
654+
For example,
655+
consider `destination_directory` is "downloads/gcs_blobs" and
656+
`blob_name` is '../hello.blob'. This blob will not be downloaded
657+
because the final resolved path would be "downloads/hello.blob"
658+
659+
660+
To give further examples, the following blobs will not be downloaded because
661+
it "escapes" the "destination_directory"
662+
663+
"../../local/target", # skips download
664+
"../escape.txt", # skips download
665+
"go/four/levels/deep/../../../../../somefile1", # skips download
666+
"go/four/levels/deep/../some_dir/../../../../../invalid/path1" # skips download
667+
668+
however the following blobs will be downloaded because the final resolved
669+
destination_directory is still child of given destination_directory
670+
671+
"data/../sibling.txt",
672+
"dir/./file.txt",
673+
"go/four/levels/deep/../somefile2",
674+
"go/four/levels/deep/../some_dir/valid/path1",
675+
"go/four/levels/deep/../some_dir/../../../../valid/path2",
676+
677+
It is adviced to use other APIs such as `transfer_manager.download_many` or
678+
`Blob.download_to_filename` or `Blob.download_to_file` to download such blobs.
679+
680+
681+
Note2:
682+
The resolved download_directory will always be relative to user provided
683+
`destination_directory`. For example,
684+
685+
a `blob_name` "/etc/passwd" will be downloaded into
686+
"destination_directory/etc/passwd" instead of "/etc/passwd"
687+
Similarly,
688+
"/tmp/my_fav_blob" downloads to "destination_directory/tmp/my_fav_blob"
689+
690+
691+
632692
:type bucket: :class:`google.cloud.storage.bucket.Bucket`
633693
:param bucket:
634694
The bucket which contains the blobs to be downloaded
@@ -646,18 +706,18 @@ def download_many_to_path(
646706
647707
:type destination_directory: str
648708
:param destination_directory:
649-
A string that will be prepended (with os.path.join()) to each blob_name
650-
in the input list, in order to determine the destination path for that
651-
blob.
709+
A string that will be prepended to each blob_name in the input list, in
710+
order to determine the destination path for that blob.
652711
653712
For instance, if the destination_directory string is "/tmp/img" and a
654713
blob_name is "0001.jpg", with an empty blob_name_prefix, then the source
655714
blob "0001.jpg" will be downloaded to destination "/tmp/img/0001.jpg" .
656715
657716
This parameter can be an empty string.
658717
659-
Note that this parameter allows directory traversal (e.g. "/", "../")
660-
and is not intended for unsanitized end user input.
718+
Note directory traversal may be possible as long as the final
719+
(e.g. "/", "../") resolved path is inside "destination_directory".
720+
See examples above.
661721
662722
:type blob_name_prefix: str
663723
:param blob_name_prefix:
@@ -755,11 +815,22 @@ def download_many_to_path(
755815

756816
for blob_name in blob_names:
757817
full_blob_name = blob_name_prefix + blob_name
758-
path = os.path.join(destination_directory, blob_name)
818+
resolved_path = _resolve_path(destination_directory, blob_name)
819+
if not resolved_path.parent.is_relative_to(
820+
Path(destination_directory).resolve()
821+
):
822+
warnings.warn(
823+
f"The blob {blob_name} will **NOT** be downloaded. "
824+
f"The resolved destination_directory - {resolved_path.parent} - is either invalid or "
825+
f"escapes user provided {Path(destination_directory).resolve()} . Please download this file separately using `download_to_filename`"
826+
)
827+
continue
828+
829+
resolved_path = str(resolved_path)
759830
if create_directories:
760-
directory, _ = os.path.split(path)
831+
directory, _ = os.path.split(resolved_path)
761832
os.makedirs(directory, exist_ok=True)
762-
blob_file_pairs.append((bucket.blob(full_blob_name), path))
833+
blob_file_pairs.append((bucket.blob(full_blob_name), resolved_path))
763834

764835
return download_many(
765836
blob_file_pairs,

tests/system/test_transfer_manager.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,151 @@ def test_upload_many_from_filenames_with_attributes(
121121
assert blob.cache_control == "no-cache"
122122

123123

124+
@pytest.mark.parametrize(
125+
"blobname",
126+
[
127+
"../../local/target", # skips download
128+
"../escape.txt", # skips download
129+
"go/four/levels/deep/../../../../../somefile1", # skips download
130+
"go/four/levels/deep/../some_dir/../../../../../invalid/path1", # skips download
131+
],
132+
)
133+
def test_download_many_to_path_skips_download(
134+
shared_bucket, file_data, blobs_to_delete, blobname
135+
):
136+
"""
137+
Test downloading blobs with traversal skipped
138+
"""
139+
# Setup
140+
BLOBNAMES = [blobname]
141+
142+
FILE_BLOB_PAIRS = [
143+
(
144+
file_data["simple"]["path"],
145+
shared_bucket.blob("folder_traversal/" + blob_name),
146+
)
147+
for blob_name in BLOBNAMES
148+
]
149+
150+
results = transfer_manager.upload_many(
151+
FILE_BLOB_PAIRS,
152+
skip_if_exists=True,
153+
deadline=DEADLINE,
154+
)
155+
for result in results:
156+
assert result is None
157+
158+
blobs = list(shared_bucket.list_blobs(prefix="folder_traversal/"))
159+
blobs_to_delete.extend(blobs)
160+
161+
# We expect 1 blob uploaded for this test parametrization
162+
assert len(list(b for b in blobs if b.name == "folder_traversal/" + blobname)) == 1
163+
164+
# Actual Test
165+
with tempfile.TemporaryDirectory() as tempdir:
166+
import warnings
167+
168+
with warnings.catch_warnings(record=True) as w:
169+
warnings.simplefilter("always")
170+
results = transfer_manager.download_many_to_path(
171+
shared_bucket,
172+
BLOBNAMES,
173+
destination_directory=tempdir,
174+
blob_name_prefix="folder_traversal/",
175+
deadline=DEADLINE,
176+
create_directories=True,
177+
)
178+
179+
path_traversal_warnings = [
180+
warning
181+
for warning in w
182+
if str(warning.message).startswith("The blob ")
183+
and "will **NOT** be downloaded. The resolved destination_directory"
184+
in str(warning.message)
185+
]
186+
assert len(path_traversal_warnings) == 1, "---".join(
187+
[str(warning.message) for warning in w]
188+
)
189+
190+
# 1 total - 1 skipped = 0 results
191+
assert len(results) == 0
192+
193+
194+
@pytest.mark.parametrize(
195+
"blobname",
196+
[
197+
"simple_blob",
198+
"data/file.txt",
199+
"data/../sibling.txt",
200+
"/etc/passwd",
201+
"/local/usr/a.txt",
202+
"dir/./file.txt",
203+
"go/four/levels/deep/../somefile2",
204+
"go/four/levels/deep/../some_dir/valid/path1",
205+
"go/four/levels/deep/../some_dir/../../../../valid/path2",
206+
],
207+
)
208+
def test_download_many_to_path_downloads_within_dest_dir(
209+
shared_bucket, file_data, blobs_to_delete, blobname
210+
):
211+
"""
212+
Test downloading blobs with valid traversal
213+
"""
214+
# Setup
215+
BLOBNAMES = [blobname]
216+
217+
FILE_BLOB_PAIRS = [
218+
(
219+
file_data["simple"]["path"],
220+
shared_bucket.blob("folder_traversal/" + blob_name),
221+
)
222+
for blob_name in BLOBNAMES
223+
]
224+
225+
results = transfer_manager.upload_many(
226+
FILE_BLOB_PAIRS,
227+
skip_if_exists=True,
228+
deadline=DEADLINE,
229+
)
230+
for result in results:
231+
assert result is None
232+
233+
blobs = list(shared_bucket.list_blobs(prefix="folder_traversal/"))
234+
blobs_to_delete.extend(blobs)
235+
236+
assert len(list(b for b in blobs if b.name == "folder_traversal/" + blobname)) == 1
237+
238+
# Actual Test
239+
with tempfile.TemporaryDirectory() as tempdir:
240+
results = transfer_manager.download_many_to_path(
241+
shared_bucket,
242+
BLOBNAMES,
243+
destination_directory=tempdir,
244+
blob_name_prefix="folder_traversal/",
245+
deadline=DEADLINE,
246+
create_directories=True,
247+
)
248+
249+
assert len(results) == 1
250+
for result in results:
251+
assert result is None
252+
253+
# Verify the file exists and contents match
254+
from google.cloud.storage.transfer_manager import _resolve_path
255+
from pathlib import Path
256+
257+
expected_file_path = Path(_resolve_path(tempdir, blobname))
258+
assert expected_file_path.is_file()
259+
260+
with open(file_data["simple"]["path"], "rb") as source_file:
261+
source_contents = source_file.read()
262+
263+
with expected_file_path.open("rb") as downloaded_file:
264+
downloaded_contents = downloaded_file.read()
265+
266+
assert downloaded_contents == source_contents
267+
268+
124269
def test_download_many(listable_bucket):
125270
blobs = list(listable_bucket.list_blobs())
126271
with tempfile.TemporaryDirectory() as tempdir:

0 commit comments

Comments
 (0)