Skip to content
This repository was archived by the owner on Mar 31, 2026. It is now read-only.

Commit 21bb20f

Browse files
authored
feat(samples): add argparse and clarify traversal support in download_many snippet (#1775)
This PR adds argparse support to the download_many snippet for CLI testing, and updates the snippet's description to clarify its directory traversal safety.
1 parent 5581988 commit 21bb20f

File tree

2 files changed

+67
-12
lines changed

2 files changed

+67
-12
lines changed

google/cloud/storage/transfer_manager.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -867,7 +867,6 @@ def download_many_to_path(
867867
return results
868868

869869

870-
871870
def download_chunks_concurrently(
872871
blob,
873872
filename,

samples/snippets/storage_transfer_manager_download_many.py

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,17 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
# Example usage:
16+
# python samples/snippets/storage_transfer_manager_download_many.py \
17+
# --bucket_name <your-bucket-name> \
18+
# --blobs <blob_name_1> <blob_name_2> \
19+
# --destination_directory <destination_directory> \
20+
# --blob_name_prefix <prefix>
21+
22+
1523
# [START storage_transfer_manager_download_many]
1624
def download_many_blobs_with_transfer_manager(
17-
bucket_name, blob_names, destination_directory="", workers=8
25+
bucket_name, blob_names, destination_directory="", blob_name_prefix="", workers=8
1826
):
1927
"""Download blobs in a list by name, concurrently in a process pool.
2028
@@ -36,11 +44,11 @@ def download_many_blobs_with_transfer_manager(
3644
# blob_names = ["myblob", "myblob2"]
3745

3846
# The directory on your computer to which to download all of the files. This
39-
# string is prepended (with os.path.join()) to the name of each blob to form
40-
# the full path. Relative paths and absolute paths are both accepted. An
41-
# empty string means "the current working directory". Note that this
42-
# parameter allows accepts directory traversal ("../" etc.) and is not
43-
# intended for unsanitized end user input.
47+
# string is prepended to the name of each blob to form the full path using
48+
# pathlib. Relative paths and absolute paths are both accepted. An empty
49+
# string means "the current working directory". Note that this parameter
50+
# will NOT allow files to escape the destination_directory and will skip
51+
# downloads that attempt directory traversal outside of it.
4452
# destination_directory = ""
4553

4654
# The maximum number of processes to use for the operation. The performance
@@ -56,15 +64,63 @@ def download_many_blobs_with_transfer_manager(
5664
bucket = storage_client.bucket(bucket_name)
5765

5866
results = transfer_manager.download_many_to_path(
59-
bucket, blob_names, destination_directory=destination_directory, max_workers=workers
67+
bucket,
68+
blob_names,
69+
destination_directory=destination_directory,
70+
blob_name_prefix=blob_name_prefix,
71+
max_workers=workers,
6072
)
6173

6274
for name, result in zip(blob_names, results):
63-
# The results list is either `None` or an exception for each blob in
75+
# Each result is either `None`, an exception, or a warning, one for each blob in
6476
# the input list, in order.
65-
66-
if isinstance(result, Exception):
77+
if isinstance(result, UserWarning):
78+
print("Skipped download for {} due to warning: {}".format(name, result))
79+
elif isinstance(result, Exception):
6780
print("Failed to download {} due to exception: {}".format(name, result))
6881
else:
69-
print("Downloaded {} to {}.".format(name, destination_directory + name))
82+
print(
83+
"Downloaded {} inside {} directory.".format(name, destination_directory)
84+
)
85+
86+
7087
# [END storage_transfer_manager_download_many]
88+
89+
if __name__ == "__main__":
90+
import argparse
91+
92+
parser = argparse.ArgumentParser(
93+
description="Download blobs in a list by name, concurrently in a process pool."
94+
)
95+
parser.add_argument(
96+
"--bucket_name", required=True, help="The name of your GCS bucket"
97+
)
98+
parser.add_argument(
99+
"--blobs",
100+
nargs="+",
101+
required=True,
102+
help="The list of blob names to download",
103+
)
104+
parser.add_argument(
105+
"--destination_directory",
106+
default="",
107+
help="The directory on your computer to which to download all of the files",
108+
)
109+
parser.add_argument(
110+
"--blob_name_prefix",
111+
default="",
112+
help="A string that will be prepended to each blob_name to determine the source blob name",
113+
)
114+
parser.add_argument(
115+
"--workers", type=int, default=8, help="The maximum number of processes to use"
116+
)
117+
118+
args = parser.parse_args()
119+
120+
download_many_blobs_with_transfer_manager(
121+
bucket_name=args.bucket_name,
122+
blob_names=args.blobs,
123+
destination_directory=args.destination_directory,
124+
blob_name_prefix=args.blob_name_prefix,
125+
workers=args.workers,
126+
)

0 commit comments

Comments
 (0)