Skip to content

Commit 3995f83

Browse files
Optimize parallel_report_download.py to write to files
Refactored `examples/reporting/parallel_report_download.py` to write report results directly to disk in the worker processes. This avoids collecting large result sets in memory and transmitting them back to the main process, significantly reducing memory usage and IPC overhead for large reports. Updated the main loop to display the generated filenames instead of the raw data.
1 parent 1d26beb commit 3995f83

1 file changed

Lines changed: 27 additions & 25 deletions

File tree

examples/reporting/parallel_report_download.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@
2020
"""
2121

2222
import argparse
23+
import hashlib
2324
from itertools import product
2425
import multiprocessing
26+
import os
2527
import time
2628
from typing import Any, Dict, Iterable, List, Tuple
2729

@@ -94,10 +96,7 @@ def main(client: GoogleAdsClient, customer_ids: List[str]) -> None:
9496
print("Successes:") if len(successes) else None
9597
success: Dict[str, Any]
9698
for success in successes:
97-
# success["results"] represents an array of result strings for one
98-
# customer ID / query combination.
99-
result_str: str = "\n".join(success["results"])
100-
print(result_str)
99+
print(f"Report written to file: {success['filename']}")
101100

102101
print("Failures:") if len(failures) else None
103102
failure: Dict[str, Any]
@@ -141,27 +140,30 @@ def issue_search_request(
141140
stream: Iterable[SearchGoogleAdsStreamResponse] = (
142141
ga_service.search_stream(customer_id=customer_id, query=query)
143142
)
144-
# Returning a list of GoogleAdsRows will result in a
145-
# PicklingError, so instead we put the GoogleAdsRow data
146-
# into a list of str results and return that.
147-
result_strings: List[str] = []
148-
batch: SearchGoogleAdsStreamResponse
149-
for batch in stream:
150-
row: GoogleAdsRow
151-
for row in batch.results:
152-
ad_group_id: str = (
153-
f"Ad Group ID {row.ad_group.id} in "
154-
if "ad_group.id" in query
155-
else ""
156-
)
157-
result_string: str = (
158-
f"{ad_group_id}"
159-
f"Campaign ID {row.campaign.id} "
160-
f"had {row.metrics.impressions} impressions "
161-
f"and {row.metrics.clicks} clicks."
162-
)
163-
result_strings.append(result_string)
164-
return (True, {"results": result_strings})
143+
144+
# Build a deterministic filename, unique per customer ID / query combination; an existing file with the same name is overwritten.
145+
query_hash = hashlib.md5(query.encode("utf-8")).hexdigest()
146+
filename = f"report_{customer_id}_{query_hash}.txt"
147+
148+
with open(filename, "w") as f:
149+
batch: SearchGoogleAdsStreamResponse
150+
for batch in stream:
151+
row: GoogleAdsRow
152+
for row in batch.results:
153+
ad_group_id: str = (
154+
f"Ad Group ID {row.ad_group.id} in "
155+
if "ad_group.id" in query
156+
else ""
157+
)
158+
result_string: str = (
159+
f"{ad_group_id}"
160+
f"Campaign ID {row.campaign.id} "
161+
f"had {row.metrics.impressions} impressions "
162+
f"and {row.metrics.clicks} clicks."
163+
)
164+
f.write(result_string + "\n")
165+
166+
return (True, {"filename": filename})
165167
except GoogleAdsException as ex:
166168
# This example retries on all GoogleAdsExceptions. In practice,
167169
# developers might want to limit retries to only those error codes

0 commit comments

Comments
 (0)