Skip to content

Commit c5059a7

Browse files
chore(closes OPEN-8832): optionally upload data from external URLs to our storage
1 parent 1628110 commit c5059a7

File tree

3 files changed

+68
-9
lines changed

3 files changed

+68
-9
lines changed

src/openlayer/lib/tracing/attachment_uploader.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,22 @@ class AttachmentUploader:
5959
S3, GCS, Azure, and local storage backends.
6060
"""
6161

62-
def __init__(self, client: "Openlayer", storage: StorageType = STORAGE):
62+
def __init__(
63+
self,
64+
client: "Openlayer",
65+
storage: StorageType = STORAGE,
66+
url_upload_enabled: bool = False,
67+
):
6368
"""Initialize the attachment uploader.
6469
6570
Args:
6671
client: The Openlayer client instance.
6772
storage: Storage type override. Defaults to the global STORAGE setting.
73+
url_upload_enabled: Whether to download and re-upload URL attachments.
6874
"""
6975
self._client = client
7076
self._storage = storage
77+
self._url_upload_enabled = url_upload_enabled
7178
self._storage_uri_cache: Dict[str, str] = {} # checksum -> storage_uri
7279

7380
def upload_attachment(self, attachment: "Attachment") -> "Attachment":
@@ -88,12 +95,25 @@ def upload_attachment(self, attachment: "Attachment") -> "Attachment":
8895
logger.debug("Attachment %s already uploaded", attachment.name)
8996
return attachment
9097

91-
# Skip if it has an external URL (no upload needed)
98+
# Handle external URL attachments
9299
if attachment.url:
93-
logger.debug(
94-
"Attachment %s has external URL, skipping upload", attachment.name
95-
)
96-
return attachment
100+
if self._url_upload_enabled:
101+
logger.debug(
102+
"Downloading attachment %s from external URL for upload",
103+
attachment.name,
104+
)
105+
if not attachment.download_url():
106+
logger.warning(
107+
"Failed to download attachment %s from URL, skipping upload",
108+
attachment.name,
109+
)
110+
return attachment
111+
else:
112+
logger.debug(
113+
"Attachment %s has external URL, skipping upload",
114+
attachment.name,
115+
)
116+
return attachment
97117

98118
# Check if we have data to upload
99119
if not attachment.has_data():
@@ -241,7 +261,11 @@ def process_step(step: "Step") -> int:
241261
continue
242262
seen_ids.add(attachment.id)
243263

244-
if not attachment.is_uploaded() and attachment.has_data():
264+
needs_upload = not attachment.is_uploaded() and (
265+
attachment.has_data()
266+
or (self._url_upload_enabled and attachment.url)
267+
)
268+
if needs_upload:
245269
self.upload_attachment(attachment)
246270
if attachment.is_uploaded():
247271
step_upload_count += 1
@@ -279,7 +303,10 @@ def get_uploader() -> Optional[AttachmentUploader]:
279303
if _uploader is None:
280304
client = tracer._get_client()
281305
if client:
282-
_uploader = AttachmentUploader(client)
306+
_uploader = AttachmentUploader(
307+
client,
308+
url_upload_enabled=tracer._configured_url_upload_enabled,
309+
)
283310

284311
return _uploader
285312

src/openlayer/lib/tracing/attachments.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,31 @@ def from_base64(
326326
checksum_md5=checksum_md5,
327327
)
328328

329+
def download_url(self) -> bool:
330+
"""Download data from self.url and populate _pending_bytes, size_bytes, checksum_md5.
331+
332+
Returns:
333+
True if download succeeded, False otherwise.
334+
"""
335+
if not self.url:
336+
return False
337+
338+
try:
339+
import httpx
340+
341+
with httpx.Client(follow_redirects=True) as http_client:
342+
response = http_client.get(self.url)
343+
response.raise_for_status()
344+
345+
data = response.content
346+
self._pending_bytes = data
347+
self.size_bytes = len(data)
348+
self.checksum_md5 = hashlib.md5(data).hexdigest()
349+
return True
350+
except Exception as e:
351+
logger.error("Failed to download attachment from URL %s: %s", self.url, e)
352+
return False
353+
329354
def get_bytes(self) -> Optional[bytes]:
330355
"""Get the binary data for this attachment.
331356

src/openlayer/lib/tracing/tracer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767

6868
# Attachment upload configuration
6969
_configured_attachment_upload_enabled: bool = False
70+
_configured_url_upload_enabled: bool = False
7071

7172
# Background publishing configuration
7273
_configured_background_publish_enabled: bool = True
@@ -108,6 +109,7 @@ def configure(
108109
offline_buffer_path: Optional[str] = None,
109110
max_buffer_size: Optional[int] = None,
110111
attachment_upload_enabled: bool = False,
112+
url_upload_enabled: bool = False,
111113
background_publish_enabled: bool = True,
112114
) -> None:
113115
"""Configure the Openlayer tracer with custom settings.
@@ -132,6 +134,10 @@ def configure(
132134
attachment_upload_enabled: Enable uploading of attachments (images, audio, etc.) to
133135
Openlayer storage. When enabled, attachments on steps will be uploaded during
134136
trace completion. Defaults to False.
137+
url_upload_enabled: Enable downloading and re-uploading of external URL
138+
attachments to Openlayer storage. When enabled, attachments that reference
139+
external URLs will be fetched and uploaded so the platform has a durable copy.
140+
Requires attachment_upload_enabled to also be True. Defaults to False.
135141
background_publish_enabled: Enable background publishing of traces. When enabled,
136142
attachment uploads and trace publishing happen in a background thread, allowing
137143
the main thread to return immediately. When disabled, tracing is synchronous.
@@ -166,7 +172,7 @@ def configure(
166172
"""
167173
global _configured_api_key, _configured_pipeline_id, _configured_base_url, _configured_timeout, _configured_max_retries, _client
168174
global _configured_on_flush_failure, _configured_offline_buffer_enabled, _configured_offline_buffer_path, _configured_max_buffer_size, _offline_buffer
169-
global _configured_attachment_upload_enabled, _configured_background_publish_enabled
175+
global _configured_attachment_upload_enabled, _configured_url_upload_enabled, _configured_background_publish_enabled
170176

171177
_configured_api_key = api_key
172178
_configured_pipeline_id = inference_pipeline_id
@@ -178,6 +184,7 @@ def configure(
178184
_configured_offline_buffer_path = offline_buffer_path
179185
_configured_max_buffer_size = max_buffer_size
180186
_configured_attachment_upload_enabled = attachment_upload_enabled
187+
_configured_url_upload_enabled = url_upload_enabled
181188
_configured_background_publish_enabled = background_publish_enabled
182189

183190
# Reset the client and buffer so they get recreated with new configuration

0 commit comments

Comments
 (0)