Skip to content

Commit 0536025

Browse files
re-run tox
2 parents (6f3c247 + ecd3718), commit 0536025

File tree

29 files changed

+1202
-364
lines changed

29 files changed

+1202
-364
lines changed

.github/workflows/validate-pr.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ jobs:
1010
permissions:
1111
pull-requests: write
1212
steps:
13-
- uses: getsentry/github-workflows/validate-pr@0b52fc6a867b744dcbdf5d25c18bc8d1c95710e1
13+
- uses: getsentry/github-workflows/validate-pr@71588ddf95134f804e82c5970a8098588e2eaecd
1414
with:
1515
app-id: ${{ vars.SDK_MAINTAINER_BOT_APP_ID }}
1616
private-key: ${{ secrets.SDK_MAINTAINER_BOT_PRIVATE_KEY }}

scripts/populate_tox/config.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,9 @@
1818
"deps": {
1919
"*": ["pytest-asyncio"],
2020
"<0.50": ["httpx<0.28.0"],
21+
# tokenizers dropped Python 3.8 support, but didn't update package metadata.
22+
# https://github.com/huggingface/tokenizers/commit/f4c9fd7f402fc794df8f1b547a95ee5305f9fe62
23+
"py3.8": ["tokenizers<0.20.4"],
2124
},
2225
"python": ">=3.8",
2326
},

scripts/populate_tox/package_dependencies.jsonl

Lines changed: 14 additions & 12 deletions
Large diffs are not rendered by default.

scripts/populate_tox/releases.jsonl

Lines changed: 25 additions & 25 deletions
Large diffs are not rendered by default.

sentry_sdk/_werkzeug.py

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
from typing import Dict
3939
from typing import Iterator
4040
from typing import Tuple
41+
from typing import Optional
4142

4243

4344
#
@@ -62,35 +63,41 @@ def _get_headers(environ: "Dict[str, str]") -> "Iterator[Tuple[str, str]]":
6263
yield key.replace("_", "-").title(), value
6364

6465

65-
#
66+
def _strip_default_port(host: str, scheme: "Optional[str]") -> str:
67+
"""Strip the port from the host if it's the default for the scheme."""
68+
if scheme == "http" and host.endswith(":80"):
69+
return host[:-3]
70+
if scheme == "https" and host.endswith(":443"):
71+
return host[:-4]
72+
return host
73+
74+
6675
# `get_host` comes from `werkzeug.wsgi.get_host`
6776
# https://github.com/pallets/werkzeug/blob/1.0.1/src/werkzeug/wsgi.py#L145
68-
#
77+
78+
6979
def get_host(environ: "Dict[str, str]", use_x_forwarded_for: bool = False) -> str:
7080
"""
7181
Return the host for the given WSGI environment.
7282
"""
83+
scheme = environ.get("wsgi.url_scheme")
84+
if use_x_forwarded_for:
85+
scheme = environ.get("HTTP_X_FORWARDED_PROTO", scheme)
86+
7387
if use_x_forwarded_for and "HTTP_X_FORWARDED_HOST" in environ:
74-
rv = environ["HTTP_X_FORWARDED_HOST"]
75-
if environ["wsgi.url_scheme"] == "http" and rv.endswith(":80"):
76-
rv = rv[:-3]
77-
elif environ["wsgi.url_scheme"] == "https" and rv.endswith(":443"):
78-
rv = rv[:-4]
88+
return _strip_default_port(environ["HTTP_X_FORWARDED_HOST"], scheme)
7989
elif environ.get("HTTP_HOST"):
80-
rv = environ["HTTP_HOST"]
81-
if environ["wsgi.url_scheme"] == "http" and rv.endswith(":80"):
82-
rv = rv[:-3]
83-
elif environ["wsgi.url_scheme"] == "https" and rv.endswith(":443"):
84-
rv = rv[:-4]
90+
return _strip_default_port(environ["HTTP_HOST"], scheme)
8591
elif environ.get("SERVER_NAME"):
92+
# SERVER_NAME/SERVER_PORT describe the internal server, so use
93+
# wsgi.url_scheme (not the forwarded scheme) for port decisions.
8694
rv = environ["SERVER_NAME"]
8795
if (environ["wsgi.url_scheme"], environ["SERVER_PORT"]) not in (
8896
("https", "443"),
8997
("http", "80"),
9098
):
9199
rv += ":" + environ["SERVER_PORT"]
100+
return rv
92101
else:
93102
# In spite of the WSGI spec, SERVER_NAME might not be present.
94-
rv = "unknown"
95-
96-
return rv
103+
return "unknown"

sentry_sdk/ai/consts.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
import re
2+
3+
# Matches data URLs with base64-encoded content, e.g. "data:image/png;base64,iVBORw0K..."
4+
DATA_URL_BASE64_REGEX = re.compile(
5+
r"^data:(?:[a-zA-Z0-9][a-zA-Z0-9.+\-]*/[a-zA-Z0-9][a-zA-Z0-9.+\-]*)(?:;[a-zA-Z0-9\-]+=[^;,]*)*;base64,(?:[A-Za-z0-9+/\-_]+={0,2})$"
6+
)

sentry_sdk/ai/utils.py

Lines changed: 23 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from typing import TYPE_CHECKING
55

66
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
7+
from sentry_sdk.ai.consts import DATA_URL_BASE64_REGEX
78

89
if TYPE_CHECKING:
910
from typing import Any, Callable, Dict, List, Optional, Tuple
@@ -588,6 +589,20 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
588589
return 0
589590

590591

592+
def _is_image_type_with_blob_content(item: "Dict[str, Any]") -> bool:
    """
    Some content blocks contain an image_url property with base64 content as its value.
    This is used to identify those while not leading to unnecessary copying of data when the image URL does not contain base64 content.

    Returns True only when ``item["type"]`` is ``"image_url"``, ``item["image_url"]``
    is a dict, and its ``"url"`` is a string matching ``DATA_URL_BASE64_REGEX``.
    Any other shape yields False instead of raising.
    """
    if item.get("type") != "image_url":
        return False

    image_url = item.get("image_url")
    if not isinstance(image_url, dict):
        # Missing, None, or a bare string: there is no nested "url" entry to
        # inspect, so this is not a block we can classify as blob content.
        return False

    url = image_url.get("url")
    if not isinstance(url, str):
        # The regex can only be applied to text; a non-string URL is not a data URL.
        return False

    return DATA_URL_BASE64_REGEX.match(url) is not None
604+
605+
591606
def redact_blob_message_parts(
592607
messages: "List[Dict[str, Any]]",
593608
) -> "List[Dict[str, Any]]":
@@ -640,7 +655,9 @@ def redact_blob_message_parts(
640655
content = message.get("content")
641656
if isinstance(content, list):
642657
for item in content:
643-
if isinstance(item, dict) and item.get("type") == "blob":
658+
if isinstance(item, dict) and (
659+
item.get("type") == "blob" or _is_image_type_with_blob_content(item)
660+
):
644661
has_blobs = True
645662
break
646663
if has_blobs:
@@ -661,8 +678,11 @@ def redact_blob_message_parts(
661678
content = message.get("content")
662679
if isinstance(content, list):
663680
for item in content:
664-
if isinstance(item, dict) and item.get("type") == "blob":
665-
item["content"] = BLOB_DATA_SUBSTITUTE
681+
if isinstance(item, dict):
682+
if item.get("type") == "blob":
683+
item["content"] = BLOB_DATA_SUBSTITUTE
684+
elif _is_image_type_with_blob_content(item):
685+
item["image_url"]["url"] = BLOB_DATA_SUBSTITUTE
666686

667687
return messages_copy
668688

sentry_sdk/consts.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -636,12 +636,6 @@ class SPANDATA:
636636
Example: "rainy, 57°F"
637637
"""
638638

639-
GEN_AI_TOOL_TYPE = "gen_ai.tool.type"
640-
"""
641-
The type of tool being used.
642-
Example: "function"
643-
"""
644-
645639
GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
646640
"""
647641
The number of tokens in the input.

0 commit comments

Comments
 (0)