Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

### New Features and Improvements

* Honor the Vercel `AI_AGENT=<name>` env var as a secondary fallback for AI agent detection in the User-Agent header (after the agents.md `AGENT=<name>` standard). Fallback values are now passed through to the User-Agent, sanitized and length-capped at 64 characters, instead of unrecognized names being coerced to `agent/unknown`, so versioned variants such as `claude-code_2-1-141_agent` surface as-is.

### Security

### Bug Fixes
Expand Down
45 changes: 31 additions & 14 deletions databricks/sdk/useragent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
# Precompiled regex patterns
alphanum_pattern = re.compile(r"^[a-zA-Z0-9_.+-]+$")

# Matches any single character not allowed in a User-Agent token. Used to
# sanitize free-form values (e.g. the AGENT/AI_AGENT fallback) by replacing
# disallowed characters with a hyphen.
alphanum_inverse_pattern = re.compile(r"[^a-zA-Z0-9_.+-]")

# official https://semver.org/ recommendation: https://regex101.com/r/Ly7O1x/
# with addition of "x" wildcards for minor/patch versions. Also, patch version may be omitted.
semver_pattern = re.compile(
Expand Down Expand Up @@ -133,6 +138,11 @@ def _sanitize_header_value(value: str) -> str:
return value


def _sanitize_agent_value(value: str) -> str:
    """Return *value* with each character outside the User-Agent allowlist swapped for "-".

    The allowlist is defined by the module-level ``alphanum_inverse_pattern``;
    every single character it matches is replaced one-for-one, so the length
    of the result equals the length of the input.
    """
    return re.sub(alphanum_inverse_pattern, "-", value)


def to_string(
alternate_product_info: Optional[Tuple[str, str]] = None,
other_info: Optional[List[Tuple[str, str]]] = None,
Expand Down Expand Up @@ -226,7 +236,8 @@ def cicd_provider() -> str:


# Canonical list of known AI coding agents. Alphabetical by product name.
# Keep this list in sync with databricks-sdk-go and databricks-sdk-java.
# Keep this list, and the AGENT / AI_AGENT fallback handling in
# _agent_env_fallback, in sync with databricks-sdk-go and databricks-sdk-java.
#
# Each record has a single env var that identifies the product by presence
# (the env var just needs to be set, even to an empty string).
Expand All @@ -236,6 +247,12 @@ class _AgentRecord:
product: str


# Caps fallback values to keep the User-Agent bounded. Explicit-matcher
# products are short by construction; only the fallback path can carry
# arbitrary lengths.
_MAX_AGENT_FALLBACK_LEN = 64


_KNOWN_AGENTS: List[_AgentRecord] = [
_AgentRecord("AMP_CURRENT_THREAD_ID", "amp"), # https://ampcode.com/ (also sets AGENT=amp, handled centrally)
_AgentRecord("ANTIGRAVITY_AGENT", "antigravity"), # Closed source (Google)
Expand Down Expand Up @@ -274,13 +291,12 @@ def agent_provider() -> str:
every enclosing layer).

Explicit agent env vars (e.g. CLAUDECODE, GOOSE_TERMINAL) always take
precedence. The agents.md-standard AGENT=<name> env var is only consulted
as a fallback when no explicit matcher fired:
- If AGENT matches a known product name, return that product.
- Otherwise return "unknown".
precedence. The agents.md-standard AGENT=<name> env var and the Vercel
AI_AGENT=<name> convention are only consulted as a fallback when no
explicit matcher fired (see _agent_env_fallback).

This means AGENT=<name> never contributes to the multi-agent signal: if
any explicit matcher fires, AGENT is ignored entirely, even when it names
This means AGENT/AI_AGENT never contribute to the multi-agent signal: if
any explicit matcher fires, they are ignored entirely, even when they name
a different known product.

Result is cached after first call.
Expand All @@ -301,14 +317,15 @@ def agent_provider() -> str:


def _agent_env_fallback() -> str:
"""Honor the agents.md AGENT=<name> standard.
"""Return a sanitized, length-capped name from AGENT or AI_AGENT.

Returns the value if it matches a known product name, "unknown" if AGENT
is set to any other non-empty value, and "" if AGENT is unset or empty.
AGENT (the agents.md standard) is preferred; AI_AGENT (the Vercel
@vercel/detect-agent convention) is consulted only when AGENT is unset or
empty. The value is passed through rather than categorized so that new
names are propagated without updating the list of known agents. Returns ""
if both are unset or empty.
"""
v = os.environ.get("AGENT", "")
v = os.environ.get("AGENT") or os.environ.get("AI_AGENT")
if not v:
return ""
if v in {a.product for a in _KNOWN_AGENTS}:
return v
return "unknown"
return _sanitize_agent_value(v)[:_MAX_AGENT_FALLBACK_LEN]
119 changes: 112 additions & 7 deletions tests/test_user_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,25 +256,50 @@ def test_agent_provider_windsurf(clean_useragent_env):
assert useragent.agent_provider() == "windsurf"


def test_agent_provider_unknown_agent_fallback(clean_useragent_env):
# AGENT set to a value that doesn't match any known agent
# should fall back to "unknown".
def test_agent_provider_unknown_agent_passthrough(clean_useragent_env):
# AGENT set to a value that doesn't match any known agent now passes
# through (sanitized) rather than being coerced to "unknown".
os.environ["AGENT"] = "someweirdthing"
from databricks.sdk import useragent

assert useragent.agent_provider() == "unknown"
assert useragent.agent_provider() == "someweirdthing"


def test_agent_provider_agent_known_product_name_fallback(clean_useragent_env):
# AGENT=<known product name> with no other matchers set should resolve
# to the matching product (e.g. cursor is only identified by CURSOR_AGENT;
# AGENT=cursor is a reasonable implicit signal to attribute it).
# AGENT=<known product name> with no other matchers set passes through
# unchanged (cursor is only identified by CURSOR_AGENT; AGENT=cursor is a
# reasonable implicit signal to attribute it).
os.environ["AGENT"] = "cursor"
from databricks.sdk import useragent

assert useragent.agent_provider() == "cursor"


def test_agent_provider_agent_versioned_variant_passthrough(clean_useragent_env):
    """A versioned AGENT name built only from allowlisted characters is reported verbatim."""
    versioned_name = "claude-code_2-1-141_agent"
    os.environ["AGENT"] = versioned_name
    from databricks.sdk import useragent

    assert useragent.agent_provider() == versioned_name


def test_agent_provider_agent_disallowed_chars_sanitized(clean_useragent_env):
    """The space and the slash are outside the User-Agent allowlist, so each becomes a hyphen."""
    os.environ.update({"AGENT": "claude code/agent"})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "claude-code-agent"


def test_agent_provider_agent_over_cap_truncated(clean_useragent_env):
    """A 100-character AGENT value is cut down to its first 64 characters."""
    os.environ.update({"AGENT": "a" * 100})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "a" * 64


def test_agent_provider_known_matcher_wins_over_agent_fallback(clean_useragent_env):
# When a known matcher fires, it wins even if AGENT is set to an
# unrelated value. The AGENT fallback only applies when nothing else hits.
Expand All @@ -293,6 +318,86 @@ def test_agent_provider_agent_empty_string(clean_useragent_env):
assert useragent.agent_provider() == ""


def test_agent_provider_ai_agent_fallback(clean_useragent_env):
    """With AGENT unset, AI_AGENT (the Vercel @vercel/detect-agent convention) supplies the name."""
    os.environ.update({"AI_AGENT": "cursor"})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "cursor"


def test_agent_provider_ai_agent_empty_string(clean_useragent_env):
    """An empty AI_AGENT must not trigger the fallback; no agent is reported."""
    os.environ.update({"AI_AGENT": ""})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == ""


def test_agent_provider_known_matcher_wins_over_ai_agent_fallback(clean_useragent_env):
    """CLAUDECODE is an explicit matcher, so it beats an unrelated AI_AGENT value."""
    os.environ.update({"AI_AGENT": "somethingunknown", "CLAUDECODE": "1"})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "claude-code"


def test_agent_provider_agent_wins_over_ai_agent(clean_useragent_env):
    """When AGENT and AI_AGENT are both non-empty, the AGENT value is the one reported."""
    os.environ.update({"AGENT": "claude-code", "AI_AGENT": "cursor"})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "claude-code"


def test_agent_provider_agent_unrecognized_wins_over_ai_agent(clean_useragent_env):
    """A non-empty AGENT beats AI_AGENT even when AGENT is not a recognized name."""
    os.environ.update({"AGENT": "somethingunknown", "AI_AGENT": "cursor"})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "somethingunknown"


def test_agent_provider_agent_set_ai_agent_empty(clean_useragent_env):
    """With AGENT set and AI_AGENT empty, the AGENT value is reported."""
    os.environ.update({"AGENT": "cursor", "AI_AGENT": ""})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "cursor"


def test_agent_provider_empty_agent_falls_through_to_ai_agent(clean_useragent_env):
    """An empty AGENT is skipped, so the AI_AGENT value is reported instead."""
    os.environ.update({"AGENT": "", "AI_AGENT": "cursor"})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "cursor"


def test_agent_provider_both_agent_and_ai_agent_empty(clean_useragent_env):
    """With AGENT and AI_AGENT both empty, no agent is reported."""
    os.environ.update({"AGENT": "", "AI_AGENT": ""})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == ""


def test_agent_provider_explicit_wins_over_ai_agent(clean_useragent_env):
    """The explicit CLAUDECODE env var wins even when AI_AGENT names a different product."""
    os.environ.update({"AI_AGENT": "cursor", "CLAUDECODE": "1"})
    from databricks.sdk import useragent

    reported = useragent.agent_provider()
    assert reported == "claude-code"


def test_agent_provider_multiple_agents(clean_useragent_env):
# Nested agents (e.g. Claude Code spawning a Cursor CLI subagent) set
# multiple explicit matchers on the same process.
Expand Down
Loading