From cb0ba3b0fb3cdd5f54aec463e429fc4b29855048 Mon Sep 17 00:00:00 2001 From: wetbanana Date: Wed, 29 Apr 2026 16:26:23 +0100 Subject: [PATCH 1/2] fix: avoid leading separator in user_agent when none is supplied MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _get_hf_api() built user_agent_str = "" then did user_agent_str += "; kernels/...", producing a leading "; ". When the resulting string was forwarded to HfApi(...), huggingface_hub joined its own fields with another "; ", producing an empty token in the final User-Agent. After deduplication, this left a trailing "; " in the header, which strict HTTP clients (httpx >= 0.25) reject with LocalProtocolError: Illegal header value. This breaks any code path that triggers _get_hf_api() without supplying a user_agent — most notably _get_available_versions(), which transformers hits when resolving finegrained-fp8 / deep-gemm kernel versions for FP8 models. Build the system info as a separate sys_info string with no leading separator, then join it onto any caller-supplied user_agent with a single "; " only when the caller-supplied part is non-empty. Add a regression test in test_user_agent.py covering None, "", and {} inputs. 
--- kernels/src/kernels/utils.py | 12 +++++++----- kernels/tests/test_user_agent.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/kernels/src/kernels/utils.py b/kernels/src/kernels/utils.py index 829c740f..cf26ccff 100644 --- a/kernels/src/kernels/utils.py +++ b/kernels/src/kernels/utils.py @@ -636,22 +636,24 @@ def _get_hf_api(user_agent: str | dict | None = None) -> HfApi: # System info python = ".".join(platform.python_version_tuple()[:2]) backend = _select_backend(None).variant_str - user_agent_str += ( - f"; kernels/{__version__}; python/{python}; backend/{backend}; platform/{_platform()}; file_type/kernel" + sys_info = ( + f"kernels/{__version__}; python/{python}; backend/{backend}; platform/{_platform()}; file_type/kernel" ) if has_torch: import torch - user_agent_str += f"; torch/{torch.__version__}" + sys_info += f"; torch/{torch.__version__}" if has_tvm_ffi: import tvm_ffi - user_agent_str += f"; tvm-ffi/{tvm_ffi.__version__}" + sys_info += f"; tvm-ffi/{tvm_ffi.__version__}" # Add glibc version if available glibc = glibc_version() if glibc is not None: - user_agent_str += f"; glibc/{glibc}" + sys_info += f"; glibc/{glibc}" + + user_agent_str = f"{user_agent_str}; {sys_info}" if user_agent_str else sys_info return HfApi(library_name="kernels", library_version=__version__, user_agent=user_agent_str) diff --git a/kernels/tests/test_user_agent.py b/kernels/tests/test_user_agent.py index 11faafeb..c87e6287 100644 --- a/kernels/tests/test_user_agent.py +++ b/kernels/tests/test_user_agent.py @@ -69,3 +69,16 @@ def test_platform_format(): parts = plat.split("-") assert len(parts) == 2 assert parts[1] in ("linux", "darwin", "windows") + + +def test_user_agent_no_leading_or_empty_segment(): + # Regression: when no caller-supplied user_agent is passed, the resulting + # string must not start with a separator and must not contain empty + # segments. Empty segments downstream produce malformed User-Agent headers + # (e.g. 
trailing "; ") which strict HTTP clients reject. + for ua_input in (None, "", {}): + api = _get_hf_api(user_agent=ua_input) + ua = api.user_agent + assert not ua.startswith(";"), f"user_agent must not start with ';': {ua!r}" + for segment in ua.split(";"): + assert segment.strip() != "", f"empty segment found in user_agent: {ua!r}" From 3208d9785aceb6db9f7d70359b5e0e7b04355f1b Mon Sep 17 00:00:00 2001 From: wetbanana Date: Thu, 30 Apr 2026 01:12:28 +0100 Subject: [PATCH 2/2] refactor: build user_agent via list + join instead of conditional concat Address review feedback: replace the user_agent_str string-juggling with a single list of parts that gets joined at the end. Behavior is identical to the previous commit (existing test_user_agent_no_leading_or_empty_segment still passes), the code is just less branchy. --- kernels/src/kernels/utils.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/kernels/src/kernels/utils.py b/kernels/src/kernels/utils.py index cf26ccff..eec11e52 100644 --- a/kernels/src/kernels/utils.py +++ b/kernels/src/kernels/utils.py @@ -627,33 +627,41 @@ def _get_hf_api(user_agent: str | dict | None = None) -> HfApi: user_agent_str = "" if not constants.HF_HUB_DISABLE_TELEMETRY: + parts: list[str] = [] + # User-defined info if isinstance(user_agent, dict): - user_agent_str = "; ".join(f"{k}/{v}" for k, v in user_agent.items()) - if isinstance(user_agent, str): - user_agent_str = user_agent + parts.extend(f"{k}/{v}" for k, v in user_agent.items()) + elif isinstance(user_agent, str) and user_agent: + parts.append(user_agent) # System info python = ".".join(platform.python_version_tuple()[:2]) backend = _select_backend(None).variant_str - sys_info = ( - f"kernels/{__version__}; python/{python}; backend/{backend}; platform/{_platform()}; file_type/kernel" + parts.extend( + [ + f"kernels/{__version__}", + f"python/{python}", + f"backend/{backend}", + f"platform/{_platform()}", + "file_type/kernel", + ] ) if 
has_torch: import torch - sys_info += f"; torch/{torch.__version__}" + parts.append(f"torch/{torch.__version__}") if has_tvm_ffi: import tvm_ffi - sys_info += f"; tvm-ffi/{tvm_ffi.__version__}" + parts.append(f"tvm-ffi/{tvm_ffi.__version__}") # Add glibc version if available glibc = glibc_version() if glibc is not None: - sys_info += f"; glibc/{glibc}" + parts.append(f"glibc/{glibc}") - user_agent_str = f"{user_agent_str}; {sys_info}" if user_agent_str else sys_info + user_agent_str = "; ".join(parts) return HfApi(library_name="kernels", library_version=__version__, user_agent=user_agent_str)