diff --git a/llm/utils.py b/llm/utils.py index 587f19284..02b17a7fa 100644 --- a/llm/utils.py +++ b/llm/utils.py @@ -3,6 +3,7 @@ import httpx import itertools import json +import mimetypes import pathlib import puremagic import re @@ -37,17 +38,23 @@ def id(self): def mimetype_from_string(content) -> Optional[str]: try: type_ = puremagic.from_string(content, mime=True) - return MIME_TYPE_FIXES.get(type_, type_) + if type_: + return MIME_TYPE_FIXES.get(type_, type_) except puremagic.PureError: - return None + pass + return None def mimetype_from_path(path) -> Optional[str]: try: type_ = puremagic.from_file(path, mime=True) - return MIME_TYPE_FIXES.get(type_, type_) + if type_: + return MIME_TYPE_FIXES.get(type_, type_) except puremagic.PureError: - return None + pass + # Fall back to stdlib mimetypes if puremagic gives no type or raises PureError + guessed, _ = mimetypes.guess_type(str(path)) + return guessed def dicts_to_table_string( diff --git a/tests/test_utils.py b/tests/test_utils.py index 51fb8754f..bead8f848 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,6 +4,7 @@ extract_fenced_code_block, instantiate_from_spec, maybe_fenced_code, +mimetype_from_path, schema_dsl, simplify_usage_dict, truncate_string, @@ -516,3 +517,26 @@ class Tool6(Toolbox): pass assert Tool6()._config == {} + + +def test_mimetype_from_path_falls_back_to_stdlib(tmp_path): + """When puremagic yields no MIME type, fall back to mimetypes.guess_type(). + + Regression test for https://github.com/simonw/llm/issues/1340 + """ + # Create a file with a known extension but content that puremagic may + # not recognise (here, a .mp4 file holding only 16 NUL bytes). 
+ mp4_file = tmp_path / "video.mp4" + mp4_file.write_bytes(b"\x00" * 16) + + result = mimetype_from_path(str(mp4_file)) + assert result == "video/mp4" + + +def test_mimetype_from_path_returns_none_for_unknown(tmp_path): + """Unknown extension and unrecognisable content returns None.""" + unknown = tmp_path / "data.xyzzy123" + unknown.write_bytes(b"\x00" * 16) + + result = mimetype_from_path(str(unknown)) + assert result is None