Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import httpx
import itertools
import json
import mimetypes
import pathlib
import puremagic
import re
Expand Down Expand Up @@ -37,17 +38,23 @@ def id(self):
def mimetype_from_string(content) -> Optional[str]:
    """Detect the MIME type of in-memory content using puremagic.

    Args:
        content: The data to inspect; passed unchanged to
            ``puremagic.from_string``.

    Returns:
        The detected MIME type, remapped through ``MIME_TYPE_FIXES`` when
        that table has an entry for it. ``None`` when puremagic returns an
        empty result or raises ``puremagic.PureError``.
    """
    # Keep the try body to the single call that can raise PureError.
    try:
        type_ = puremagic.from_string(content, mime=True)
    except puremagic.PureError:
        return None
    # An empty result means "no match": report it as None rather than
    # handing callers an empty string as if it were a MIME type.
    if not type_:
        return None
    return MIME_TYPE_FIXES.get(type_, type_)


def mimetype_from_path(path) -> Optional[str]:
    """Detect the MIME type of a file, preferring content over extension.

    The file is first inspected with ``puremagic.from_file``. If that
    yields nothing (empty result) or raises ``puremagic.PureError``, the
    type is guessed from the file name with ``mimetypes.guess_type``.

    Args:
        path: Location of the file; converted with ``str()`` for the
            name-based fallback.

    Returns:
        The detected MIME type, remapped through ``MIME_TYPE_FIXES`` when
        that table has an entry for it; otherwise the stdlib guess, which
        is ``None`` when the name gives no hint either.
    """
    # NOTE(review): only PureError is treated as "no match" here; confirm
    # whether puremagic can raise other exceptions (e.g. for empty files)
    # that should also fall through to the name-based guess.
    try:
        type_ = puremagic.from_file(path, mime=True)
    except puremagic.PureError:
        type_ = None
    if type_:
        return MIME_TYPE_FIXES.get(type_, type_)
    # Fall back to stdlib mimetypes when puremagic returns empty or raises.
    guessed, _ = mimetypes.guess_type(str(path))
    return guessed


def dicts_to_table_string(
Expand Down
24 changes: 24 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
extract_fenced_code_block,
instantiate_from_spec,
maybe_fenced_code,
mimetype_from_path,
schema_dsl,
simplify_usage_dict,
truncate_string,
Expand Down Expand Up @@ -516,3 +517,26 @@ class Tool6(Toolbox):
pass

assert Tool6()._config == {}


def test_mimetype_from_path_falls_back_to_stdlib(tmp_path):
    """mimetype_from_path() uses mimetypes.guess_type() as a fallback.

    Regression test for https://github.com/simonw/llm/issues/1340
    """
    # A well-known extension paired with 16 NUL bytes of content, which
    # puremagic is not expected to identify, so the answer should come
    # from the file name alone.
    video_path = tmp_path / "video.mp4"
    video_path.write_bytes(b"\x00" * 16)

    assert mimetype_from_path(str(video_path)) == "video/mp4"


def test_mimetype_from_path_returns_none_for_unknown(tmp_path):
    """No MIME type is reported when neither content nor extension helps."""
    # Made-up extension plus 16 NUL bytes of content: neither is expected
    # to give the detector anything to work with.
    mystery_path = tmp_path / "data.xyzzy123"
    mystery_path.write_bytes(b"\x00" * 16)

    assert mimetype_from_path(str(mystery_path)) is None