Skip to content

Commit 51ea6ef

Browse files
authored
fix(litellm): skip encoding_format="float" for voyage/bedrock (#152)
Voyage models (e.g. voyage/voyage-code-3) reject encoding_format="float" and require base64. Only inject encoding_format/drop_params for providers that accept the float hint, leaving voyage/ and bedrock/ to use their native defaults. Fixes #148
1 parent 9700655 commit 51ea6ef

2 files changed

Lines changed: 29 additions & 5 deletions

File tree

src/cocoindex_code/litellm_embedder.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
from cocoindex.ops.litellm import LiteLLMEmbedder, litellm
1414
from numpy.typing import NDArray
1515

16-
litellm.drop_params = True
17-
1816
logger = logging.getLogger(__name__)
1917

2018
_RATE_LIMIT_DELAY_RE = re.compile(r"Please try again in ([0-9.]+)(ms|s)", re.IGNORECASE)
@@ -84,10 +82,12 @@ async def run_embedding_request(self, *, input: list[str], **kwargs: Any) -> Any
8482
if self._next_request_at > now:
8583
await asyncio.sleep(self._next_request_at - now)
8684

85+
if not self._model.startswith(("voyage/", "bedrock/")):
86+
kwargs["encoding_format"] = "float"
87+
kwargs["drop_params"] = True
8788
response = await self._aembedding_with_rate_limit_retries(
8889
model=self._model,
8990
input=input,
90-
encoding_format="float",
9191
**kwargs,
9292
)
9393

tests/test_litellm_embedder.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ async def fake_aembedding(*, model: str, input: list[str], **kwargs: Any) -> Any
2525
attempts += 1
2626
assert model == "text-embedding-3-small"
2727
assert input == ["hello"]
28-
assert kwargs == {"encoding_format": "float"}
28+
assert kwargs == {"encoding_format": "float", "drop_params": True}
2929
if attempts == 1:
3030
raise Exception("Rate limit exceeded. Please try again in 250ms")
3131
return SimpleNamespace(data=[{"embedding": [1.0, 2.0]}])
@@ -54,7 +54,7 @@ async def fake_sleep(delay: float) -> None:
5454

5555
async def fake_aembedding(*, model: str, input: list[str], **kwargs: Any) -> Any:
5656
assert model == "text-embedding-3-small"
57-
assert kwargs == {"encoding_format": "float"}
57+
assert kwargs == {"encoding_format": "float", "drop_params": True}
5858
inputs_seen.append(input)
5959
return SimpleNamespace(data=[{"embedding": [1.0, 2.0]}])
6060

@@ -70,3 +70,27 @@ async def fake_aembedding(*, model: str, input: list[str], **kwargs: Any) -> Any
7070
assert inputs_seen == [["second"]]
7171
assert len(sleep_calls) == 1
7272
assert sleep_calls[0] == pytest.approx(0.3)
73+
74+
75+
@pytest.mark.parametrize(
76+
"model_name", ["voyage/voyage-code-3", "bedrock/amazon.titan-embed-text-v2:0"]
77+
)
78+
@pytest.mark.asyncio
79+
async def test_run_embedding_request_omits_encoding_format_for_native_providers(
80+
monkeypatch: pytest.MonkeyPatch, model_name: str
81+
) -> None:
82+
seen_kwargs: dict[str, Any] = {}
83+
84+
async def fake_aembedding(*, model: str, input: list[str], **kwargs: Any) -> Any:
85+
assert model == model_name
86+
assert input == ["hello"]
87+
seen_kwargs.update(kwargs)
88+
return SimpleNamespace(data=[{"embedding": [1.0, 2.0]}])
89+
90+
monkeypatch.setattr("cocoindex_code.litellm_embedder.litellm.aembedding", fake_aembedding)
91+
92+
embedder = PacedLiteLLMEmbedder(model_name)
93+
await embedder.run_embedding_request(input=["hello"])
94+
95+
assert "encoding_format" not in seen_kwargs
96+
assert "drop_params" not in seen_kwargs

0 commit comments

Comments
 (0)