diff --git a/langfuse/langchain/CallbackHandler.py b/langfuse/langchain/CallbackHandler.py index 595a050f3..fe96516e2 100644 --- a/langfuse/langchain/CallbackHandler.py +++ b/langfuse/langchain/CallbackHandler.py @@ -880,6 +880,8 @@ def _parse_usage_model(usage: typing.Union[pydantic.BaseModel, dict]): "token_count" ] + usage_model = {k: v for k, v in usage_model.items() if not isinstance(v, str)} + return usage_model if usage_model else None diff --git a/tests/test_extract_model.py b/tests/test_extract_model.py index 3f5a5bb64..5db2961f6 100644 --- a/tests/test_extract_model.py +++ b/tests/test_extract_model.py @@ -94,6 +94,7 @@ def test_models(expected_model: str, model: Any): # all models here need to be tested here because we take the model from the kwargs / invocation_params or we need to make an actual call for setup +@pytest.mark.skip("Flaky") @pytest.mark.parametrize( "expected_model,model", [