|
8 | 8 | import pytest |
9 | 9 | from braintrust import logger |
10 | 10 | from braintrust.integrations.langchain import BraintrustCallbackHandler |
11 | | -from braintrust.integrations.langchain.callbacks import _get_metrics_from_response |
12 | 11 | from braintrust.logger import flush |
13 | 12 | from braintrust.test_helpers import init_test_logger |
14 | 13 | from langchain_core.callbacks import BaseCallbackHandler |
15 | 14 | from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage |
16 | | -from langchain_core.outputs import ChatGeneration, LLMResult |
17 | 15 | from langchain_core.prompts import ChatPromptTemplate |
18 | 16 | from langchain_core.prompts.prompt import PromptTemplate |
19 | 17 | from langchain_core.runnables import RunnableMap, RunnableSerializable |
@@ -908,94 +906,6 @@ def test_streaming_ttft(logger_memory_logger): |
908 | 906 | ) |
909 | 907 |
|
910 | 908 |
|
911 | | -def _single_generation_response(usage_metadata: dict, model_name: str) -> LLMResult: |
912 | | - return LLMResult( |
913 | | - generations=[ |
914 | | - [ |
915 | | - ChatGeneration( |
916 | | - message=AIMessage( |
917 | | - content="Done", |
918 | | - response_metadata={"model_name": model_name}, |
919 | | - usage_metadata=cast(dict, usage_metadata), |
920 | | - ) |
921 | | - ) |
922 | | - ] |
923 | | - ] |
924 | | - ) |
925 | | - |
926 | | - |
927 | | -def test_folded_cache_tokens_are_not_double_counted(): |
928 | | - # langchain-anthropic >= 0.2.3 folds cache read/creation tokens into |
929 | | - # input_tokens, exposing them via input_token_details as a breakdown. |
930 | | - response = _single_generation_response( |
931 | | - { |
932 | | - "input_tokens": 1095, |
933 | | - "output_tokens": 40, |
934 | | - "total_tokens": 1135, |
935 | | - "input_token_details": { |
936 | | - "cache_read": 0, |
937 | | - "cache_creation": 0, |
938 | | - "ephemeral_5m_input_tokens": 1075, |
939 | | - "ephemeral_1h_input_tokens": 0, |
940 | | - }, |
941 | | - }, |
942 | | - model_name="claude-sonnet-4-5-20250929", |
943 | | - ) |
944 | | - |
945 | | - assert _get_metrics_from_response(response) == { |
946 | | - "prompt_tokens": 1095, |
947 | | - "completion_tokens": 40, |
948 | | - "total_tokens": 1135, |
949 | | - "tokens": 1135, |
950 | | - "prompt_cached_tokens": 0, |
951 | | - "prompt_cache_creation_5m_tokens": 1075, |
952 | | - "prompt_cache_creation_1h_tokens": 0, |
953 | | - } |
954 | | - |
955 | | - |
956 | | -def test_openai_cached_tokens_are_not_folded_into_prompt_tokens(): |
957 | | - response = _single_generation_response( |
958 | | - { |
959 | | - "input_tokens": 1000, |
960 | | - "output_tokens": 200, |
961 | | - "total_tokens": 1200, |
962 | | - "input_token_details": {"cache_read": 500}, |
963 | | - }, |
964 | | - model_name="gpt-4o-mini-2024-07-18", |
965 | | - ) |
966 | | - |
967 | | - assert _get_metrics_from_response(response) == { |
968 | | - "prompt_tokens": 1000, |
969 | | - "completion_tokens": 200, |
970 | | - "total_tokens": 1200, |
971 | | - "tokens": 1200, |
972 | | - "prompt_cached_tokens": 500, |
973 | | - } |
974 | | - |
975 | | - |
976 | | -def test_separately_reported_cache_tokens_are_folded_into_prompt_tokens(): |
977 | | - # Integrations that report uncached input only make cache tokens exceed |
978 | | - # the prompt total; normalize so prompt/total include cache tokens. |
979 | | - response = _single_generation_response( |
980 | | - { |
981 | | - "input_tokens": 20, |
982 | | - "output_tokens": 40, |
983 | | - "total_tokens": 60, |
984 | | - "input_token_details": {"cache_read": 1000, "cache_creation": 500}, |
985 | | - }, |
986 | | - model_name="claude-3-5-sonnet-20240620", |
987 | | - ) |
988 | | - |
989 | | - assert _get_metrics_from_response(response) == { |
990 | | - "prompt_tokens": 1520, |
991 | | - "completion_tokens": 40, |
992 | | - "total_tokens": 1560, |
993 | | - "tokens": 1560, |
994 | | - "prompt_cached_tokens": 1000, |
995 | | - "prompt_cache_creation_tokens": 500, |
996 | | - } |
997 | | - |
998 | | - |
999 | 909 | @pytest.mark.vcr |
1000 | 910 | def test_prompt_caching_tokens(logger_memory_logger): |
1001 | 911 | from langchain_anthropic import ChatAnthropic |
|
0 commit comments