Skip to content

Commit 4292a63

Browse files
committed
Add caching for tiktoken encodings
1 parent 3231b52 commit 4292a63

3 files changed

Lines changed: 27 additions & 12 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ megalinter-reports/
1010
# Benchmarks
1111
.asv/
1212

13+
# LLM Cache Files
14+
.tiktoken_cache
15+
1316
# Byte-compiled / optimized / DLL files
1417
__pycache__/
1518
*.py[cod]

tests/mlmodel_langchain/conftest.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,12 @@
1616
import os
1717

1818
import pytest
19-
import tiktoken
2019
from langchain_core.messages.ai import AIMessage
2120
from langchain_core.messages.tool import ToolMessage
2221
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
2322
from testing_support.fixture.event_loop import event_loop as loop
2423
from testing_support.fixture.vcr import * # noqa: F403
25-
from testing_support.fixture.vcr import VCR_IGNORED_HEADERS, VCR_REPLACE_HEADERS
24+
from testing_support.fixture.vcr import VCR_IGNORED_HEADERS, VCR_REPLACE_HEADERS, VCR_TIKTOKEN_ENCODINGS
2625
from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture
2726
from testing_support.ml_testing_utils import set_trace_info
2827

@@ -77,10 +76,8 @@
7776
]
7877
)
7978

80-
TIKTOKEN_ENCODINGS = ["cl100k_base"]
79+
VCR_TIKTOKEN_ENCODINGS.extend(["cl100k_base"])
8180

82-
83-
# Intercept outgoing requests and log to file for mocking
8481
EXPECTED_AGENT_RESPONSE = "Hello!"
8582
EXPECTED_TOOL_OUTPUT = "Hello!"
8683

@@ -111,13 +108,6 @@ def embedding_openai_client(openai_clients):
111108
return embedding_client
112109

113110

114-
@pytest.fixture(scope="session")
115-
def load_tiktoken_encodings():
116-
"""Load tiktoken encodings before tests run to avoid issues with VCR blocking the network."""
117-
for encoding in TIKTOKEN_ENCODINGS:
118-
tiktoken.get_encoding(encoding)
119-
120-
121111
@pytest.fixture
122112
def chat_openai_client(openai_clients):
123113
chat_client, _ = openai_clients

tests/testing_support/fixture/vcr.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
raise ImportError("pytest-recording is required to use the vcr fixtures.") from exc
8080

8181
import json
82+
import os
8283
from pathlib import Path
8384

8485
import pytest
@@ -88,6 +89,7 @@
8889
VCR_IGNORED_HEADERS = ["content-length", "traceparent", "tracestate", "user-agent", "x-goog-api-client"]
8990
VCR_REPLACE_HEADERS = [] # Must be tuples of (header_name, replacement_value)
9091
VCR_MATCH_ON = ["method", "scheme", "host", "port", "path", "body", "headers", "query"]
92+
VCR_TIKTOKEN_ENCODINGS = []
9193

9294

9395
# === Settings fixtures, required and overridable ===
@@ -217,6 +219,7 @@ def vcr_config(
217219
vcr_match_on,
218220
vcr_before_record_request,
219221
vcr_before_record_response,
222+
vcr_cache_tiktoken_encodings,
220223
):
221224
"""
222225
Combines the overridable settings fixtures into VCR.py's final configuration.
@@ -297,3 +300,22 @@ def pytest_collection_modifyitems(items):
297300
"""
298301
for item in items:
299302
item.add_marker(pytest.mark.vcr)
303+
304+
305+
@pytest.fixture
def vcr_cache_tiktoken_encodings(monkeypatch):
    """
    Pre-fetch the tiktoken encodings listed in VCR_TIKTOKEN_ENCODINGS before VCR is enabled.

    Returns immediately when tiktoken cannot be imported. Otherwise the
    TIKTOKEN_CACHE_DIR environment variable is set (through monkeypatch) to a
    ``.tiktoken_cache`` directory located under TOX_ENV_DIR, or under the current
    working directory when TOX_ENV_DIR is unset or empty. The directory is created
    if missing, and each listed encoding is then requested once.
    """
    try:
        import tiktoken
    except ImportError:
        # tiktoken is not installed, so there is nothing to pre-fetch.
        return

    # Prefer the tox environment directory; otherwise use the working directory.
    base_dir = os.environ.get("TOX_ENV_DIR") or Path.cwd()
    tiktoken_cache = Path(base_dir).joinpath(".tiktoken_cache")

    monkeypatch.setenv("TIKTOKEN_CACHE_DIR", str(tiktoken_cache))
    tiktoken_cache.mkdir(parents=True, exist_ok=True)

    # Request every encoding the test suites registered.
    for encoding_name in VCR_TIKTOKEN_ENCODINGS:
        tiktoken.get_encoding(encoding_name)

0 commit comments

Comments
 (0)