diff --git a/src/everos/entrypoints/api/app.py b/src/everos/entrypoints/api/app.py
index 8e728fa80..0b638624d 100644
--- a/src/everos/entrypoints/api/app.py
+++ b/src/everos/entrypoints/api/app.py
@@ -29,6 +29,7 @@
 from everos.core.observability.logging import get_logger
 
 from .lifespans import (
+    BoundaryTokenizerLifespanProvider,
     CascadeLifespanProvider,
     LanceDBLifespanProvider,
     LLMLifespanProvider,
@@ -67,7 +68,8 @@ def create_app(
         cors_allow_methods: Allowed CORS methods (default: ``["*"]``).
         cors_allow_headers: Allowed CORS headers (default: ``["*"]``).
         lifespan_providers: Optional list of LifespanProvider; defaults to
-            ``[MetricsLifespanProvider(), SqliteLifespanProvider(),
+            ``[MetricsLifespanProvider(), LLMLifespanProvider(),
+            BoundaryTokenizerLifespanProvider(), SqliteLifespanProvider(),
             LanceDBLifespanProvider(), CascadeLifespanProvider(),
             OmeLifespanProvider()]``.
 
@@ -80,6 +82,7 @@ def create_app(
         lifespan_providers = [
             MetricsLifespanProvider(),
             LLMLifespanProvider(),
+            BoundaryTokenizerLifespanProvider(),
             SqliteLifespanProvider(),
             LanceDBLifespanProvider(),
             CascadeLifespanProvider(),
diff --git a/src/everos/entrypoints/api/lifespans/__init__.py b/src/everos/entrypoints/api/lifespans/__init__.py
index 262106d35..4b872646a 100644
--- a/src/everos/entrypoints/api/lifespans/__init__.py
+++ b/src/everos/entrypoints/api/lifespans/__init__.py
@@ -12,6 +12,7 @@
 External usage::
 
     from everos.entrypoints.api.lifespans import (
+        BoundaryTokenizerLifespanProvider,
         LLMLifespanProvider,
         SqliteLifespanProvider,
         LanceDBLifespanProvider,
@@ -20,6 +21,9 @@
     )
 """
 
+from .boundary_tokenizer import (
+    BoundaryTokenizerLifespanProvider as BoundaryTokenizerLifespanProvider,
+)
 from .cascade import CascadeLifespanProvider as CascadeLifespanProvider
 from .lancedb import LanceDBLifespanProvider as LanceDBLifespanProvider
 from .llm import LLMLifespanProvider as LLMLifespanProvider
@@ -27,6 +31,7 @@
 from .sqlite import SqliteLifespanProvider as SqliteLifespanProvider
 
 __all__ = [
+    "BoundaryTokenizerLifespanProvider",
     "CascadeLifespanProvider",
     "LLMLifespanProvider",
     "LanceDBLifespanProvider",
diff --git a/src/everos/entrypoints/api/lifespans/boundary_tokenizer.py b/src/everos/entrypoints/api/lifespans/boundary_tokenizer.py
new file mode 100644
index 000000000..d5fa363db
--- /dev/null
+++ b/src/everos/entrypoints/api/lifespans/boundary_tokenizer.py
@@ -0,0 +1,68 @@
+"""Boundary-tokenizer lifespan provider.
+
+Prewarms the tiktoken encoding used by everalgo boundary detection so
+the first ``/api/v1/memory/add`` request does not block on an on-demand
+download of ``o200k_base``.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any
+
+import tiktoken
+from fastapi import FastAPI
+
+from everos.core.lifespan import LifespanProvider
+from everos.core.observability.logging import get_logger
+
+logger = get_logger(__name__)
+
+_BOUNDARY_ENCODING_NAME = "o200k_base"
+
+
+def _warm_boundary_tokenizer() -> tiktoken.Encoding:
+    """Resolve the boundary detector's shared tiktoken encoding.
+
+    Returns:
+        The encoding object returned by ``tiktoken.get_encoding``.
+
+    Raises:
+        RuntimeError: If the encoding cannot be resolved for any reason;
+            the original exception is chained as ``__cause__``.
+    """
+
+    try:
+        return tiktoken.get_encoding(_BOUNDARY_ENCODING_NAME)
+    except Exception as exc:
+        # Re-raise with an actionable message; ``from exc`` keeps the
+        # underlying failure visible in the traceback.
+        raise RuntimeError(
+            "failed to prewarm the boundary tokenizer encoding "
+            f"{_BOUNDARY_ENCODING_NAME!r}; start the server once with network "
+            "access so tiktoken can cache it before serving /api/v1/memory/add"
+        ) from exc
+
+
+class BoundaryTokenizerLifespanProvider(LifespanProvider):
+    """Prewarm the boundary tokenizer at startup; fail before serving traffic."""
+
+    def __init__(self, order: int = 9) -> None:
+        super().__init__(name="boundary_tokenizer", order=order)
+
+    async def startup(self, app: FastAPI) -> Any:
+        """Load the encoding and return it; raise ``RuntimeError`` on failure."""
+        # ``tiktoken.get_encoding`` is synchronous and, per the module
+        # docstring, may download the encoding on demand. Run it in a
+        # worker thread so the event loop is not blocked meanwhile.
+        encoding = await asyncio.to_thread(_warm_boundary_tokenizer)
+        logger.info(
+            "boundary_tokenizer_lifespan_ready",
+            encoding=_BOUNDARY_ENCODING_NAME,
+        )
+        return encoding
+
+    async def shutdown(self, app: FastAPI) -> None:
+        """Do nothing on shutdown."""
+        # tiktoken keeps a process-local cache; nothing to tear down.
+        return None
diff --git a/tests/unit/test_entrypoints/test_api/test_lifespans/test_boundary_tokenizer.py b/tests/unit/test_entrypoints/test_api/test_lifespans/test_boundary_tokenizer.py
new file mode 100644
index 000000000..ebf82b4a0
--- /dev/null
+++ b/tests/unit/test_entrypoints/test_api/test_lifespans/test_boundary_tokenizer.py
@@ -0,0 +1,50 @@
+"""Boundary-tokenizer lifespan — prewarms tiktoken before the first /add."""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+from fastapi import FastAPI
+
+from everos.entrypoints.api.lifespans import BoundaryTokenizerLifespanProvider
+
+
+def test_provider_metadata() -> None:
+    provider = BoundaryTokenizerLifespanProvider(order=9)
+    assert provider.name == "boundary_tokenizer"
+    assert provider.order == 9
+
+
+async def test_startup_prewarms_o200k_base() -> None:
+    provider = BoundaryTokenizerLifespanProvider()
+    app = FastAPI()
+    sentinel = object()
+
+    with patch(
+        "everos.entrypoints.api.lifespans.boundary_tokenizer.tiktoken.get_encoding",
+        return_value=sentinel,
+    ) as mock_get_encoding:
+        result = await provider.startup(app)
+
+    assert result is sentinel
+    mock_get_encoding.assert_called_once_with("o200k_base")
+
+
+async def test_startup_wraps_download_failures() -> None:
+    provider = BoundaryTokenizerLifespanProvider()
+    app = FastAPI()
+
+    with (
+        patch(
+            "everos.entrypoints.api.lifespans.boundary_tokenizer.tiktoken.get_encoding",
+            side_effect=OSError("download failed"),
+        ),
+        pytest.raises(RuntimeError, match="o200k_base"),
+    ):
+        await provider.startup(app)
+
+
+async def test_shutdown_is_noop() -> None:
+    provider = BoundaryTokenizerLifespanProvider()
+    await provider.shutdown(FastAPI())