-
Notifications
You must be signed in to change notification settings - Fork 117
feat(serve): improve OpenAI API compatibility with usage, finish_reas… #771
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
9bb9ac8
feat(serve): improve OpenAI API compatibility with usage, finish_reas…
markstur 97356b0
Remove made with Bob comment
markstur 9649315
fix(serve): set system_fingerprint to None per OpenAI spec
markstur 43b9656
fix: remove unused import
markstur cbe3eac
fix: calculate total_tokens from prompt + completion when missing
markstur 191a0c8
test: make completion ID format test less brittle
markstur File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,243 @@ | ||
| """Tests for the m serve OpenAI-compatible API server.""" | ||
|
|
||
| from unittest.mock import Mock, patch | ||
|
|
||
| import pytest | ||
|
|
||
| from cli.serve.app import make_chat_endpoint | ||
| from cli.serve.models import ( | ||
| ChatCompletion, | ||
| ChatCompletionRequest, | ||
| ChatMessage, | ||
| CompletionUsage, | ||
| ) | ||
| from mellea.core.base import ModelOutputThunk | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def mock_module(): | ||
| """Create a mock module with a serve function.""" | ||
| module = Mock() | ||
| module.__name__ = "test_module" | ||
| return module | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def sample_request(): | ||
| """Create a sample ChatCompletionRequest.""" | ||
| return ChatCompletionRequest( | ||
| model="test-model", | ||
| messages=[ChatMessage(role="user", content="Hello, world!")], | ||
| temperature=0.7, | ||
| max_tokens=100, | ||
| ) | ||
|
|
||
|
|
||
| class TestChatEndpoint: | ||
| """Tests for the chat completion endpoint.""" | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_basic_completion(self, mock_module, sample_request): | ||
| """Test basic chat completion returns correct structure.""" | ||
| # Setup mock output | ||
| mock_output = ModelOutputThunk("Hello! How can I help you?") | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| # Create endpoint and call it | ||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| # Verify response structure | ||
| assert isinstance(response, ChatCompletion) | ||
| assert response.model == "test-model" | ||
| assert len(response.choices) == 1 | ||
| assert response.choices[0].message.content == "Hello! How can I help you?" | ||
| assert response.choices[0].message.role == "assistant" | ||
| assert response.choices[0].index == 0 | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_finish_reason_included(self, mock_module, sample_request): | ||
| """Test that finish_reason is included in the response.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| assert response.choices[0].finish_reason == "stop" | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_usage_field_populated(self, mock_module, sample_request): | ||
| """Test that usage field is populated when available.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| mock_output.usage = { | ||
| "prompt_tokens": 10, | ||
| "completion_tokens": 5, | ||
| "total_tokens": 15, | ||
| } | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| assert response.usage is not None | ||
| assert isinstance(response.usage, CompletionUsage) | ||
| assert response.usage.prompt_tokens == 10 | ||
| assert response.usage.completion_tokens == 5 | ||
| assert response.usage.total_tokens == 15 | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_usage_field_none_when_unavailable(self, mock_module, sample_request): | ||
| """Test that usage field is None when not available.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| # Don't set usage field | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| assert response.usage is None | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_system_fingerprint_from_model(self, mock_module, sample_request): | ||
| """Test that system_fingerprint is populated from model field.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| mock_output.model = "gpt-4-turbo" | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| assert response.system_fingerprint == "gpt-4-turbo" | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_system_fingerprint_from_provider(self, mock_module, sample_request): | ||
| """Test that system_fingerprint falls back to provider field.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| mock_output.provider = "ollama" | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| assert response.system_fingerprint == "ollama" | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_system_fingerprint_none_when_unavailable( | ||
| self, mock_module, sample_request | ||
| ): | ||
| """Test that system_fingerprint is None when not available.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| # Don't set model or provider | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| assert response.system_fingerprint is None | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_model_options_passed_correctly(self, mock_module, sample_request): | ||
| """Test that model options are passed to serve function correctly.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| await endpoint(sample_request) | ||
|
|
||
| # Verify serve was called with correct arguments | ||
| call_args = mock_module.serve.call_args | ||
| assert call_args is not None | ||
| assert "model_options" in call_args.kwargs | ||
| model_options = call_args.kwargs["model_options"] | ||
|
|
||
| # Should include temperature and max_tokens but not messages/requirements | ||
| assert "temperature" in model_options | ||
| assert model_options["temperature"] == 0.7 | ||
| assert "max_tokens" in model_options | ||
| assert model_options["max_tokens"] == 100 | ||
| assert "messages" not in model_options | ||
| assert "requirements" not in model_options | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_completion_id_format(self, mock_module, sample_request): | ||
| """Test that completion ID follows OpenAI format.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| # Should start with "chatcmpl-" and be 38 characters total (chatcmpl- + 29 hex chars) | ||
| assert response.id.startswith("chatcmpl-") | ||
| assert len(response.id) == 38 | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_created_timestamp_present(self, mock_module, sample_request): | ||
| """Test that created timestamp is present and reasonable.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| # Should be a Unix timestamp (positive integer) | ||
| assert isinstance(response.created, int) | ||
| assert response.created > 0 | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_object_type_correct(self, mock_module, sample_request): | ||
| """Test that object type is set correctly.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| assert response.object == "chat.completion" | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_usage_with_partial_data(self, mock_module, sample_request): | ||
| """Test that usage handles missing fields gracefully.""" | ||
| mock_output = ModelOutputThunk("Test response") | ||
| # Only provide some fields | ||
| mock_output.usage = { | ||
| "prompt_tokens": 10 | ||
| # Missing completion_tokens and total_tokens | ||
| } | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| assert response.usage is not None | ||
| assert response.usage.prompt_tokens == 10 | ||
| assert response.usage.completion_tokens == 0 # Should default to 0 | ||
| assert response.usage.total_tokens == 0 | ||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_all_fields_together(self, mock_module, sample_request): | ||
| """Test that all new fields work together correctly.""" | ||
| mock_output = ModelOutputThunk("Complete response") | ||
| mock_output.usage = { | ||
| "prompt_tokens": 20, | ||
| "completion_tokens": 10, | ||
| "total_tokens": 30, | ||
| } | ||
| mock_output.model = "gpt-4" | ||
| mock_output.provider = "openai" | ||
| mock_module.serve.return_value = mock_output | ||
|
|
||
| endpoint = make_chat_endpoint(mock_module) | ||
| response = await endpoint(sample_request) | ||
|
|
||
| # Verify all fields are present | ||
| assert response.choices[0].finish_reason == "stop" | ||
| assert response.usage is not None | ||
| assert response.usage.total_tokens == 30 | ||
| assert response.system_fingerprint == "gpt-4" # model takes precedence | ||
| assert response.object == "chat.completion" | ||
| assert response.id.startswith("chatcmpl-") | ||
|
|
||
|
|
||
| # Made with Bob | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.