diff --git a/autotest/interface/restful/test_restful_anthropic_sdk_messages.py b/autotest/interface/restful/test_restful_anthropic_sdk_messages.py
new file mode 100644
index 0000000000..e1185a1cb9
--- /dev/null
+++ b/autotest/interface/restful/test_restful_anthropic_sdk_messages.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import asyncio
+import json
+
+import pytest
+
+pytest.importorskip('anthropic')
+
+from utils.constant import BACKEND_LIST, RESTFUL_MODEL_LIST
+from utils.tool_reasoning_definitions import get_async_anthropic_client_and_model
+
+
+def _text_from_message(msg) -> str:
+    """Concatenate the ``text`` of every ``text`` content block of ``msg``.
+
+    A missing or ``None`` ``content`` / ``text`` attribute counts as empty.
+    """
+    blocks = getattr(msg, 'content', []) or []
+    return ''.join(getattr(b, 'text', '') or '' for b in blocks if getattr(b, 'type', None) == 'text')
+
+
+def _first_message_start_usage(events: list) -> tuple[int, int] | None:
+    """Return ``(input_tokens, output_tokens)`` of the first ``message_start``.
+
+    Events without a ``message`` attribute are skipped. ``None`` is returned
+    when no such event exists or when the first one found carries no ``usage``.
+    """
+    for ev in events:
+        if getattr(ev, 'type', None) != 'message_start':
+            continue
+        msg = getattr(ev, 'message', None)
+        if msg is None:
+            continue
+        u = getattr(msg, 'usage', None)
+        if u is None:
+            return None
+        return getattr(u, 'input_tokens', 0), getattr(u, 'output_tokens', 0)
+    return None
+
+
+async def _sdk_simple_non_stream() -> object:
+    """Send one user turn through the SDK (non-streaming) and return the message."""
+    client, model_name = get_async_anthropic_client_and_model()
+    return await client.messages.create(
+        model=model_name,
+        max_tokens=1024,
+        temperature=0.01,
+        messages=[{'role': 'user', 'content': 'how are you!'}],
+    )
+
+
+async def _sdk_system_non_stream() -> object:
+    """Same request as the simple case, plus a top-level ``system`` prompt."""
+    client, model_name = get_async_anthropic_client_and_model()
+    return await client.messages.create(
+        model=model_name,
+        max_tokens=1024,
+        temperature=0.01,
+        system='you are a helpful assistant',
+        messages=[{'role': 'user', 'content': 'how are you!'}],
+    )
+
+
+async def _sdk_stream_events_and_final() -> tuple[list, object | None]:
+    """Collect every streamed event plus the final message when available.
+
+    The second item is ``None`` when the stream object has no callable
+    ``get_final_message`` or when calling it raises.
+    """
+    client, model_name = get_async_anthropic_client_and_model()
+    stream = await client.messages.create(
+        model=model_name,
+        max_tokens=1024,
+        temperature=0.01,
+        messages=[{'role': 'user', 'content': 'how are you!'}],
+        stream=True,
+    )
+    events: list = []
+    async for event in stream:
+        events.append(event)
+    final_msg = None
+    # NOTE(review): the object returned by ``create(stream=True)`` may not expose
+    # ``get_final_message`` (presumably only the ``messages.stream()`` helper does) - confirm.
+    getter = getattr(stream, 'get_final_message', None)
+    if callable(getter):
+        try:
+            final_msg = await getter()
+        except Exception:
+            # Deliberate best effort: the caller falls back to raw-event checks.
+            final_msg = None
+    return events, final_msg
+
+
+@pytest.mark.order(8)
+@pytest.mark.flaky(reruns=2)
+@pytest.mark.parametrize('backend', BACKEND_LIST)
+@pytest.mark.parametrize('model_case', RESTFUL_MODEL_LIST)
+class TestRestfulAnthropicSdkMessages:
+    """Covers simple / system / streaming Messages (LMDeploy streams zero usage
+    on ``message_start``)."""
+
+    def test_sdk_simple_messages_non_stream(self, backend, model_case):
+        msg = asyncio.run(_sdk_simple_non_stream())
+        assert getattr(msg, 'role', None) == 'assistant'
+        assert getattr(msg, 'stop_reason', None) in ('end_turn', 'max_tokens')
+        text = _text_from_message(msg)
+        assert len(text) > 0
+        usage = getattr(msg, 'usage', None)
+        assert usage is not None
+        assert getattr(usage, 'input_tokens', 0) > 0
+        assert getattr(usage, 'output_tokens', 0) > 0
+
+    def test_sdk_system_message_non_stream(self, backend, model_case):
+        msg = asyncio.run(_sdk_system_non_stream())
+        assert getattr(msg, 'role', None) == 'assistant'
+        assert getattr(msg, 'stop_reason', None) in ('end_turn', 'max_tokens')
+        text = _text_from_message(msg)
+        assert len(text) > 0
+
+    def test_sdk_streaming(self, backend, model_case):
+        events, final_msg = asyncio.run(_sdk_stream_events_and_final())
+        assert len(events) > 0
+
+        usage0 = _first_message_start_usage(events)
+        assert usage0 is not None, 'message_start with usage not found in stream'
+        in0, out0 = usage0
+        assert out0 == 0, 'LMDeploy streams output_tokens=0 until message_delta'
+        assert in0 == 0, 'LMDeploy streams input_tokens=0 on message_start (final usage appears in message_delta)'
+
+        if final_msg is not None:
+            assert getattr(final_msg, 'role', None) == 'assistant'
+            u = getattr(final_msg, 'usage', None)
+            assert u is not None
+            assert getattr(u, 'input_tokens', 0) > 5
+            assert getattr(u, 'output_tokens', 0) > 0
+            text = _text_from_message(final_msg)
+            assert len(text) > 0
+            return
+
+        # Fallback when no final message is available: require a ``message_delta`` event that
+        # reports generated tokens. Looking for the bare key ``output_tokens`` in the dump is
+        # vacuous here, because ``message_start`` was just shown to carry a usage with that key.
+        serialised = [e.model_dump() if hasattr(e, 'model_dump') else {'repr': repr(e)} for e in events]
+        blob = json.dumps(serialised, default=str)
+        deltas = [e for e in events if getattr(e, 'type', None) == 'message_delta']
+        assert deltas, f'message_delta not found in stream: {blob}'
+        delta_usage = getattr(deltas[-1], 'usage', None)
+        assert (getattr(delta_usage, 'output_tokens', 0) or 0) > 0, f'message_delta reports no output tokens: {blob}'
diff --git a/autotest/interface/restful/test_restful_anthropic_v1.py b/autotest/interface/restful/test_restful_anthropic_v1.py
new file mode 100644
index 0000000000..bcaa1ec276
--- /dev/null
+++ b/autotest/interface/restful/test_restful_anthropic_v1.py
@@ -0,0 +1,1002 @@
+from __future__ import annotations
+
+import json
+import os
+from functools import lru_cache
+
+import pytest
+import requests
+from utils.config_utils import get_config
+from utils.constant import BACKEND_LIST, BASE_URL, RESTFUL_MODEL_LIST
+from utils.tool_reasoning_definitions import WEATHER_TOOL, openai_function_tool_to_anthropic
+
+from lmdeploy.serve.openai.api_client import APIClient
+
+ANTHROPIC_VERSION = '2023-06-01'
+
+_MESSAGES_URL = f'{BASE_URL}/v1/messages'
+_COUNT_TOKENS_URL = f'{BASE_URL}/v1/messages/count_tokens'
+
+_EVAL_IMAGE_TIGER = 'tiger.jpeg'
+
+# 1×1 PNG (red), for ``source: {type: base64}`` smoke without relying on ``resource_path`` files.
+_TINY_PNG_BASE64 = (
+    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=='
+)
+
+
+@pytest.fixture(scope='class')
+def deployed_model_name() -> str:
+    """Single model id exposed by the RESTFUL api_server."""
+
+    return APIClient(BASE_URL).available_models[0]
+
+
+@lru_cache(maxsize=1)
+def _eval_resource_path() -> str:
+    """``resource_path`` from active autotest YAML (``TEST_ENV`` →
+    ``autotest/config_{tag}.yml``)."""
+
+    cfg = get_config()
+    path = cfg.get('resource_path')
+    assert isinstance(path, str) and path, 'resource_path must be set in autotest config (e.g. config_h.yml)'
+    base = path.rstrip('/')
+    assert os.path.isdir(base), f'resource_path is not a directory: {base!r}'
+    return base
+
+
+def _eval_resource_file(filename: str) -> str:  # path of ``filename`` under ``resource_path``; must exist
+    p = os.path.join(_eval_resource_path(), filename)
+    assert os.path.isfile(p), f'missing offline eval resource {filename!r}: {p}'
+    return p
+
+
+def _anthropic_headers() -> dict[str, str]:  # JSON content type plus the ``anthropic-version`` header
+    return {
+        'Content-Type': 'application/json',
+        'anthropic-version': ANTHROPIC_VERSION,
+    }
+
+
+def _assistant_text_from_message_payload(data: dict) -> str:  # join all ``text`` content blocks
+    return ''.join(b.get('text', '') for b in data.get('content', []) if b.get('type') == 'text')
+
+
+def _model_likely_supports_anthropic_vlm(model_name: str) -> bool:
+    """Heuristic for RESTFUL matrix: skip image HTTP when the served id is clearly text-only."""
+
+    m = model_name.upper()
+    return any(
+        needle in m
+        for needle in (
+            'VL',
+            'INTERNVL',
+            'INTERN-VL',
+            'QWEN-VL',
+            'QWEN2-VL',
+            'QWEN2.5-VL',
+            'QWEN3.5',
+            'MINICPM-V',
+            'LLAVA',
+            'COGVLM',
+            'XCOMPOSER',
+            'INTERNXCOMPOSER',
+            'INTERNS',
+        ))
+
+
+def _parse_anthropic_sse(raw: str) -> list[tuple[str | None, dict]]:
+    """Parse Anthropic-style SSE (``event:`` / ``data:`` lines) into
+    (event_name, json_payload) pairs; empty ``data:`` lines are skipped."""
+
+    pairs: list[tuple[str | None, dict]] = []
+    current_event: str | None = None
+    for line in raw.splitlines():
+        line = line.rstrip('\r')
+        if line.startswith('event:'):
+            current_event = line[len('event:'):].strip()
+        elif line.startswith('data:'):
+            data_str = line[len('data:'):].strip()
+            if not data_str:
+                continue
+            pairs.append((current_event, json.loads(data_str)))
+            current_event = None  # an ``event:`` name applies to the next ``data:`` line only
+    return pairs
+
+
+def _aggregate_stream_text(events: list[tuple[str | None, dict]]) -> str:  # join ``text_delta`` chunks
+    text = ''
+    for _, obj in events:
+        if obj.get('type') != 'content_block_delta':
+            continue
+        delta = obj.get('delta') or {}
+        if delta.get('type') == 'text_delta':
+            text += delta.get('text') or ''
+    return text
+
+
+def _assert_count_tokens_json(data: dict) -> int:  # body must be exactly one positive int ``input_tokens``
+    assert set(data.keys()) == {'input_tokens'}, data
+    n = data['input_tokens']
+    assert isinstance(n, int) and n > 0, n
+    return n
+
+
+def _assert_success_message_json(data: dict, *, model: str) -> dict:
+    """Non-stream ``/v1/messages`` success body: Anthropic message + usage
+    invariants. Returns ``data`` unchanged so callers can chain on it."""
+
+    assert data.get('type') == 'message', data
+    assert data.get('role') == 'assistant'
+    assert data.get('model') == model
+    mid = data.get('id')
+    assert isinstance(mid, str) and mid.startswith('msg_'), mid
+    content = data.get('content')
+    assert isinstance(content, list) and len(content) >= 1, content
+    usage = data.get('usage')
+    assert isinstance(usage, dict), data
+    assert 'input_tokens' in usage and 'output_tokens' in usage, usage
+    assert isinstance(usage['input_tokens'], int) and usage['input_tokens'] >= 0
+    assert isinstance(usage['output_tokens'], int) and usage['output_tokens'] > 0
+    assert data.get('stop_reason') in ('end_turn', 'max_tokens', 'stop_sequence', 'tool_use', None)
+    return data
+
+
+def _assert_anthropic_error_envelope(body: dict) -> None:  # ``type == 'error'`` plus ``error.type/message``
+    assert body.get('type') == 'error', body
+    err = body.get('error')
+    assert isinstance(err, dict) and 'type' in err and 'message' in err, err
+
+
+def _assert_fastapi_validation_error(resp: requests.Response) -> dict:
+    """Assert FastAPI's ``RequestValidationError`` response and return its body.
+
+    Expects HTTP 422 with a list under ``detail`` (not the Anthropic ``type: error`` envelope).
+    """
+
+    assert resp.status_code == 422, resp.text
+    body = resp.json()
+    assert isinstance(body.get('detail'), list), body
+    return body
+
+
+def _assert_tool_parser_required_message(resp: requests.Response) -> None:  # 400 naming ``--tool-call-parser``
+    assert resp.status_code == 400, resp.text
+    body = resp.json()
+    _assert_anthropic_error_envelope(body)
+    assert body['error']['type'] == 'invalid_request_error'
+    err = body['error']['message']
+    assert '--tool-call-parser' in err
+
+
+@pytest.mark.order(8)
+@pytest.mark.flaky(reruns=2)
+@pytest.mark.parametrize('backend', BACKEND_LIST)
+@pytest.mark.parametrize('model_case', RESTFUL_MODEL_LIST)
+class TestRestfulAnthropicV1:
+
+    def test_list_models(self, backend, model_case, deployed_model_name: str):
+        url = f'{BASE_URL}/anthropic/v1/models'
+        resp = requests.get(url, timeout=30)
+        assert resp.status_code == 200, resp.text
+        data = resp.json()
+        assert isinstance(data.get('has_more'), bool)
+        assert 'data' in data
+        assert isinstance(data['data'], list)
+        for m in data['data']:
+            assert isinstance(m, dict)
+            assert m.get('type') == 'model'
+            assert isinstance(m.get('id'), str) and len(m['id']) > 0
+            assert isinstance(m.get('display_name'), str)
+        ids = [m['id'] for m in data['data']]
+        assert deployed_model_name in ids, (deployed_model_name, ids)
+        if ids:
+            assert data.get('first_id') == ids[0]
+            assert data.get('last_id') == ids[-1]
+
+    @pytest.mark.parametrize(
+        'endpoint_url,body_without_model',
+        [
+            pytest.param(
+                _MESSAGES_URL,
+                {'max_tokens': 8, 'messages': [{'role': 'user', 'content': 'Say hi in one word.'}]},
+                id='messages',
+            ),
+            pytest.param(
+                _COUNT_TOKENS_URL,
+                {'messages': [{'role': 'user', 'content': 'Hi'}]},
+                id='count_tokens',
+            ),
+        ],
+    )
+    def test_messages_and_count_tokens_missing_version_header(
+            self, backend, model_case, deployed_model_name: str, endpoint_url: str, body_without_model: dict):
+        resp = requests.post(
+            endpoint_url,
+            headers={'Content-Type': 'application/json'},
+            json={'model': deployed_model_name, **body_without_model},
+            timeout=60,
+        )
+        assert resp.status_code == 400, resp.text
+        body = resp.json()
+        _assert_anthropic_error_envelope(body)
+        assert body['error']['type'] == 'invalid_request_error'
+        assert body['error']['message'] == 'Missing required header: anthropic-version'
+
+    @pytest.mark.parametrize(
+        'endpoint_url,request_json',
+        [
+            pytest.param(
+                _MESSAGES_URL,
+                {
+                    'model': 'definitely-not-a-deployed-model-name',
+                    'max_tokens': 8,
+                    'messages': [{'role': 'user', 'content': 'Hi'}],
+                },
+                id='messages',
+            ),
+            pytest.param(
+                _COUNT_TOKENS_URL,
+                {
+                    'model': 'definitely-not-a-deployed-model-name',
+                    'messages': [{'role': 'user', 'content': 'Hi'}],
+                },
+                id='count_tokens',
+            ),
+        ],
+    )
+    def test_messages_and_count_tokens_unknown_model(
+            self, backend, model_case, endpoint_url: str, request_json: dict):
+        resp = requests.post(
+            endpoint_url,
+            headers=_anthropic_headers(),
+            json=request_json,
+            timeout=30,
+        )
+        assert resp.status_code == 404, resp.text
+        body = resp.json()
+        _assert_anthropic_error_envelope(body)
+        assert body['error']['type'] == 'not_found_error'
+        assert 'does not exist' in body['error']['message']
+
+    def test_messages_with_system(self, backend, model_case, deployed_model_name: str):
+        """Anthropic ``system`` field (Messages API).
+
+        Some chat models prefix visible chain-of-thought before the final reply; keep
+        ``max_tokens`` high enough that the instructed answer still fits the budget.
+        """
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 2048,
+                'temperature': 0.01,
+                'system': 'You reply only with the single word: Acknowledged.',
+                'messages': [{'role': 'user', 'content': 'What is your instruction?'}],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data)
+        assert 'acknowledged' in text.lower(), text[:500]
+
+    def test_messages_user_content_as_blocks(self, backend, model_case, deployed_model_name: str):
+        """``messages[].content`` as a list of ``{type: text}`` blocks
+        (Anthropic-native shape)."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 24,
+                'temperature': 0.01,
+                'messages': [{
+                    'role': 'user',
+                    'content': [
+                        {'type': 'text', 'text': 'Answer with one word: color of grass? '},
+                        {'type': 'text', 'text': 'Just the color name.'},
+                    ],
+                }],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data)
+        tl = text.lower()
+        assert any(
+            k in tl
+            for k in ('green', 'grass', '青', '綠', '绿')), f'expected color-of-grass hint in reply: {text[:500]!r}'
+
+    def test_messages_system_as_content_blocks(self, backend, model_case, deployed_model_name: str):
+        """``system`` as Anthropic block list (concatenated server-side for the
+        chat template)."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 256,
+                'temperature': 0.01,
+                'system': [
+                    {'type': 'text', 'text': 'You reply only with the single word: Confirmed.'},
+                    {'type': 'text', 'text': ' No extra words.'},
+                ],
+                'messages': [{'role': 'user', 'content': 'Acknowledge with your required reply.'}],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data).lower()
+        assert 'confirmed' in text, text[:500]
+
+    def test_messages_history_tool_use_and_tool_result_without_request_tools(
+            self, backend, model_case, deployed_model_name: str):
+        """Replay ``tool_use`` / ``tool_result`` blocks without top-level
+        ``tools`` (parserless RESTFUL)."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 2048,
+                'temperature': 0.01,
+                'messages': [
+                    {'role': 'user', 'content': 'What is the weather in San Francisco?'},
+                    {
+                        'role': 'assistant',
+                        'content': [
+                            {
+                                'type': 'tool_use',
+                                'id': 'toolu_hist_restful_01',
+                                'name': 'get_current_weather',
+                                'input': {'location': 'San Francisco'},
+                            },
+                        ],
+                    },
+                    {
+                        'role': 'user',
+                        'content': [
+                            {
+                                'type': 'tool_result',
+                                'tool_use_id': 'toolu_hist_restful_01',
+                                'content': '72F and sunny.',
+                            },
+                        ],
+                    },
+                    {
+                        'role': 'user',
+                        'content': 'In one short phrase, was it warm? Answer yes or no.',
+                    },
+                ],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data)
+        tl = text.lower()
+        assert 'yes' in tl or '是' in text or '温暖' in text or '暖和' in text, (
+            'expected warm/yes style answer given 72F sunny tool result; '
+            f'stop_reason={data.get("stop_reason")!r} text={text[:500]!r}'
+        )
+
+    def test_messages_history_thinking_and_text_blocks(self, backend, model_case, deployed_model_name: str):
+        """Assistant history with ``thinking`` + ``text`` (reasoning replay
+        path)."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 2048,
+                'temperature': 0.01,
+                'messages': [
+                    {'role': 'user', 'content': 'Hi.'},
+                    {
+                        'role': 'assistant',
+                        'content': [
+                            {'type': 'thinking', 'thinking': '(internal scratchpad)'},
+                            {'type': 'text', 'text': 'Hello — how can I help?'},
+                        ],
+                    },
+                    {'role': 'user', 'content': 'Reply with exactly: ACK'},
+                ],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data)
+        assert 'ack' in text.lower(), (
+            'expected literal ACK from final user instruction; '
+            f'stop_reason={data.get("stop_reason")!r} text={text[:500]!r}'
+        )
+
+    def test_messages_user_image_file_from_config_resource(self, backend, model_case, deployed_model_name: str):
+        """``user`` message with Anthropic ``image`` + local ``resource_path``
+        file (``config_h.yml``)."""
+
+        if not _model_likely_supports_anthropic_vlm(deployed_model_name):
+            pytest.skip(f'model {deployed_model_name!r} is not treated as vision-capable for this test')
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 128,
+                'temperature': 0.01,
+                'messages': [{
+                    'role': 'user',
+                    'content': [
+                        {'type': 'text', 'text': 'In one word, name the animal in the image.'},
+                        {
+                            'type': 'image',
+                            'source': {'type': 'url', 'url': image_path},
+                        },
+                    ],
+                }],
+            },
+            timeout=180,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data).lower()
+        assert any(
+            k in text
+            for k in ('tiger', 'cat', 'big cat', '虎', '猫', 'feline')), text[:800]
+
+    def test_count_tokens_user_image_block_exceeds_text_only(self, backend, model_case, deployed_model_name: str):
+        """``count_tokens`` flattens ``image`` blocks in
+        ``to_lmdeploy_messages``; count should exceed text-only."""
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        base = {
+            'model': deployed_model_name,
+            'messages': [{
+                'role': 'user',
+                'content': [{'type': 'text', 'text': 'Describe briefly.'}],
+            }],
+        }
+        r0 = requests.post(_COUNT_TOKENS_URL, headers=_anthropic_headers(), json=base, timeout=120)
+        assert r0.status_code == 200, r0.text
+        n0 = _assert_count_tokens_json(r0.json())
+
+        r1 = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'messages': [{
+                    'role': 'user',
+                    'content': [
+                        {'type': 'text', 'text': 'Describe briefly.'},
+                        {'type': 'image', 'source': {'type': 'url', 'url': image_path}},
+                    ],
+                }],
+            },
+            timeout=120,
+        )
+        assert r1.status_code == 200, r1.text
+        n1 = _assert_count_tokens_json(r1.json())
+        assert n1 > n0, ('image-bearing user message should tokenize longer than text-only', n1, n0)
+
+    def test_messages_user_image_interleaved_text_blocks(self, backend, model_case, deployed_model_name: str):
+        """Multimodal user turn: ``text`` → ``image`` → ``text``
+        (block ordering on the VLM path)."""
+
+        if not _model_likely_supports_anthropic_vlm(deployed_model_name):
+            pytest.skip(f'model {deployed_model_name!r} is not treated as vision-capable for this test')
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 128,
+                'temperature': 0.01,
+                'messages': [{
+                    'role': 'user',
+                    'content': [
+                        {
+                            'type': 'text',
+                            'text': 'The next block is an image. After it, follow the final instruction only.',
+                        },
+                        {'type': 'image', 'source': {'type': 'url', 'url': image_path}},
+                        {'type': 'text', 'text': 'In one word, name the animal in the image.'},
+                    ],
+                }],
+            },
+            timeout=180,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data).lower()
+        assert any(
+            k in text
+            for k in ('tiger', 'cat', 'big cat', '虎', '猫', 'feline')), text[:800]
+
+    def test_messages_user_image_base64_stream(self, backend, model_case, deployed_model_name: str):
+        """Tiny PNG via ``base64`` source + ``stream: true`` (VLM + SSE
+        path)."""
+
+        if not _model_likely_supports_anthropic_vlm(deployed_model_name):
+            pytest.skip(f'model {deployed_model_name!r} is not treated as vision-capable for this test')
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                # Same as tool_parser HTTP solid-color VLM test: leave room after thinking_delta.
+                'max_tokens': 16384,
+                'temperature': 0.01,
+                'stream': True,
+                'messages': [{
+                    'role': 'user',
+                    'content': [
+                        {
+                            'type': 'text',
+                            'text': (
+                                'The image is a single solid color (one pixel). '
+                                'Reply with at most three words: name that color only (e.g. red).'
+                            ),
+                        },
+                        {
+                            'type': 'image',
+                            'source': {
+                                'type': 'base64',
+                                'media_type': 'image/png',
+                                'data': _TINY_PNG_BASE64,
+                            },
+                        },
+                    ],
+                }],
+            },
+            stream=True,
+            timeout=180,
+        )
+        assert resp.status_code == 200, resp.text
+        raw = b''.join(resp.iter_content(chunk_size=None)).decode('utf-8')  # chunks may split a UTF-8 char
+        events = _parse_anthropic_sse(raw)
+        types = [obj.get('type') for _, obj in events]
+        assert 'message_start' in types
+        assert 'message_stop' in types
+        assembled = _aggregate_stream_text(events)
+        assert len(assembled.strip()) > 0, repr(assembled[:300])
+        al = assembled.lower()
+        assert any(
+            k in al
+            for k in (
+                'red',
+                'crimson',
+                'scarlet',
+                'maroon',
+                'ruby',
+                'vermilion',
+                '红',
+                '赤',
+                '朱',
+                '绯',
+            )), f'expected red-ish color name in streamed reply: {assembled[:500]!r}'
+
+    def test_messages_multi_turn(self, backend, model_case, deployed_model_name: str):
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 2048,
+                'temperature': 0.01,
+                'messages': [
+                    {'role': 'user', 'content': 'Remember the code word: banana.'},
+                    {'role': 'assistant', 'content': 'Understood, the code word is banana.'},
+                    {'role': 'user', 'content': 'What was the code word? Reply with that word only.'},
+                ],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data).lower()
+        assert 'banana' in text, text[:500]
+
+    def test_messages_max_tokens_budget(self, backend, model_case, deployed_model_name: str):
+        """Tight ``max_tokens`` should cap generation (``stop_reason`` often
+        ``max_tokens``)."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 6,
+                'temperature': 0.01,
+                'messages': [{
+                    'role': 'user',
+                    'content': 'Write a very long essay about world history. Do not stop early.',
+                }],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        out = data['usage']['output_tokens']
+        assert out <= 8
+        assert out >= 4
+        assert data['stop_reason'] in ('max_tokens', 'end_turn')
+        assert _assistant_text_from_message_payload(data), data['content']
+
+    def test_messages_stop_sequences(self, backend, model_case, deployed_model_name: str):
+        """``stop_sequences`` entries must not appear in the visible reply.
+
+        Maps to LMDeploy ``stop_sequences`` / ``GenerationConfig.stop_words``
+        (cf. chat completion stop tests).
+        """
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 200,
+                'temperature': 0.01,
+                'stop_sequences': [' Shanghai', ' city', ' China'],
+                'messages': [{'role': 'user', 'content': 'Shanghai is'}],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        text = _assistant_text_from_message_payload(data)
+        assert ' Shanghai' not in text
+        assert ' city' not in text
+        assert ' China' not in text
+        assert data['stop_reason'] in ('end_turn', 'max_tokens', 'stop_sequence')
+        assert len(text) > 0, 'stop_sequence should still yield visible assistant text before the stop'
+
+    def test_messages_non_stream(self, backend, model_case, deployed_model_name: str):
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 32,
+                'temperature': 0.01,
+                'messages': [{'role': 'user', 'content': 'Reply with a single short greeting.'}],
+            },
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        assert data['content'][0]['type'] == 'text'
+        assert len(_assistant_text_from_message_payload(data).strip()) > 0
+
+    def test_messages_stream(self, backend, model_case, deployed_model_name: str):
+        """SSE lifecycle including ``message_start`` shape (usage zero until
+        ``message_delta``)."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 48,
+                'temperature': 0.01,
+                'stream': True,
+                'messages': [{'role': 'user', 'content': 'Count from 1 to 3, one number per line.'}],
+            },
+            stream=True,
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        raw = b''.join(resp.iter_content(chunk_size=None)).decode('utf-8')  # chunks may split a UTF-8 char
+        events = _parse_anthropic_sse(raw)
+        types = [obj.get('type') for _, obj in events]
+        assert 'message_start' in types
+        assert 'message_delta' in types
+        assert 'message_stop' in types
+        start_evt = next((obj for _, obj in events if obj.get('type') == 'message_start'), None)
+        assert start_evt is not None
+        m0 = start_evt['message']
+        assert m0.get('type') == 'message'
+        assert m0.get('role') == 'assistant'
+        assert m0.get('model') == deployed_model_name
+        assert isinstance(m0.get('id'), str) and m0['id'].startswith('msg_')
+        assert m0.get('usage', {}).get('input_tokens') == 0
+        assert m0.get('usage', {}).get('output_tokens') == 0
+        assembled = _aggregate_stream_text(events)
+        assert len(assembled) > 0
+        assert sum(1 for d in ('1', '2', '3') if d in assembled) >= 2, (
+            'expected at least two of the digits 1–3 in streamed text', repr(assembled[:200])
+        )
+        delta_evt = next((obj for _, obj in events if obj.get('type') == 'message_delta'), None)
+        assert delta_evt is not None
+        du = delta_evt['usage']
+        assert 'output_tokens' in du and isinstance(du['output_tokens'], int)
+        assert du['output_tokens'] > 0
+        assert 'input_tokens' in du and isinstance(du['input_tokens'], int) and du['input_tokens'] >= 0
+        assert any(obj.get('type') == 'message_stop' for _, obj in events)
+
+    def test_count_tokens(self, backend, model_case, deployed_model_name: str):
+        r_short = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={'model': deployed_model_name, 'messages': [{'role': 'user', 'content': 'Hi'}]},
+            timeout=60,
+        )
+        assert r_short.status_code == 200, r_short.text
+        short = _assert_count_tokens_json(r_short.json())
+        r_long = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'messages': [{'role': 'user', 'content': 'Hello, estimate my token count.'}],
+            },
+            timeout=60,
+        )
+        assert r_long.status_code == 200, r_long.text
+        long = _assert_count_tokens_json(r_long.json())
+        assert long > short, (long, short)
+
+    @pytest.mark.parametrize(
+        'endpoint_url',
+        [_MESSAGES_URL, _COUNT_TOKENS_URL],
+        ids=['messages', 'count_tokens'],
+    )
+    def test_messages_and_count_tokens_invalid_json_body(
+            self, backend, model_case, deployed_model_name: str, endpoint_url: str):
+        resp = requests.post(
+            endpoint_url,
+            headers=_anthropic_headers(),
+            data='{"model":',
+            timeout=30,
+        )
+        _assert_fastapi_validation_error(resp)
+
+    def test_count_tokens_rejects_tools(self, backend, model_case, deployed_model_name: str):
+        """``count_tokens`` rejects Anthropic ``tools`` until supported (400 +
+        fixed message)."""
+
+        base_json = {
+            'model': deployed_model_name,
+            'messages': [{'role': 'user', 'content': 'Hi'}],
+        }
+        r_base = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json=base_json,
+            timeout=30,
+        )
+        assert r_base.status_code == 200, r_base.text
+        _assert_count_tokens_json(r_base.json())
+
+        resp = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={
+                **base_json,
+                'tools': [{
+                    'name': 'demo',
+                    'description': 'x',
+                    'input_schema': {
+                        'type': 'object',
+                        'properties': {}
+                    },
+                }],
+            },
+            timeout=30,
+        )
+        assert resp.status_code == 400, resp.text
+        body = resp.json()
+        _assert_anthropic_error_envelope(body)
+        assert body['error']['type'] == 'invalid_request_error'
+        assert body['error']['message'] == 'Anthropic tool fields are temporarily unsupported.'
+
+    def test_count_tokens_with_system_content_blocks(self, backend, model_case, deployed_model_name: str):
+        """``count_tokens`` with ``system`` as block list
+        (``to_lmdeploy_messages`` flattens text)."""
+
+        messages = [{'role': 'user', 'content': 'Hello, estimate my token count.'}]
+        resp_base = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={'model': deployed_model_name, 'messages': messages},
+            timeout=60,
+        )
+        assert resp_base.status_code == 200, resp_base.text
+        base_data = resp_base.json()
+        assert set(base_data.keys()) == {'input_tokens'}, base_data
+        baseline = base_data['input_tokens']
+        assert isinstance(baseline, int) and baseline > 0
+
+        resp = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'system': [
+                    {'type': 'text', 'text': 'You are helpful.'},
+                    {'type': 'text', 'text': 'Answer briefly.'},
+                ],
+                'messages': messages,
+            },
+            timeout=60,
+        )
+        assert resp.status_code == 200, resp.text
+        data = resp.json()
+        assert set(data.keys()) == {'input_tokens'}, data
+        with_system = data['input_tokens']
+        assert isinstance(with_system, int)
+        assert with_system > baseline, (
+            'system blocks should increase tokenized prompt vs same messages alone',
+            with_system,
+            baseline,
+        )
+
+    def test_messages_wrong_content_type(self, backend, model_case, deployed_model_name: str):
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers={
+                'Content-Type': 'text/plain',
+                'anthropic-version': ANTHROPIC_VERSION,
+            },
+            data='{}',
+            timeout=30,
+        )
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_invalid_message_role(self, backend, model_case, deployed_model_name: str):
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 8,
+                'messages': [{'role': 'system', 'content': 'not allowed here'}],
+            },
+            timeout=30,
+        )
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_message_missing_role(self, backend, model_case, deployed_model_name: str):
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 8,
+                'messages': [{'content': 'Hi'}],
+            },
+            timeout=30,
+        )
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_max_tokens_zero(self, backend, model_case, deployed_model_name: str):
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 0,
+                'messages': [{'role': 'user', 'content': 'Hi'}],
+            },
+            timeout=30,
+        )
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_messages_not_list(self, backend, model_case, deployed_model_name: str):
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 8,
+                'messages': {'role': 'user', 'content': 'Hi'},
+            },
+            timeout=30,
+        )
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_stream_validation_error_returns_json(self, backend, model_case, deployed_model_name: str):
+        """Invalid bodies must not upgrade to ``text/event-stream``; FastAPI
+        returns JSON 422."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': -1,
+                'stream': True,
+                'messages': [{'role': 'user', 'content': 'Hi'}],
+            },
+            stream=True,
+            timeout=30,
+        )
+        _assert_fastapi_validation_error(resp)
+        ctype = (resp.headers.get('content-type') or '').lower()
+        assert 'application/json' in ctype
+        assert 'text/event-stream' not in ctype
+
+    def test_count_tokens_empty_messages(self, backend, model_case, deployed_model_name: str):
+        """Pydantic allows ``messages: []``; counting should still return a
+        positive estimate."""
+
+        resp = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={'model': deployed_model_name, 'messages': []},
+            timeout=60,
+        )
+        assert resp.status_code == 200, resp.text
+        _assert_count_tokens_json(resp.json())
+
+    def test_messages_large_user_payload(self, backend, model_case, deployed_model_name: str):
+        """Regression guard for large JSON bodies (CI-sized payload, not
+        stress-test scale)."""
+
+        big = 'x' * (128 * 1024)
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 8,
+                'temperature': 0.01,
+                'messages': [{'role': 'user', 'content': f'Reply with one word: OK. Context:\n{big}'}],
+            },
+            timeout=180,
+        )
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        assert len(_assistant_text_from_message_payload(data).strip()) > 0
+
+    def test_messages_rejects_tools_without_tool_call_parser(self, backend, model_case, deployed_model_name: str):
+        """``RESTFUL`` jobs start api_server *without* ``--tool-call-parser``;
+        ``tools`` must yield 400."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 64,
+                'temperature': 0,
+                'messages': [{'role': 'user', 'content': 'What is the weather in Dallas, TX?'}],
+                'tools': [openai_function_tool_to_anthropic(WEATHER_TOOL)],
+            },
+            timeout=120,
+        )
+        _assert_tool_parser_required_message(resp)
+
+    def test_messages_rejects_tool_choice_with_tools_without_tool_call_parser(
+            self, backend, model_case, deployed_model_name: str):
+        """``tool_choice`` is only meaningful with ``tools``; still blocked
+        without ``--tool-call-parser``."""
+
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json={
+                'model': deployed_model_name,
+                'max_tokens': 64,
+                'temperature': 0,
+                'messages': [{'role': 'user', 'content': 'What is the weather in Dallas, TX?'}],
+                'tools': [openai_function_tool_to_anthropic(WEATHER_TOOL)],
+                'tool_choice': {'type': 'auto'},
+            },
+            timeout=120,
+        )
+        _assert_tool_parser_required_message(resp)
diff --git
a/autotest/interface/restful/test_restful_chat_completions_v1.py b/autotest/interface/restful/test_restful_chat_completions_v1.py index d3e9e7c119..63727793cc 100644 --- a/autotest/interface/restful/test_restful_chat_completions_v1.py +++ b/autotest/interface/restful/test_restful_chat_completions_v1.py @@ -2,7 +2,7 @@ import pytest from openai import OpenAI -from utils.constant import BACKEND_LIST, RESTFUL_MODEL_LIST +from utils.constant import BACKEND_LIST, BASE_URL, RESTFUL_MODEL_LIST from utils.restful_return_check import ( assert_chat_completions_batch_return, assert_chat_completions_stream_return, @@ -17,10 +17,7 @@ from lmdeploy.serve.openai.api_client import APIClient, get_model_list -BASE_HTTP_URL = 'http://localhost' -DEFAULT_PORT = 23333 MODEL = 'internlm/Intern-S1' -BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) @pytest.mark.order(8) diff --git a/autotest/interface/restful/test_restful_completions_v1.py b/autotest/interface/restful/test_restful_completions_v1.py index 8c187aa09d..5012e06285 100644 --- a/autotest/interface/restful/test_restful_completions_v1.py +++ b/autotest/interface/restful/test_restful_completions_v1.py @@ -1,13 +1,10 @@ import pytest -from utils.constant import BACKEND_LIST, RESTFUL_BASE_MODEL_LIST +from utils.constant import BACKEND_LIST, BASE_URL, RESTFUL_BASE_MODEL_LIST from utils.restful_return_check import assert_completions_batch_return, assert_completions_stream_return from lmdeploy.serve.openai.api_client import APIClient -BASE_HTTP_URL = 'http://localhost' -DEFAULT_PORT = 23333 MODEL = 'internlm/internlm2_5-20b' -BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) @pytest.mark.parametrize('backend', BACKEND_LIST) diff --git a/autotest/interface/restful/test_restful_generate.py b/autotest/interface/restful/test_restful_generate.py index 6babb550e7..5a08ba445a 100644 --- a/autotest/interface/restful/test_restful_generate.py +++ b/autotest/interface/restful/test_restful_generate.py @@ -9,15 +9,11 @@ import pytest 
import requests from transformers import AutoTokenizer -from utils.constant import BACKEND_LIST, DEFAULT_SERVER, RESTFUL_MODEL_LIST +from utils.constant import BACKEND_LIST, BASE_URL, RESTFUL_MODEL_LIST from utils.toolkit import encode_text, parse_sse_stream from lmdeploy.serve.openai.api_client import APIClient -BASE_HTTP_URL = f'http://{DEFAULT_SERVER}' -DEFAULT_PORT = 23333 -BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)]) - @pytest.mark.parametrize('backend', BACKEND_LIST) @pytest.mark.parametrize('model_name', RESTFUL_MODEL_LIST) diff --git a/autotest/interface/restful/tool_parser/test_tool_call_anthropic_sdk.py b/autotest/interface/restful/tool_parser/test_tool_call_anthropic_sdk.py new file mode 100644 index 0000000000..65b9861e41 --- /dev/null +++ b/autotest/interface/restful/tool_parser/test_tool_call_anthropic_sdk.py @@ -0,0 +1,920 @@ +from __future__ import annotations + +import asyncio +import json +import os +from functools import lru_cache + +import pytest +import requests +from utils.config_utils import get_config +from utils.constant import BASE_URL +from utils.tool_reasoning_definitions import ( + SEARCH_TOOL, + WEATHER_TOOL, + WEATHER_TOOL_SINGLE_LOCATION_ANTHROPIC, + get_async_anthropic_client_and_model, + openai_chat_messages_to_anthropic_kwargs, + openai_function_tool_to_anthropic, +) + +from lmdeploy.serve.openai.api_client import APIClient + +from .conftest import MESSAGES_ASKING_FOR_WEATHER, _apply_marks, _ToolCallTestBase + +ANTHROPIC_VERSION = '2023-06-01' + +_EVAL_IMAGE_TIGER = 'tiger.jpeg' +_TINY_PNG_BASE64 = ( + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==' +) + +_SOLID_COLOR_VLM_PROMPT = ( + 'The image is a single solid color (one pixel). ' + 'Reply with at most three words: name that color only (e.g. red).' 
+) + +_REDISH_COLOR_KEYWORDS = ( + 'red', + 'crimson', + 'scarlet', + 'maroon', + 'ruby', + 'vermilion', + '红', + '赤', + '朱', + '绯', +) + + +@lru_cache(maxsize=1) +def _eval_resource_path() -> str: + cfg = get_config() + path = cfg.get('resource_path') + assert isinstance(path, str) and path, 'resource_path must be set in autotest config (e.g. config_h.yml)' + base = path.rstrip('/') + assert os.path.isdir(base), f'resource_path is not a directory: {base!r}' + return base + + +def _eval_resource_file(filename: str) -> str: + p = os.path.join(_eval_resource_path(), filename) + assert os.path.isfile(p), f'missing offline eval resource {filename!r}: {p}' + return p + + +def _model_likely_supports_anthropic_vlm(model_name: str) -> bool: + m = model_name.upper() + return any( + needle in m + for needle in ( + 'VL', + 'INTERNVL', + 'INTERN-VL', + 'QWEN-VL', + 'QWEN2-VL', + 'QWEN2.5-VL', + 'QWEN3.5', + 'MINICPM-V', + 'LLAVA', + 'COGVLM', + 'XCOMPOSER', + 'INTERNXCOMPOSER', + 'INTERNS', + )) + + +def _http_headers() -> dict[str, str]: + return { + 'Content-Type': 'application/json', + 'anthropic-version': ANTHROPIC_VERSION, + } + + +def _parse_anthropic_sse(raw: str) -> list[tuple[str | None, dict]]: + pairs: list[tuple[str | None, dict]] = [] + current_event: str | None = None + for line in raw.splitlines(): + line = line.rstrip('\r') + if line.startswith('event:'): + current_event = line[len('event:'):].strip() + elif line.startswith('data:'): + data_str = line[len('data:'):].strip() + if not data_str: + continue + pairs.append((current_event, json.loads(data_str))) + current_event = None + return pairs + + +def _aggregate_stream_text(events: list[tuple[str | None, dict]]) -> str: + text = '' + for _, obj in events: + if obj.get('type') != 'content_block_delta': + continue + delta = obj.get('delta') or {} + if delta.get('type') == 'text_delta': + text += delta.get('text') or '' + return text + + +def _sse_tool_use_names(raw: str) -> list[str]: + names: list[str] = [] + 
for _, obj in _parse_anthropic_sse(raw): + if obj.get('type') != 'content_block_start': + continue + cb = obj.get('content_block') or {} + if cb.get('type') == 'tool_use' and cb.get('name'): + names.append(cb['name']) + return names + + +def _assert_redish_color_in_text(assembled: str, *, ctx: str) -> None: + assert len(assembled.strip()) > 0, ( + f'{ctx}: no text_delta content in stream (prefix {assembled[:300]!r}). ' + 'Reasoning models may stream long thinking_delta first; if max_tokens is too low, ' + 'the run can end before any visible text block is emitted.' + ) + al = assembled.lower() + assert any(k in al for k in _REDISH_COLOR_KEYWORDS), ( + f'{ctx}: expected red-ish color in reply: {assembled[:500]!r}', + ) + + +def _assistant_text_from_messages_json(data: dict) -> str: + return ''.join(b.get('text', '') for b in data.get('content', []) if b.get('type') == 'text') + + +def _http_tool_use_blocks(data: dict) -> list[dict]: + return [b for b in data.get('content', []) if isinstance(b, dict) and b.get('type') == 'tool_use'] + + +def _sdk_tool_use_blocks(msg) -> list: + return [b for b in msg.content if getattr(b, 'type', None) == 'tool_use'] + + +def _assert_weather_tool_city_state(inp: dict, *, ctx: str = '') -> None: + """``get_current_weather`` OpenAI-style args (``city`` / ``state``) after + parser mapping.""" + + assert isinstance(inp, dict), (ctx, type(inp)) + city, state = inp.get('city'), inp.get('state') + assert isinstance(city, str) and len(city) > 0, (ctx, inp) + assert isinstance(state, str) and len(state) > 0, (ctx, inp) + + +def _log_append(path: str, text: str) -> None: + try: + with open(path, 'a', encoding='utf-8') as f: + f.write(text + '\n') + except OSError: + pass + + +def _trace_anthropic_http( + log_file: str, + case: str, + *, + url: str, + http_status: int, + request_json: dict | None, + response_text: str, + max_chars: int = 24000, +) -> None: + """Append one JSON line to ``tool_calls/*.log``. 
+ + Same tree as ``test_tool_call_advanced`` (``setup_log_file``). + """ + + rtxt = ( + response_text + if len(response_text) <= max_chars + else response_text[:max_chars] + '\n…[truncated]' + ) + _log_append( + log_file, + json.dumps( + { + 'anthropic_http_trace': case, + 'url': url, + 'status': http_status, + 'request': request_json, + 'response': rtxt, + }, + ensure_ascii=False, + default=str, + ), + ) + + +# --- HTTP: ``tool_parser`` / ``--tool-call-parser`` jobs only ---------------------------- + + +@_apply_marks +class TestAnthropicHttpToolMessages(_ToolCallTestBase): + """``POST /v1/messages`` over HTTP when api_server is launched *with* + ``--tool-call-parser``. + + Adapter block semantics (``lmdeploy.serve.anthropic.adapter``): + + * ``tool_use`` (assistant): prior tool call replay (``id``, ``name``, ``input``). + * ``tool_result`` (user): plain-text result for ``tool_use_id``. + * ``thinking`` / ``redacted_thinking``: replayed reasoning segments (e.g. Claude Code style). + """ + + def test_http_stream_tool_choice_force_named_tool(self, backend, model_case): + model_name = APIClient(BASE_URL).available_models[0] + url = f'{BASE_URL}/v1/messages' + req_json = { + 'model': model_name, + 'max_tokens': 512, + 'temperature': 0, + 'stream': True, + 'tool_choice': { + 'type': 'tool', + 'name': WEATHER_TOOL['function']['name'], + }, + 'messages': [{'role': 'user', 'content': 'What is the weather in Dallas, TX?'}], + 'tools': [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ], + } + resp = requests.post( + url, + headers=_http_headers(), + json=req_json, + stream=True, + timeout=180, + ) + assert resp.status_code == 200, resp.text + raw = ''.join(chunk.decode('utf-8') for chunk in resp.iter_content(chunk_size=None) if chunk) + _trace_anthropic_http( + self._log_file, + 'test_http_stream_tool_choice_force_named_tool', + url=url, + http_status=resp.status_code, + request_json=req_json, + response_text=raw, + ) 
+ names = _sse_tool_use_names(raw) + assert names, f'no tool_use content_block_start in SSE (first 800 chars): {raw[:800]!r}' + assert WEATHER_TOOL['function']['name'] in names, names + + def test_http_stream_single_location_weather_tool(self, backend, model_case): + model_name = APIClient(BASE_URL).available_models[0] + url = f'{BASE_URL}/v1/messages' + req_json = { + 'model': model_name, + 'max_tokens': 512, + 'temperature': 0, + 'stream': True, + 'messages': [{'role': 'user', 'content': "What's the weather like in New York today?"}], + 'tools': [WEATHER_TOOL_SINGLE_LOCATION_ANTHROPIC], + } + resp = requests.post( + url, + headers=_http_headers(), + json=req_json, + stream=True, + timeout=180, + ) + assert resp.status_code == 200, resp.text + raw = ''.join(chunk.decode('utf-8') for chunk in resp.iter_content(chunk_size=None) if chunk) + _trace_anthropic_http( + self._log_file, + 'test_http_stream_single_location_weather_tool', + url=url, + http_status=resp.status_code, + request_json=req_json, + response_text=raw, + ) + names = _sse_tool_use_names(raw) + assert names, f'no tool_use content_block_start in SSE (first 800 chars): {raw[:800]!r}' + assert 'get_current_weather' in names, names + + def test_http_history_tool_use_and_tool_result_blocks(self, backend, model_case): + model_name = APIClient(BASE_URL).available_models[0] + url = f'{BASE_URL}/v1/messages' + req_json = { + 'model': model_name, + 'max_tokens': 8192, + 'temperature': 0.01, + 'messages': [ + {'role': 'user', 'content': 'What is the weather in San Francisco?'}, + { + 'role': 'assistant', + 'content': [ + { + 'type': 'tool_use', + 'id': 'toolu_hist_01', + 'name': 'get_current_weather', + 'input': {'location': 'San Francisco'}, + }, + ], + }, + { + 'role': 'user', + 'content': [ + { + 'type': 'tool_result', + 'tool_use_id': 'toolu_hist_01', + 'content': '72F and sunny.', + }, + ], + }, + {'role': 'user', 'content': 'In one short phrase, was it warm? 
Answer yes or no.'}, + ], + } + resp = requests.post( + url, + headers=_http_headers(), + json=req_json, + timeout=120, + ) + assert resp.status_code == 200, resp.text + data = resp.json() + _trace_anthropic_http( + self._log_file, + 'test_http_history_tool_use_and_tool_result_blocks', + url=url, + http_status=resp.status_code, + request_json=req_json, + response_text=json.dumps(data, ensure_ascii=False, default=str), + ) + text = _assistant_text_from_messages_json(data) + assert len(text) > 0, ( + 'expected non-empty assistant text blocks; ' + f'stop_reason={data.get("stop_reason")!r} content={data.get("content")!r}' + ) + + def test_http_history_thinking_block_replay(self, backend, model_case): + model_name = APIClient(BASE_URL).available_models[0] + url = f'{BASE_URL}/v1/messages' + req_json = { + 'model': model_name, + 'max_tokens': 8192, + 'temperature': 0.01, + 'messages': [ + {'role': 'user', 'content': 'Hi.'}, + { + 'role': 'assistant', + 'content': [ + {'type': 'thinking', 'thinking': '(internal scratchpad)'}, + {'type': 'text', 'text': 'Hello — how can I help?'}, + ], + }, + {'role': 'user', 'content': 'Reply with exactly: ACK'}, + ], + } + resp = requests.post( + url, + headers=_http_headers(), + json=req_json, + timeout=120, + ) + assert resp.status_code == 200, resp.text + data = resp.json() + _trace_anthropic_http( + self._log_file, + 'test_http_history_thinking_block_replay', + url=url, + http_status=resp.status_code, + request_json=req_json, + response_text=json.dumps(data, ensure_ascii=False, default=str), + ) + text = _assistant_text_from_messages_json(data) + assert len(text) > 0, ( + 'expected non-empty assistant text blocks; ' + f'stop_reason={data.get("stop_reason")!r} content={data.get("content")!r}' + ) + + def test_http_non_stream_tools_with_user_image_url(self, backend, model_case): + """``tools`` + user ``content`` blocks with ``image`` (VLM matrix only; + same tool contract as text-only).""" + + model_name = 
APIClient(BASE_URL).available_models[0] + if not _model_likely_supports_anthropic_vlm(model_name): + pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test') + + image_path = _eval_resource_file(_EVAL_IMAGE_TIGER) + url = f'{BASE_URL}/v1/messages' + req_json = { + 'model': model_name, + 'max_tokens': 512, + 'temperature': 0, + 'messages': [{ + 'role': 'user', + 'content': [ + { + 'type': 'text', + 'text': ( + 'What is the weather in Dallas, TX? ' + 'Use the tools; the attached image is unrelated decoration.' + ), + }, + {'type': 'image', 'source': {'type': 'url', 'url': image_path}}, + ], + }], + 'tools': [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ], + } + resp = requests.post(url, headers=_http_headers(), json=req_json, timeout=180) + assert resp.status_code == 200, resp.text + data = resp.json() + _trace_anthropic_http( + self._log_file, + 'test_http_non_stream_tools_with_user_image_url', + url=url, + http_status=resp.status_code, + request_json=req_json, + response_text=json.dumps(data, ensure_ascii=False, default=str), + ) + assert data.get('stop_reason') == 'tool_use', data + blocks = _http_tool_use_blocks(data) + assert len(blocks) >= 1, data.get('content') + assert blocks[0].get('name') == WEATHER_TOOL['function']['name'] + inp = blocks[0].get('input') + assert isinstance(inp, dict), inp + _assert_weather_tool_city_state(inp, ctx='test_http_non_stream_tools_with_user_image_url') + + def test_http_stream_tools_with_user_image_url(self, backend, model_case): + """Streaming ``tools`` + user image URL (VLM): SSE must still surface + ``tool_use``.""" + + model_name = APIClient(BASE_URL).available_models[0] + if not _model_likely_supports_anthropic_vlm(model_name): + pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test') + + image_path = _eval_resource_file(_EVAL_IMAGE_TIGER) + url = f'{BASE_URL}/v1/messages' + req_json = { + 'model': model_name, + 
'max_tokens': 512, + 'temperature': 0, + 'stream': True, + 'messages': [{ + 'role': 'user', + 'content': [ + {'type': 'text', 'text': 'What is the weather in Dallas, TX? Use tools.'}, + {'type': 'image', 'source': {'type': 'url', 'url': image_path}}, + ], + }], + 'tools': [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ], + } + resp = requests.post(url, headers=_http_headers(), json=req_json, stream=True, timeout=180) + assert resp.status_code == 200, resp.text + raw = ''.join(chunk.decode('utf-8') for chunk in resp.iter_content(chunk_size=None) if chunk) + _trace_anthropic_http( + self._log_file, + 'test_http_stream_tools_with_user_image_url', + url=url, + http_status=resp.status_code, + request_json=req_json, + response_text=raw, + ) + names = _sse_tool_use_names(raw) + assert names, f'no tool_use in SSE (first 800 chars): {raw[:800]!r}' + assert WEATHER_TOOL['function']['name'] in names, names + + def test_http_stream_user_image_base64_solid_color_vlm(self, backend, model_case): + """Align with RESTful ``test_messages_user_image_base64_stream``: SSE + text names the solid color.""" + + model_name = APIClient(BASE_URL).available_models[0] + if not _model_likely_supports_anthropic_vlm(model_name): + pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test') + + url = f'{BASE_URL}/v1/messages' + req_json = { + 'model': model_name, + 'max_tokens': 16384, + 'temperature': 0.01, + 'stream': True, + 'messages': [{ + 'role': 'user', + 'content': [ + {'type': 'text', 'text': _SOLID_COLOR_VLM_PROMPT}, + { + 'type': 'image', + 'source': { + 'type': 'base64', + 'media_type': 'image/png', + 'data': _TINY_PNG_BASE64, + }, + }, + ], + }], + } + resp = requests.post(url, headers=_http_headers(), json=req_json, stream=True, timeout=180) + assert resp.status_code == 200, resp.text + raw = ''.join(chunk.decode('utf-8') for chunk in resp.iter_content(chunk_size=None) if chunk) + _trace_anthropic_http( 
+ self._log_file, + 'test_http_stream_user_image_base64_solid_color_vlm', + url=url, + http_status=resp.status_code, + request_json=req_json, + response_text=raw, + ) + events = _parse_anthropic_sse(raw) + types = [obj.get('type') for _, obj in events] + assert 'message_start' in types + assert 'message_stop' in types + assembled = _aggregate_stream_text(events) + _assert_redish_color_in_text(assembled, ctx='test_http_stream_user_image_base64_solid_color_vlm') + + +def _event_to_dict(event) -> dict: + if hasattr(event, 'model_dump'): + return event.model_dump() + if isinstance(event, dict): + return event + return {'repr': repr(event)} + + +async def _async_weather_tool_single_location_non_stream(log_file: str): + client, model_name = get_async_anthropic_client_and_model() + msg = await client.messages.create( + model=model_name, + max_tokens=1024, + temperature=0, + messages=[{'role': 'user', 'content': "What's the weather like in New York today?"}], + tools=[WEATHER_TOOL_SINGLE_LOCATION_ANTHROPIC], + ) + try: + dumped = msg.model_dump_json() + except Exception: + try: + dumped = json.dumps(msg.model_dump()) + except Exception: + dumped = repr(msg) + _log_append(log_file, dumped) + return msg + + +async def _async_tool_choice_force_named_tool(log_file: str): + client, model_name = get_async_anthropic_client_and_model() + kwargs = openai_chat_messages_to_anthropic_kwargs(MESSAGES_ASKING_FOR_WEATHER) + tools = [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ] + msg = await client.messages.create( + model=model_name, + max_tokens=1024, + temperature=0, + tools=tools, + tool_choice={'type': 'tool', 'name': WEATHER_TOOL['function']['name']}, + **kwargs, + ) + try: + dumped = msg.model_dump_json() + except Exception: + try: + dumped = json.dumps(msg.model_dump()) + except Exception: + dumped = repr(msg) + _log_append(log_file, dumped) + return msg + + +async def _async_tool_choice_any(log_file: str): + client, 
model_name = get_async_anthropic_client_and_model() + kwargs = openai_chat_messages_to_anthropic_kwargs(MESSAGES_ASKING_FOR_WEATHER) + tools = [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ] + msg = await client.messages.create( + model=model_name, + max_tokens=1024, + temperature=0, + tools=tools, + tool_choice={'type': 'any'}, + **kwargs, + ) + try: + dumped = msg.model_dump_json() + except Exception: + try: + dumped = json.dumps(msg.model_dump()) + except Exception: + dumped = repr(msg) + _log_append(log_file, dumped) + return msg + + +async def _async_messages_tool_non_stream_with_user_image(log_file: str, image_url: str): + client, model_name = get_async_anthropic_client_and_model() + tools = [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ] + msg = await client.messages.create( + model=model_name, + max_tokens=1024, + temperature=0, + tools=tools, + messages=[{ + 'role': 'user', + 'content': [ + { + 'type': 'text', + 'text': ( + 'What is the weather in Dallas, TX? ' + 'Use tools; the image is unrelated context.' + ), + }, + {'type': 'image', 'source': {'type': 'url', 'url': image_url}}, + ], + }], + ) + try: + dumped = msg.model_dump_json() + except Exception: + try: + dumped = json.dumps(msg.model_dump()) + except Exception: + dumped = repr(msg) + _log_append(log_file, dumped) + return msg + + +async def _async_messages_tool_non_stream_with_user_image_base64(log_file: str): + client, model_name = get_async_anthropic_client_and_model() + tools = [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ] + msg = await client.messages.create( + model=model_name, + max_tokens=1024, + temperature=0, + tools=tools, + messages=[{ + 'role': 'user', + 'content': [ + {'type': 'text', 'text': 'What is the weather in Dallas, TX? 
Use tools.'}, + { + 'type': 'image', + 'source': { + 'type': 'base64', + 'media_type': 'image/png', + 'data': _TINY_PNG_BASE64, + }, + }, + ], + }], + ) + try: + dumped = msg.model_dump_json() + except Exception: + try: + dumped = json.dumps(msg.model_dump()) + except Exception: + dumped = repr(msg) + _log_append(log_file, dumped) + return msg + + +async def _async_messages_tool_non_stream(log_file: str): + client, model_name = get_async_anthropic_client_and_model() + kwargs = openai_chat_messages_to_anthropic_kwargs(MESSAGES_ASKING_FOR_WEATHER) + tools = [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ] + msg = await client.messages.create( + model=model_name, + max_tokens=1024, + temperature=0, + tools=tools, + **kwargs, + ) + try: + dumped = msg.model_dump_json() + except Exception: + try: + dumped = json.dumps(msg.model_dump()) + except Exception: + dumped = repr(msg) + _log_append(log_file, dumped) + return msg + + +async def _async_messages_tool_stream(log_file: str): + client, model_name = get_async_anthropic_client_and_model() + kwargs = openai_chat_messages_to_anthropic_kwargs(MESSAGES_ASKING_FOR_WEATHER) + tools = [ + openai_function_tool_to_anthropic(WEATHER_TOOL), + openai_function_tool_to_anthropic(SEARCH_TOOL), + ] + stream = await client.messages.create( + model=model_name, + max_tokens=1024, + temperature=0, + tools=tools, + stream=True, + **kwargs, + ) + events = [] + async for event in stream: + events.append(event) + _log_append(log_file, f'anthropic_stream_event_count={len(events)}') + + final_msg = None + getter = getattr(stream, 'get_final_message', None) + if callable(getter): + try: + final_msg = await getter() + except Exception as err: # noqa: BLE001 — SDK / server variance + _log_append(log_file, f'get_final_message_failed: {err!r}') + + if final_msg is not None: + try: + dumped = final_msg.model_dump_json() + except Exception: + try: + dumped = json.dumps(final_msg.model_dump()) + 
except Exception: + dumped = repr(final_msg) + _log_append(log_file, dumped) + return 'final', final_msg, events + + blob = json.dumps([_event_to_dict(e) for e in events], default=str) + _log_append(log_file, blob[:16000]) + return 'raw', blob, events + + +async def _async_vlm_base64_solid_color_stream(log_file: str) -> tuple[str, str]: + """Returns ``(kind, text_or_blob)`` where ``kind`` is ``'final'`` or + ``'raw'``.""" + + client, model_name = get_async_anthropic_client_and_model() + stream = await client.messages.create( + model=model_name, + max_tokens=16384, + temperature=0.01, + stream=True, + messages=[{ + 'role': 'user', + 'content': [ + {'type': 'text', 'text': _SOLID_COLOR_VLM_PROMPT}, + { + 'type': 'image', + 'source': { + 'type': 'base64', + 'media_type': 'image/png', + 'data': _TINY_PNG_BASE64, + }, + }, + ], + }], + ) + events: list = [] + async for event in stream: + events.append(event) + _log_append(log_file, f'vlm_color_sdk_stream_events={len(events)}') + + final_msg = None + getter = getattr(stream, 'get_final_message', None) + if callable(getter): + try: + final_msg = await getter() + except Exception as err: # noqa: BLE001 + _log_append(log_file, f'get_final_message_failed: {err!r}') + + if final_msg is not None: + text = ''.join( + (getattr(b, 'text', None) or '') + for b in final_msg.content + if getattr(b, 'type', None) == 'text' + ) + try: + _log_append(log_file, final_msg.model_dump_json()) + except Exception: + _log_append(log_file, repr(final_msg)) + return 'final', text + + blob = json.dumps([_event_to_dict(e) for e in events], default=str) + _log_append(log_file, blob[:16000]) + return 'raw', blob + + +@_apply_marks +class TestAnthropicSdkToolCall(_ToolCallTestBase): + """Anthropic Messages + tools via official async SDK (end-to-end + integration).""" + + @pytest.fixture(autouse=True) + def _require_anthropic_sdk(self): + pytest.importorskip('anthropic') + + def test_tool_non_stream_weather(self, backend, model_case): + msg = 
asyncio.run(_async_messages_tool_non_stream(self._log_file)) + + assert msg.stop_reason == 'tool_use' + assert msg.role == 'assistant' + tool_blocks = _sdk_tool_use_blocks(msg) + assert len(tool_blocks) >= 1 + block = tool_blocks[0] + assert block.name == WEATHER_TOOL['function']['name'] + + _assert_weather_tool_city_state(block.input, ctx='test_tool_non_stream_weather') + + assert msg.usage is not None + assert msg.usage.input_tokens > 0 + assert msg.usage.output_tokens > 0 + + def test_tool_stream_weather(self, backend, model_case): + kind, payload, events = asyncio.run(_async_messages_tool_stream(self._log_file)) + + assert len(events) > 0, 'expected at least one stream event' + + if kind == 'final': + assert payload.stop_reason == 'tool_use' + tool_blocks = _sdk_tool_use_blocks(payload) + assert len(tool_blocks) >= 1 + assert tool_blocks[0].name == WEATHER_TOOL['function']['name'] + _assert_weather_tool_city_state(tool_blocks[0].input, ctx='test_tool_stream_weather/final') + return + + blob = payload + assert WEATHER_TOOL['function']['name'] in blob + assert 'tool_use' in blob + assert 'Dallas' in blob or 'dallas' in blob.lower() + + def test_tool_non_stream_weather_single_location_schema(self, backend, model_case): + msg = asyncio.run(_async_weather_tool_single_location_non_stream(self._log_file)) + assert msg.stop_reason == 'tool_use' + tool_blocks = _sdk_tool_use_blocks(msg) + assert len(tool_blocks) >= 1 + assert tool_blocks[0].name == 'get_current_weather' + inp = tool_blocks[0].input + assert isinstance(inp, dict) + loc = inp.get('location', '') + assert isinstance(loc, str) and len(loc) > 0 + loc_low = loc.lower() + assert 'new york' in loc_low or 'nyc' in loc_low + + def test_tool_non_stream_tool_choice_force_named(self, backend, model_case): + msg = asyncio.run(_async_tool_choice_force_named_tool(self._log_file)) + assert msg.stop_reason == 'tool_use' + tool_blocks = _sdk_tool_use_blocks(msg) + assert len(tool_blocks) >= 1 + assert tool_blocks[0].name 
== WEATHER_TOOL['function']['name'] + + def test_tool_non_stream_tool_choice_any(self, backend, model_case): + msg = asyncio.run(_async_tool_choice_any(self._log_file)) + assert msg.stop_reason == 'tool_use' + tool_blocks = _sdk_tool_use_blocks(msg) + assert len(tool_blocks) >= 1 + names = {b.name for b in tool_blocks} + assert WEATHER_TOOL['function']['name'] in names, names + + def test_tool_non_stream_weather_with_user_image_url(self, backend, model_case): + model_name = APIClient(BASE_URL).available_models[0] + if not _model_likely_supports_anthropic_vlm(model_name): + pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test') + + image_path = _eval_resource_file(_EVAL_IMAGE_TIGER) + msg = asyncio.run(_async_messages_tool_non_stream_with_user_image(self._log_file, image_path)) + assert msg.stop_reason == 'tool_use' + tool_blocks = _sdk_tool_use_blocks(msg) + assert len(tool_blocks) >= 1 + assert tool_blocks[0].name == WEATHER_TOOL['function']['name'] + _assert_weather_tool_city_state(tool_blocks[0].input, ctx='test_tool_non_stream_weather_with_user_image_url') + + def test_tool_non_stream_weather_with_user_image_base64(self, backend, model_case): + model_name = APIClient(BASE_URL).available_models[0] + if not _model_likely_supports_anthropic_vlm(model_name): + pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test') + + msg = asyncio.run(_async_messages_tool_non_stream_with_user_image_base64(self._log_file)) + assert msg.stop_reason == 'tool_use' + tool_blocks = _sdk_tool_use_blocks(msg) + assert len(tool_blocks) >= 1 + assert tool_blocks[0].name == WEATHER_TOOL['function']['name'] + _assert_weather_tool_city_state( + tool_blocks[0].input, + ctx='test_tool_non_stream_weather_with_user_image_base64', + ) + + def test_sdk_stream_vlm_user_image_base64_solid_color(self, backend, model_case): + """SDK streaming + 1×1 red PNG: final text (or raw event blob) should + mention a red-ish color.""" + + model_name = 
APIClient(BASE_URL).available_models[0] + if not _model_likely_supports_anthropic_vlm(model_name): + pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test') + + kind, payload = asyncio.run(_async_vlm_base64_solid_color_stream(self._log_file)) + ctx = f'test_sdk_stream_vlm_user_image_base64_solid_color/{kind}' + _assert_redish_color_in_text(payload, ctx=ctx) diff --git a/autotest/utils/constant.py b/autotest/utils/constant.py index 87e2759395..fbc389d9c1 100644 --- a/autotest/utils/constant.py +++ b/autotest/utils/constant.py @@ -4,8 +4,9 @@ DEFAULT_SERVER = os.getenv('MASTER_ADDR', '127.0.0.1') PROXY_PORT = 8000 -# Scalar presets for export/normalize fallback. Model-specific sampling (reasoning-effort, -# top-k, chat-template-kwargs, …) live in per-model ``autotest/configs/**/gen_config``. +BASE_HTTP_URL = f'http://{DEFAULT_SERVER}' +BASE_URL = f'{BASE_HTTP_URL}:{os.getenv("LMDEPLOY_PORT", str(DEFAULT_PORT))}' + EVAL_CONFIGS = { 'default': { 'query_per_second': 4, @@ -236,6 +237,7 @@ def _deps_profile_is_legacy() -> bool: 'unsloth/gpt-oss-20b-BF16', 'Qwen/Qwen2.5-7B-Instruct', 'internlm/Intern-S1-Pro-FP8', + 'internlm/interns2-preview-0509', ] TOOL_REASONING_MODEL_LIST_LEGACY = [ diff --git a/autotest/utils/tool_reasoning_definitions.py b/autotest/utils/tool_reasoning_definitions.py index df4fa3ba04..a6a886605b 100644 --- a/autotest/utils/tool_reasoning_definitions.py +++ b/autotest/utils/tool_reasoning_definitions.py @@ -3,11 +3,7 @@ import re from openai import OpenAI -from utils.constant import DEFAULT_PORT - -BASE_HTTP_URL = f"http://{os.getenv('MASTER_ADDR', 'localhost')}" -PORT = os.getenv('LMDEPLOY_PORT', str(DEFAULT_PORT)) -BASE_URL = f'{BASE_HTTP_URL}:{PORT}' +from utils.constant import BASE_URL #: Think-tag delimiters used by DeepSeek-R1 and QwenQwQ parsers THINK_START_TOKEN = '' @@ -61,6 +57,22 @@ }, } +# Anthropic ``tools[]`` entry: single ``location`` argument (Messages API style). 
+WEATHER_TOOL_SINGLE_LOCATION_ANTHROPIC = { + 'name': 'get_current_weather', + 'description': 'Useful for querying the weather in a specified city.', + 'input_schema': { + 'type': 'object', + 'properties': { + 'location': { + 'type': 'string', + 'description': 'City or region, for example: New York, London, Tokyo, etc.', + }, + }, + 'required': ['location'], + }, +} + CALCULATOR_TOOL = { 'type': 'function', 'function': { @@ -191,6 +203,64 @@ def get_client_and_model(base_url=None): return client, model_name +def openai_function_tool_to_anthropic(openai_style_tool: dict) -> dict: + """Convert OpenAI ``{'type':'function','function':{...}}`` to Anthropic + ``tools[]`` item.""" + + fn = openai_style_tool['function'] + return { + 'name': fn['name'], + 'description': fn.get('description') or '', + 'input_schema': fn['parameters'], + } + + +def openai_chat_messages_to_anthropic_kwargs(messages: list[dict]) -> dict: + """Split OpenAI-style *messages* into Anthropic ``system`` plus + ``messages`` kwargs.""" + + system_chunks: list[str] = [] + out: list[dict] = [] + for m in messages: + role = m['role'] + content = m['content'] + if role == 'system': + if not isinstance(content, str): + raise TypeError('Anthropic path expects string system message content.') + system_chunks.append(content) + elif role in ('user', 'assistant'): + out.append({'role': role, 'content': content}) + else: + raise ValueError(f'Unsupported message role for Anthropic: {role!r}') + kwargs: dict = {'messages': out} + if system_chunks: + kwargs['system'] = '\n\n'.join(system_chunks) + return kwargs + + +def get_async_anthropic_client_and_model(base_url: str | None = None): + """Return ``(AsyncAnthropic, model_name)`` for LMDeploy (Anthropic routes + on server root).""" + + import anthropic + + from lmdeploy.serve.openai.api_client import get_model_list + + url = base_url or BASE_URL + model_names = get_model_list(f'{url}/v1/models') + if not model_names: + raise RuntimeError(f'No models returned from 
{url}/v1/models') + model_name = model_names[0] + client = anthropic.AsyncAnthropic( + api_key=os.getenv('ANTHROPIC_API_KEY', 'YOUR_API_KEY'), + base_url=url, + max_retries=0, + timeout=600.0, + default_headers={'anthropic-version': '2023-06-01'}, + ) + return client, model_name + + # -- Logging / client helpers ------------------------------------------------ @@ -388,7 +458,9 @@ def collect_stream_reasoning(stream): finish_reason – last non-None finish_reason finish_reason_count – how many chunks carried a non-None finish_reason role – first non-None role value - role_count – how many chunks carried a non-None role + role_count – number of *distinct* role values in stream order; + consecutive chunks repeating the same ``delta.role`` + count once (some LMDeploy backends resend ``role`` every chunk) chunk_count – total number of chunks received reasoning_chunks – number of chunks containing reasoning content_chunks – number of chunks containing content @@ -406,6 +478,7 @@ def collect_stream_reasoning(stream): 'content_chunks': 0, } + last_distinct_role = None for chunk in stream: result['chunk_count'] += 1 if not chunk.choices: @@ -418,8 +491,11 @@ def collect_stream_reasoning(stream): delta = choice.delta if delta.role: - result['role'] = delta.role - result['role_count'] += 1 + if result['role'] is None: + result['role'] = delta.role + if last_distinct_role != delta.role: + result['role_count'] += 1 + last_distinct_role = delta.role # -- reasoning_content (lmdeploy extension field) ------------------- rc = getattr(delta, 'reasoning_content', None) diff --git a/requirements/test.txt b/requirements/test.txt index 6061aaafde..3580dfc179 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,4 +1,5 @@ allure-pytest +anthropic>=0.39.0 coverage jsonschema matplotlib