diff --git a/autotest/interface/restful/test_restful_anthropic_sdk_messages.py b/autotest/interface/restful/test_restful_anthropic_sdk_messages.py
new file mode 100644
index 0000000000..e1185a1cb9
--- /dev/null
+++ b/autotest/interface/restful/test_restful_anthropic_sdk_messages.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+import asyncio
+import json
+
+import pytest
+
+pytest.importorskip('anthropic')
+
+from utils.constant import BACKEND_LIST, RESTFUL_MODEL_LIST
+from utils.tool_reasoning_definitions import get_async_anthropic_client_and_model
+
+
+def _text_from_message(msg) -> str:
+    """Concatenate the ``text`` of every ``type == 'text'`` block in ``msg.content``."""
+    blocks = getattr(msg, 'content', []) or []
+    return ''.join(
+        getattr(blk, 'text', '') or '' for blk in blocks if getattr(blk, 'type', None) == 'text'
+    )
+
+
+def _first_message_start_usage(events: list) -> tuple[int, int] | None:
+    """Return ``(input_tokens, output_tokens)`` of the first usable ``message_start`` event.
+
+    Events of another type or without a ``message`` are skipped. ``None`` means no such event
+    exists or the first one found has no ``usage``; counters missing on ``usage`` default to 0.
+    """
+    starts = (ev for ev in events if getattr(ev, 'type', None) == 'message_start')
+    messages = (getattr(ev, 'message', None) for ev in starts)
+    usage = next((getattr(m, 'usage', None) for m in messages if m is not None), None)
+    if usage is None:
+        return None
+    return getattr(usage, 'input_tokens', 0), getattr(usage, 'output_tokens', 0)
+
+
+async def _sdk_simple_non_stream() -> object:
+    """Send one plain user turn via ``get_async_anthropic_client_and_model()`` and return the reply."""
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    request = dict(
+        model=served_model, max_tokens=1024, temperature=0.01,
+        messages=[{'role': 'user', 'content': 'how are you!'}],
+    )
+    return await sdk_client.messages.create(**request)
+
+
+async def _sdk_system_non_stream() -> object:
+    """Send one user turn together with a top-level ``system`` prompt and return the reply."""
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    request = dict(
+        model=served_model, max_tokens=1024, temperature=0.01,
+        system='you are a helpful assistant',
+        messages=[{'role': 'user', 'content': 'how are you!'}],
+    )
+    return await sdk_client.messages.create(**request)
+
+
+async def _sdk_stream_events_and_final() -> tuple[list, object | None]:
+    """Stream one user turn; return ``(collected events, final message or None)``."""
+    client, model_name = get_async_anthropic_client_and_model()
+    stream = await client.messages.create(
+        model=model_name,
+        max_tokens=1024,
+        temperature=0.01,
+        messages=[{'role': 'user', 'content': 'how are you!'}],
+        stream=True,
+    )
+    events: list = [event async for event in stream]
+    # ``get_final_message`` is looked up dynamically; a stream object without it yields ``None``.
+    fetch_final = getattr(stream, 'get_final_message', None)
+    if not callable(fetch_final):
+        return events, None
+    try:
+        return events, await fetch_final()
+    except Exception:
+        # Best-effort: any failure here is reported as "no final message".
+        return events, None
+
+
+@pytest.mark.order(8)
+@pytest.mark.flaky(reruns=2)
+@pytest.mark.parametrize('backend', BACKEND_LIST)
+@pytest.mark.parametrize('model_case', RESTFUL_MODEL_LIST)
+class TestRestfulAnthropicSdkMessages:
+    """Covers simple / system / streaming Messages (LMDeploy streams zero usage
+    on ``message_start``)."""
+
+    def test_sdk_simple_messages_non_stream(self, backend, model_case):
+        """Plain user turn: assistant role, known stop reason, non-empty text and non-zero usage."""
+        msg = asyncio.run(_sdk_simple_non_stream())
+        assert getattr(msg, 'role', None) == 'assistant'
+        assert getattr(msg, 'stop_reason', None) in ('end_turn', 'max_tokens')
+        assert len(_text_from_message(msg)) > 0
+        usage = getattr(msg, 'usage', None)
+        assert usage is not None
+        assert getattr(usage, 'input_tokens', 0) > 0
+        assert getattr(usage, 'output_tokens', 0) > 0
+
+    def test_sdk_system_message_non_stream(self, backend, model_case):
+        """With a ``system`` prompt: assistant role, known stop reason and non-empty text."""
+        msg = asyncio.run(_sdk_system_non_stream())
+        assert getattr(msg, 'role', None) == 'assistant'
+        assert getattr(msg, 'stop_reason', None) in ('end_turn', 'max_tokens')
+        assert len(_text_from_message(msg)) > 0
+
+    def test_sdk_streaming(self, backend, model_case):
+        """``message_start`` carries zero usage; real counts must show up later in the stream."""
+        events, final_msg = asyncio.run(_sdk_stream_events_and_final())
+        assert len(events) > 0
+
+        usage0 = _first_message_start_usage(events)
+        assert usage0 is not None, 'message_start with usage not found in stream'
+        in0, out0 = usage0
+        assert out0 == 0, 'LMDeploy streams output_tokens=0 until message_delta'
+        assert in0 == 0, 'LMDeploy streams input_tokens=0 on message_start (final usage appears in message_delta)'
+
+        if final_msg is not None:
+            assert getattr(final_msg, 'role', None) == 'assistant'
+            u = getattr(final_msg, 'usage', None)
+            assert u is not None
+            assert getattr(u, 'input_tokens', 0) > 5
+            assert getattr(u, 'output_tokens', 0) > 0
+            assert len(_text_from_message(final_msg)) > 0
+            return
+
+        # No final message: require a ``message_delta`` event with non-zero output usage. A substring
+        # search for ``output_tokens`` would be vacuous: the ``message_start`` dump already has that key.
+        dumped = [e.model_dump() if hasattr(e, 'model_dump') else {'repr': repr(e)} for e in events]
+        blob = json.dumps(dumped, default=str)
+        deltas = [e for e in events if getattr(e, 'type', None) == 'message_delta']
+        assert deltas, f'no message_delta event in stream: {blob[:2000]}'
+        delta_usage = getattr(deltas[-1], 'usage', None)
+        assert getattr(delta_usage, 'output_tokens', 0) > 0, blob[:2000]
diff --git a/autotest/interface/restful/test_restful_anthropic_v1.py b/autotest/interface/restful/test_restful_anthropic_v1.py
new file mode 100644
index 0000000000..bcaa1ec276
--- /dev/null
+++ b/autotest/interface/restful/test_restful_anthropic_v1.py
@@ -0,0 +1,1002 @@
+from __future__ import annotations
+
+import json
+import os
+from functools import lru_cache
+
+import pytest
+import requests
+from utils.config_utils import get_config
+from utils.constant import BACKEND_LIST, BASE_URL, RESTFUL_MODEL_LIST
+from utils.tool_reasoning_definitions import WEATHER_TOOL, openai_function_tool_to_anthropic
+
+from lmdeploy.serve.openai.api_client import APIClient
+
+ANTHROPIC_VERSION = '2023-06-01'  # value sent in the ``anthropic-version`` request header
+
+_MESSAGES_URL = f'{BASE_URL}/v1/messages'  # Messages endpoint
+_COUNT_TOKENS_URL = f'{BASE_URL}/v1/messages/count_tokens'  # token-counting endpoint
+
+_EVAL_IMAGE_TIGER = 'tiger.jpeg'  # file name looked up under the configured ``resource_path``
+
+# 1×1 PNG (red), for ``source: {type: base64}`` smoke without relying on ``resource_path`` files.
+_TINY_PNG_BASE64 = (
+    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=='
+)
+
+
+@pytest.fixture(scope='class')
+def deployed_model_name() -> str:
+    """Class-scoped fixture: first id in ``APIClient(BASE_URL).available_models``."""
+    served_ids = APIClient(BASE_URL).available_models
+    return served_ids[0]
+
+
+@lru_cache(maxsize=1)
+def _eval_resource_path() -> str:
+    """Return the configured ``resource_path`` directory with trailing slashes removed.
+
+    Read from ``get_config()``; asserts a non-empty string that names an existing directory.
+    """
+    raw = get_config().get('resource_path')
+    assert isinstance(raw, str) and raw, 'resource_path must be set in autotest config (e.g. config_h.yml)'
+    base = raw.rstrip('/')
+    assert os.path.isdir(base), f'resource_path is not a directory: {base!r}'
+    return base
+
+
+def _eval_resource_file(filename: str) -> str:  # path of an existing file under the resource directory
+    candidate = os.path.join(_eval_resource_path(), filename)
+    assert os.path.isfile(candidate), f'missing offline eval resource {filename!r}: {candidate}'
+    return candidate
+
+
+def _anthropic_headers() -> dict[str, str]:
+    """Build the JSON request headers, including the ``anthropic-version`` header."""
+    headers = {'Content-Type': 'application/json'}
+    headers['anthropic-version'] = ANTHROPIC_VERSION
+    return headers
+
+
+def _assistant_text_from_message_payload(data: dict) -> str:  # joins every ``text`` content block
+    return ''.join(blk.get('text', '') for blk in data.get('content', []) if blk.get('type') == 'text')
+
+
+def _model_likely_supports_anthropic_vlm(model_name: str) -> bool:
+    """Heuristic: ``True`` when the upper-cased served id contains a known vision marker
+    (callers skip image requests otherwise)."""
+    # Substring markers matched against the upper-cased model id.
+    markers = (
+        'VL',
+        'INTERNVL',
+        'INTERN-VL',
+        'QWEN-VL',
+        'QWEN2-VL',
+        'QWEN2.5-VL',
+        'QWEN3.5',
+        'MINICPM-V',
+        'LLAVA',
+        'COGVLM',
+        'XCOMPOSER',
+        'INTERNXCOMPOSER',
+        'INTERNS',
+    )
+    upper_id = model_name.upper()
+    return any(marker in upper_id for marker in markers)
+
+
+def _parse_anthropic_sse(raw: str) -> list[tuple[str | None, dict]]:
+    """Turn Anthropic-style SSE text into ``(event_name, json_payload)`` pairs.
+
+    Each non-empty ``data:`` line is JSON-decoded and paired with the latest ``event:`` name
+    (``None`` if none was seen since the previous payload); every other line is ignored.
+    """
+    parsed: list[tuple[str | None, dict]] = []
+    pending_name: str | None = None
+    for raw_line in raw.splitlines():
+        field, colon, value = raw_line.rstrip('\r').partition(':')
+        value = value.strip()
+        if colon and field == 'event':
+            pending_name = value
+        elif colon and field == 'data' and value:
+            parsed.append((pending_name, json.loads(value)))
+            pending_name = None
+    return parsed
+
+
+def _aggregate_stream_text(events: list[tuple[str | None, dict]]) -> str:
+    """Join the ``text`` of every ``text_delta`` found in ``content_block_delta`` payloads,
+    in stream order."""
+    pieces: list[str] = []
+    for _name, payload in events:
+        delta = payload.get('delta') or {}
+        if payload.get('type') == 'content_block_delta' and delta.get('type') == 'text_delta':
+            pieces.append(delta.get('text') or '')
+    return ''.join(pieces)
+
+
+def _assert_count_tokens_json(data: dict) -> int:  # body must be exactly {'input_tokens': <positive int>}
+    assert set(data) == {'input_tokens'}, data
+    count = data['input_tokens']
+    assert isinstance(count, int) and count > 0, count
+    return count
+
+
+def _assert_success_message_json(data: dict, *, model: str) -> dict:
+    """Validate a non-stream ``/v1/messages`` success body and return it unchanged.
+
+    Covers type/role/model, ``msg_`` id, non-empty content list, int usage counters and stop reason.
+    """
+    assert data.get('type') == 'message', data
+    assert (data.get('role'), data.get('model')) == ('assistant', model)
+    message_id = data.get('id')
+    assert isinstance(message_id, str) and message_id.startswith('msg_'), message_id
+    blocks = data.get('content')
+    assert isinstance(blocks, list) and len(blocks) >= 1, blocks
+    usage = data.get('usage')
+    assert isinstance(usage, dict), data
+    assert 'input_tokens' in usage and 'output_tokens' in usage, usage
+    for counter, minimum in (('input_tokens', 0), ('output_tokens', 1)):
+        assert isinstance(usage[counter], int) and usage[counter] >= minimum
+    assert data.get('stop_reason') in ('end_turn', 'max_tokens', 'stop_sequence', 'tool_use', None)
+    return data
+
+
+def _assert_anthropic_error_envelope(body: dict) -> None:  # {'type': 'error', 'error': {type, message, ...}}
+    assert body.get('type') == 'error', body
+    detail = body.get('error')
+    assert isinstance(detail, dict) and {'type', 'message'} <= detail.keys(), detail
+
+
+def _assert_fastapi_validation_error(resp: requests.Response) -> dict:
+    """Check a FastAPI ``RequestValidationError`` response and return its JSON body.
+
+    Expects HTTP 422 with a list under ``detail`` (not the Anthropic ``type: error`` envelope).
+    """
+    assert resp.status_code == 422, resp.text
+    payload = resp.json()
+    detail = payload.get('detail')
+    assert isinstance(detail, list), payload
+    return payload
+
+
+def _assert_tool_parser_required_message(resp: requests.Response) -> None:
+    """Expect HTTP 400 ``invalid_request_error`` whose message mentions ``--tool-call-parser``."""
+    assert resp.status_code == 400, resp.text
+    payload = resp.json()
+    _assert_anthropic_error_envelope(payload)
+    assert payload['error']['type'] == 'invalid_request_error'
+    assert '--tool-call-parser' in payload['error']['message']
+
+
+@pytest.mark.order(8)
+@pytest.mark.flaky(reruns=2)
+@pytest.mark.parametrize('backend', BACKEND_LIST)
+@pytest.mark.parametrize('model_case', RESTFUL_MODEL_LIST)
+class TestRestfulAnthropicV1:
+
+    def test_list_models(self, backend, model_case, deployed_model_name: str):
+        """``GET /anthropic/v1/models``: list envelope shape; the deployed model id must be listed."""
+        resp = requests.get(f'{BASE_URL}/anthropic/v1/models', timeout=30)
+        assert resp.status_code == 200, resp.text
+        listing = resp.json()
+        assert isinstance(listing.get('has_more'), bool)
+        assert 'data' in listing
+        entries = listing['data']
+        assert isinstance(entries, list)
+        for entry in entries:
+            assert isinstance(entry, dict)
+            assert entry.get('type') == 'model'
+            assert isinstance(entry.get('id'), str) and len(entry['id']) > 0
+            assert isinstance(entry.get('display_name'), str)
+        ids = [entry['id'] for entry in entries]
+        assert deployed_model_name in ids, (deployed_model_name, ids)
+        if ids:
+            assert (listing.get('first_id'), listing.get('last_id')) == (ids[0], ids[-1])
+
+    @pytest.mark.parametrize(
+        'endpoint_url,body_without_model',
+        [
+            pytest.param(
+                _MESSAGES_URL,
+                {'max_tokens': 8, 'messages': [{'role': 'user', 'content': 'Say hi in one word.'}]},
+                id='messages',
+            ),
+            pytest.param(_COUNT_TOKENS_URL, {'messages': [{'role': 'user', 'content': 'Hi'}]}, id='count_tokens'),
+        ],
+    )
+    def test_messages_and_count_tokens_missing_version_header(
+            self, backend, model_case, deployed_model_name: str, endpoint_url: str, body_without_model: dict):
+        """Both endpoints reject a request lacking ``anthropic-version`` with HTTP 400.
+
+        The error must use the Anthropic envelope with type ``invalid_request_error``.
+        """
+        payload = {'model': deployed_model_name, **body_without_model}
+        # Deliberately send only ``Content-Type`` so the version header is missing.
+        resp = requests.post(
+            endpoint_url, headers={'Content-Type': 'application/json'}, json=payload, timeout=60,
+        )
+        assert resp.status_code == 400, resp.text
+        body = resp.json()
+        _assert_anthropic_error_envelope(body)
+        error = body['error']
+        assert error['type'] == 'invalid_request_error'
+        assert error['message'] == 'Missing required header: anthropic-version'
+
+    @pytest.mark.parametrize(
+        'endpoint_url,request_json',
+        [
+            pytest.param(
+                _MESSAGES_URL,
+                {
+                    'model': 'definitely-not-a-deployed-model-name',
+                    'max_tokens': 8,
+                    'messages': [{'role': 'user', 'content': 'Hi'}],
+                },
+                id='messages',
+            ),
+            pytest.param(
+                _COUNT_TOKENS_URL,
+                {
+                    'model': 'definitely-not-a-deployed-model-name',
+                    'messages': [{'role': 'user', 'content': 'Hi'}],
+                },
+                id='count_tokens',
+            ),
+        ],
+    )
+    def test_messages_and_count_tokens_unknown_model(
+            self, backend, model_case, endpoint_url: str, request_json: dict):
+        """Both endpoints answer an unknown ``model`` with HTTP 404 in the Anthropic error envelope.
+
+        The error type must be ``not_found_error`` and the message must contain ``does not exist``.
+        """
+        resp = requests.post(endpoint_url, headers=_anthropic_headers(), json=request_json, timeout=30)
+        assert resp.status_code == 404, resp.text
+        body = resp.json()
+        _assert_anthropic_error_envelope(body)
+        error = body['error']
+        assert error['type'] == 'not_found_error'
+        assert 'does not exist' in error['message']
+
+    def test_messages_with_system(self, backend, model_case, deployed_model_name: str):
+        """Top-level ``system`` string must steer the reply (expects ``acknowledged`` in the text).
+
+        ``max_tokens`` is generous: some chat models emit visible chain-of-thought before the answer.
+        """
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 2048,
+            'temperature': 0.01,
+            'system': 'You reply only with the single word: Acknowledged.',
+            'messages': [{'role': 'user', 'content': 'What is your instruction?'}],
+        }
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json=payload,
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message)
+        assert 'acknowledged' in reply.lower(), reply[:500]
+
+    def test_messages_user_content_as_blocks(self, backend, model_case, deployed_model_name: str):
+        """User ``content`` given as a list of ``text`` blocks (the Anthropic-native shape).
+
+        Two text blocks together ask for the colour of grass; the reply must contain one of the
+        expected colour keywords.
+        """
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 24,
+            'temperature': 0.01,
+            'messages': [{
+                'role': 'user',
+                'content': [
+                    {'type': 'text', 'text': 'Answer with one word: color of grass? '},
+                    {'type': 'text', 'text': 'Just the color name.'},
+                ],
+            }],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message)
+        lowered = reply.lower()
+        expected = ('green', 'grass', '青', '綠', '绿')
+        assert any(word in lowered for word in expected), f'expected color-of-grass hint in reply: {reply[:500]!r}'
+
+    def test_messages_system_as_content_blocks(self, backend, model_case, deployed_model_name: str):
+        """``system`` supplied as a list of ``text`` blocks must still steer the reply.
+
+        NOTE(review): the server presumably concatenates the blocks for the chat template — confirm.
+        """
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 256,
+            'temperature': 0.01,
+            'system': [
+                {'type': 'text', 'text': 'You reply only with the single word: Confirmed.'},
+                {'type': 'text', 'text': ' No extra words.'},
+            ],
+            'messages': [{'role': 'user', 'content': 'Acknowledge with your required reply.'}],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message).lower()
+        assert 'confirmed' in reply, reply[:500]
+
+    def test_messages_history_tool_use_and_tool_result_without_request_tools(
+            self, backend, model_case, deployed_model_name: str):
+        """Replay ``tool_use`` / ``tool_result`` history blocks without a top-level ``tools`` field.
+
+        The final question refers to the replayed tool result (``72F and sunny.``).
+        """
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 2048,
+            'temperature': 0.01,
+            'messages': [
+                {'role': 'user', 'content': 'What is the weather in San Francisco?'},
+                {
+                    'role': 'assistant',
+                    'content': [
+                        {
+                            'type': 'tool_use',
+                            'id': 'toolu_hist_restful_01',
+                            'name': 'get_current_weather',
+                            'input': {'location': 'San Francisco'},
+                        },
+                    ],
+                },
+                {
+                    'role': 'user',
+                    'content': [
+                        {
+                            'type': 'tool_result',
+                            'tool_use_id': 'toolu_hist_restful_01',
+                            'content': '72F and sunny.',
+                        },
+                    ],
+                },
+                {
+                    'role': 'user',
+                    'content': 'In one short phrase, was it warm? Answer yes or no.',
+                },
+            ],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message)
+        lowered = reply.lower()
+        assert 'yes' in lowered or '是' in reply or '温暖' in reply or '暖和' in reply, (
+            'expected warm/yes style answer given 72F sunny tool result; '
+            f'stop_reason={message.get("stop_reason")!r} text={reply[:500]!r}'
+        )
+
+    def test_messages_history_thinking_and_text_blocks(self, backend, model_case, deployed_model_name: str):
+        """Assistant history containing ``thinking`` + ``text`` blocks is accepted on replay.
+
+        The last user turn asks for the literal ``ACK``; the reply must contain it (any case).
+        """
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 2048,
+            'temperature': 0.01,
+            'messages': [
+                {'role': 'user', 'content': 'Hi.'},
+                {
+                    'role': 'assistant',
+                    'content': [
+                        {'type': 'thinking', 'thinking': '(internal scratchpad)'},
+                        {'type': 'text', 'text': 'Hello — how can I help?'},
+                    ],
+                },
+                {'role': 'user', 'content': 'Reply with exactly: ACK'},
+            ],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message)
+        assert 'ack' in reply.lower(), (
+            'expected literal ACK from final user instruction; '
+            f'stop_reason={message.get("stop_reason")!r} text={reply[:500]!r}'
+        )
+
+    def test_messages_user_image_file_from_config_resource(self, backend, model_case, deployed_model_name: str):
+        """User turn with a ``text`` block plus an ``image`` block pointing at a local resource file.
+
+        The image ``url`` is the filesystem path returned by ``_eval_resource_file``. Skipped when
+        the served id does not look vision-capable.
+        """
+
+        if not _model_likely_supports_anthropic_vlm(deployed_model_name):
+            pytest.skip(f'model {deployed_model_name!r} is not treated as vision-capable for this test')
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 128,
+            'temperature': 0.01,
+            'messages': [{
+                'role': 'user',
+                'content': [
+                    {'type': 'text', 'text': 'In one word, name the animal in the image.'},
+                    {
+                        'type': 'image',
+                        'source': {'type': 'url', 'url': image_path},
+                    },
+                ],
+            }],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=180,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message).lower()
+        expected = ('tiger', 'cat', 'big cat', '虎', '猫', 'feline')
+        assert any(word in reply for word in expected), reply[:800]
+
+    def test_count_tokens_user_image_block_exceeds_text_only(self, backend, model_case, deployed_model_name: str):
+        """``count_tokens`` must report more input tokens once an ``image`` block joins the text.
+
+        Two requests are sent with the same ``text`` block; only the second adds an ``image``
+        block whose ``url`` is the local tiger resource. Unlike the other image tests in this
+        class, this one is not skipped for served ids that look text-only.
+        """
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        text_block = {'type': 'text', 'text': 'Describe briefly.'}
+        text_only = {
+            'model': deployed_model_name,
+            'messages': [{'role': 'user', 'content': [text_block]}],
+        }
+        with_image = {
+            'model': deployed_model_name,
+            'messages': [{
+                'role': 'user',
+                'content': [text_block, {'type': 'image', 'source': {'type': 'url', 'url': image_path}}],
+            }],
+        }
+
+        r0 = requests.post(
+            _COUNT_TOKENS_URL, headers=_anthropic_headers(), json=text_only, timeout=120,
+        )
+        assert r0.status_code == 200, r0.text
+        n0 = _assert_count_tokens_json(r0.json())
+
+        r1 = requests.post(
+            _COUNT_TOKENS_URL, headers=_anthropic_headers(), json=with_image, timeout=120,
+        )
+        assert r1.status_code == 200, r1.text
+        n1 = _assert_count_tokens_json(r1.json())
+        assert n1 > n0, ('image-bearing user message should tokenize longer than text-only', n1, n0)
+
+    def test_messages_user_image_interleaved_text_blocks(self, backend, model_case, deployed_model_name: str):
+        """Multimodal user turn ordered ``text`` → ``image`` → ``text``.
+
+        Skipped when the served id does not look vision-capable; expects an animal keyword.
+        """
+
+        if not _model_likely_supports_anthropic_vlm(deployed_model_name):
+            pytest.skip(f'model {deployed_model_name!r} is not treated as vision-capable for this test')
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 128,
+            'temperature': 0.01,
+            'messages': [{
+                'role': 'user',
+                'content': [
+                    {
+                        'type': 'text',
+                        'text': 'The next block is an image. After it, follow the final instruction only.',
+                    },
+                    {'type': 'image', 'source': {'type': 'url', 'url': image_path}},
+                    {'type': 'text', 'text': 'In one word, name the animal in the image.'},
+                ],
+            }],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=180,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message).lower()
+        expected = ('tiger', 'cat', 'big cat', '虎', '猫', 'feline')
+        assert any(word in reply for word in expected), reply[:800]
+
+    def test_messages_user_image_base64_stream(self, backend, model_case, deployed_model_name: str):
+        """Tiny PNG sent as a ``base64`` image source with ``stream: true`` (VLM + SSE path).
+
+        Skipped for served ids that do not look vision-capable; expects a red-ish colour name.
+        """
+
+        if not _model_likely_supports_anthropic_vlm(deployed_model_name):
+            pytest.skip(f'model {deployed_model_name!r} is not treated as vision-capable for this test')
+
+        payload = {
+            'model': deployed_model_name,
+            # Same as tool_parser HTTP solid-color VLM test: leave room after thinking_delta.
+            'max_tokens': 16384,
+            'temperature': 0.01,
+            'stream': True,
+            'messages': [{
+                'role': 'user',
+                'content': [
+                    {
+                        'type': 'text',
+                        'text': (
+                            'The image is a single solid color (one pixel). '
+                            'Reply with at most three words: name that color only (e.g. red).'
+                        ),
+                    },
+                    {
+                        'type': 'image',
+                        'source': {
+                            'type': 'base64',
+                            'media_type': 'image/png',
+                            'data': _TINY_PNG_BASE64,
+                        },
+                    },
+                ],
+            }],
+        }
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, stream=True, timeout=180)
+        assert resp.status_code == 200, resp.text
+        # Join the raw bytes before decoding: a multi-byte UTF-8 character (the expected answers
+        # include CJK colour names) may be split across network chunks, and decoding each chunk
+        # on its own would then raise UnicodeDecodeError.
+        raw = b''.join(resp.iter_content(chunk_size=None)).decode('utf-8')
+        events = _parse_anthropic_sse(raw)
+        types = [obj.get('type') for _, obj in events]
+        assert 'message_start' in types
+        assert 'message_stop' in types
+        assembled = _aggregate_stream_text(events)
+        assert len(assembled.strip()) > 0, repr(assembled[:300])
+        lowered = assembled.lower()
+        reddish = (
+            'red',
+            'crimson',
+            'scarlet',
+            'maroon',
+            'ruby',
+            'vermilion',
+            '红',
+            '赤',
+            '朱',
+            '绯',
+        )
+        assert any(word in lowered for word in reddish), (
+            f'expected red-ish color name in streamed reply: {assembled[:500]!r}')
+
+    def test_messages_multi_turn(self, backend, model_case, deployed_model_name: str):
+        """Three-turn history: the reply to the last user turn must repeat the code word
+        ``banana`` given in the first turn."""
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 2048,
+            'temperature': 0.01,
+            'messages': [
+                {'role': 'user', 'content': 'Remember the code word: banana.'},
+                {'role': 'assistant', 'content': 'Understood, the code word is banana.'},
+                {'role': 'user', 'content': 'What was the code word? Reply with that word only.'},
+            ],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message).lower()
+        assert 'banana' in reply, reply[:500]
+
+    def test_messages_max_tokens_budget(self, backend, model_case, deployed_model_name: str):
+        """A tight ``max_tokens`` (6) must cap generation.
+
+        ``usage.output_tokens`` has to land in ``[4, 8]`` and ``stop_reason`` must be ``max_tokens``
+        or ``end_turn``; the reply must still contain some text.
+        """
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 6,
+            'temperature': 0.01,
+            'messages': [{
+                'role': 'user',
+                'content': 'Write a very long essay about world history. Do not stop early.',
+            }],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        produced = message['usage']['output_tokens']
+        assert 4 <= produced <= 8
+        assert message['stop_reason'] in ('max_tokens', 'end_turn')
+        assert _assistant_text_from_message_payload(message), message['content']
+
+    def test_messages_stop_sequences(self, backend, model_case, deployed_model_name: str):
+        """The reply must not contain any of the requested ``stop_sequences`` yet must be non-empty.
+
+        NOTE(review): presumably maps to LMDeploy ``GenerationConfig.stop_words`` server-side — confirm.
+        """
+
+        stops = [' Shanghai', ' city', ' China']
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 200,
+            'temperature': 0.01,
+            'stop_sequences': stops,
+            'messages': [{'role': 'user', 'content': 'Shanghai is'}],
+        }
+        resp = requests.post(
+            _MESSAGES_URL,
+            headers=_anthropic_headers(),
+            json=payload,
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        reply = _assistant_text_from_message_payload(message)
+        for stop in stops:
+            assert stop not in reply
+        assert message['stop_reason'] in ('end_turn', 'max_tokens', 'stop_sequence')
+        assert len(reply) > 0, 'stop_sequence should still yield visible assistant text before the stop'
+
+    def test_messages_non_stream(self, backend, model_case, deployed_model_name: str):
+        """Basic non-stream request: the first content block must be ``text`` and the joined
+        text must be non-blank."""
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 32,
+            'temperature': 0.01,
+            'messages': [{'role': 'user', 'content': 'Reply with a single short greeting.'}],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        message = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        assert message['content'][0]['type'] == 'text'
+        assert len(_assistant_text_from_message_payload(message).strip()) > 0
+
+    def test_messages_stream(self, backend, model_case, deployed_model_name: str):
+        """SSE lifecycle: ``message_start`` envelope with zeroed usage, streamed text,
+        then a ``message_delta`` that reports usage, and a ``message_stop``."""
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 48,
+            'temperature': 0.01,
+            'stream': True,
+            'messages': [{'role': 'user', 'content': 'Count from 1 to 3, one number per line.'}],
+        }
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, stream=True, timeout=120)
+        assert resp.status_code == 200, resp.text
+        # Decode once over the joined bytes: a multi-byte UTF-8 character may straddle two chunks.
+        raw = b''.join(resp.iter_content(chunk_size=None)).decode('utf-8')
+        events = _parse_anthropic_sse(raw)
+        types = [obj.get('type') for _, obj in events]
+        assert 'message_start' in types
+        assert 'message_delta' in types
+        assert 'message_stop' in types
+
+        # ``message_start`` carries the message envelope; both usage counters start at zero.
+        start_evt = next((obj for _, obj in events if obj.get('type') == 'message_start'), None)
+        assert start_evt is not None
+        m0 = start_evt['message']
+        assert m0.get('type') == 'message'
+        assert m0.get('role') == 'assistant'
+        assert m0.get('model') == deployed_model_name
+        assert isinstance(m0.get('id'), str) and m0['id'].startswith('msg_')
+        assert m0.get('usage', {}).get('input_tokens') == 0
+        assert m0.get('usage', {}).get('output_tokens') == 0
+
+        assembled = _aggregate_stream_text(events)
+        assert len(assembled) > 0
+        assert sum(1 for d in ('1', '2', '3') if d in assembled) >= 2, (
+            'expected at least two of the digits 1–3 in streamed text', repr(assembled[:200])
+        )
+
+        # The first ``message_delta`` must report integer usage with at least one output token.
+        delta_evt = next((obj for _, obj in events if obj.get('type') == 'message_delta'), None)
+        assert delta_evt is not None
+        du = delta_evt['usage']
+        assert 'output_tokens' in du and isinstance(du['output_tokens'], int)
+        assert du['output_tokens'] > 0
+        assert 'input_tokens' in du and isinstance(du['input_tokens'], int) and du['input_tokens'] >= 0
+
+    def test_count_tokens(self, backend, model_case, deployed_model_name: str):
+        """A longer user prompt must yield a larger count than a short one."""
+
+        prompts = (
+            'Hi',
+            'Hello, estimate my token count.',
+        )
+        counts = []
+        # One request per prompt, short one first; each response is validated before the next call.
+        for prompt in prompts:
+            resp = requests.post(
+                _COUNT_TOKENS_URL,
+                headers=_anthropic_headers(),
+                json={'model': deployed_model_name, 'messages': [{'role': 'user', 'content': prompt}]},
+                timeout=60,
+            )
+            assert resp.status_code == 200, resp.text
+            counts.append(_assert_count_tokens_json(resp.json()))
+        short, long = counts
+        # The failure message shows both values as ``(long, short)``.
+        assert long > short, (long, short)
+
+    @pytest.mark.parametrize(
+        'endpoint_url',
+        [_MESSAGES_URL, _COUNT_TOKENS_URL],
+        ids=['messages', 'count_tokens'],
+    )
+    def test_messages_and_count_tokens_invalid_json_body(
+            self, backend, model_case, deployed_model_name: str, endpoint_url: str):
+        """A truncated JSON body must fail the shared validation-error check on both
+        parametrized endpoints."""
+
+        # ``data=`` (instead of ``json=``) posts the malformed text exactly as written.
+        truncated_body = '{"model":'
+        resp = requests.post(endpoint_url, headers=_anthropic_headers(), data=truncated_body, timeout=30)
+        _assert_fastapi_validation_error(resp)
+
+    def test_count_tokens_rejects_tools(self, backend, model_case, deployed_model_name: str):
+        """Adding Anthropic ``tools`` to an otherwise valid ``count_tokens`` request must
+        produce HTTP 400 with the fixed ``invalid_request_error`` message."""
+
+        plain_request = {
+            'model': deployed_model_name,
+            'messages': [{'role': 'user', 'content': 'Hi'}],
+        }
+        # Control request: without ``tools`` the same body must be accepted.
+        r_base = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json=plain_request,
+            timeout=30,
+        )
+        assert r_base.status_code == 200, r_base.text
+        _assert_count_tokens_json(r_base.json())
+
+        demo_tool = {
+            'name': 'demo',
+            'description': 'x',
+            'input_schema': {
+                'type': 'object',
+                'properties': {}
+            },
+        }
+        resp = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={**plain_request, 'tools': [demo_tool]},
+            timeout=30,
+        )
+        assert resp.status_code == 400, resp.text
+        body = resp.json()
+        _assert_anthropic_error_envelope(body)
+        error = body['error']
+        assert error['type'] == 'invalid_request_error'
+        assert error['message'] == 'Anthropic tool fields are temporarily unsupported.'
+
+    def test_count_tokens_with_system_content_blocks(self, backend, model_case, deployed_model_name: str):
+        """``count_tokens`` with ``system`` given as a block list must report more input
+        tokens than the same messages alone (``to_lmdeploy_messages`` flattens text)."""
+
+        messages = [{'role': 'user', 'content': 'Hello, estimate my token count.'}]
+        # Baseline: the messages without any system prompt.
+        resp_base = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={'model': deployed_model_name, 'messages': messages},
+            timeout=60,
+        )
+        assert resp_base.status_code == 200, resp_base.text
+        base_data = resp_base.json()
+        assert set(base_data.keys()) == {'input_tokens'}, base_data
+        baseline = base_data['input_tokens']
+        assert isinstance(baseline, int) and baseline > 0
+
+        # Same messages again, now preceded by a two-block ``system`` prompt.
+        system_blocks = [
+            {'type': 'text', 'text': 'You are helpful.'},
+            {'type': 'text', 'text': 'Answer briefly.'},
+        ]
+        resp = requests.post(
+            _COUNT_TOKENS_URL,
+            headers=_anthropic_headers(),
+            json={'model': deployed_model_name, 'system': system_blocks, 'messages': messages},
+            timeout=60,
+        )
+        assert resp.status_code == 200, resp.text
+        data = resp.json()
+        assert set(data.keys()) == {'input_tokens'}, data
+        with_system = data['input_tokens']
+        assert isinstance(with_system, int)
+        # Strictly greater: the system text has to contribute tokens of its own.
+        assert with_system > baseline, (
+            'system blocks should increase tokenized prompt vs same messages alone',
+            with_system,
+            baseline,
+        )
+
+    def test_messages_wrong_content_type(self, backend, model_case, deployed_model_name: str):
+        """A request declared as ``text/plain`` must fail the shared validation-error
+        check."""
+
+        plain_text_headers = {
+            'Content-Type': 'text/plain',
+            'anthropic-version': ANTHROPIC_VERSION,
+        }
+        # The body itself is valid JSON; only the declared media type is wrong.
+        resp = requests.post(_MESSAGES_URL, headers=plain_text_headers, data='{}', timeout=30)
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_invalid_message_role(self, backend, model_case, deployed_model_name: str):
+        """A ``system`` role inside ``messages`` must fail the shared validation-error
+        check."""
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 8,
+            # Only this entry is wrong; the rest of the request is well-formed.
+            'messages': [{'role': 'system', 'content': 'not allowed here'}],
+        }
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=30)
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_message_missing_role(self, backend, model_case, deployed_model_name: str):
+        """A message object without a ``role`` key must fail the shared validation-error
+        check."""
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 8,
+            # ``content`` alone: the ``role`` key is left out on purpose.
+            'messages': [{'content': 'Hi'}],
+        }
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=30)
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_max_tokens_zero(self, backend, model_case, deployed_model_name: str):
+        """``max_tokens`` of zero must fail the shared validation-error check."""
+
+        payload = {
+            'model': deployed_model_name,
+            # Zero is the value under test; everything else is a valid request.
+            'max_tokens': 0,
+            'messages': [{'role': 'user', 'content': 'Hi'}],
+        }
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=30)
+        # No generation is expected, hence the short timeout above.
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_messages_not_list(self, backend, model_case, deployed_model_name: str):
+        """``messages`` given as a single object instead of a list must fail the shared
+        validation-error check."""
+
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 8,
+            # A bare message dict, deliberately not wrapped in a list.
+            'messages': {'role': 'user', 'content': 'Hi'},
+        }
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=30)
+        _assert_fastapi_validation_error(resp)
+
+    def test_messages_stream_validation_error_returns_json(self, backend, model_case, deployed_model_name: str):
+        """Invalid bodies must not upgrade to ``text/event-stream``; FastAPI
+        returns JSON 422 even when ``stream`` is requested."""
+
+        # Streaming is requested, but ``max_tokens`` is negative, so validation must fail first.
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': -1,
+            'stream': True,
+            'messages': [{'role': 'user', 'content': 'Hi'}],
+        }
+        resp = requests.post(
+            _MESSAGES_URL, headers=_anthropic_headers(), json=payload, stream=True, timeout=30)
+        _assert_fastapi_validation_error(resp)
+
+        # Lower-cased for a case-insensitive comparison; a missing header is treated as an
+        # empty string, which then fails the ``application/json`` check.
+        ctype = (resp.headers.get('content-type') or '').lower()
+        assert 'application/json' in ctype
+        assert 'text/event-stream' not in ctype
+
+    def test_count_tokens_empty_messages(self, backend, model_case, deployed_model_name: str):
+        """Pydantic allows ``messages: []``; counting must still answer 200 with a body
+        that passes ``_assert_count_tokens_json``."""
+
+        payload = {
+            'model': deployed_model_name,
+            'messages': [],
+        }
+        resp = requests.post(_COUNT_TOKENS_URL, headers=_anthropic_headers(), json=payload, timeout=60)
+        assert resp.status_code == 200, resp.text
+        # Shape and value checks are delegated to the shared helper.
+        _assert_count_tokens_json(resp.json())
+
+    def test_messages_large_user_payload(self, backend, model_case, deployed_model_name: str):
+        """Regression guard for large JSON bodies (CI-sized payload, not
+        stress-test scale): the call must succeed and return non-blank text."""
+
+        # 128 * 1024 filler characters appended to a short instruction.
+        filler = 'x' * (128 * 1024)
+        prompt = f'Reply with one word: OK. Context:\n{filler}'
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 8,
+            'temperature': 0.01,
+            'messages': [{'role': 'user', 'content': prompt}],
+        }
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=180)
+        assert resp.status_code == 200, resp.text
+        data = _assert_success_message_json(resp.json(), model=deployed_model_name)
+        # Only the presence of visible text is checked, not its wording.
+        reply = _assistant_text_from_message_payload(data)
+        assert len(reply.strip()) > 0
+
+    def test_messages_rejects_tools_without_tool_call_parser(self, backend, model_case, deployed_model_name: str):
+        """``RESTFUL`` jobs start api_server *without* ``--tool-call-parser``;
+        ``tools`` must yield 400."""
+
+        # The OpenAI-style definition is converted to the Anthropic tool shape first.
+        weather_tool = openai_function_tool_to_anthropic(WEATHER_TOOL)
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 64,
+            'temperature': 0,
+            'messages': [{'role': 'user', 'content': 'What is the weather in Dallas, TX?'}],
+            'tools': [weather_tool],
+        }
+
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120)
+        # Status and message checks live in the shared helper.
+        _assert_tool_parser_required_message(resp)
+
+    def test_messages_rejects_tool_choice_with_tools_without_tool_call_parser(
+            self, backend, model_case, deployed_model_name: str):
+        """``tool_choice`` is only meaningful with ``tools``; the pair is still blocked
+        without ``--tool-call-parser``."""
+
+        # Same request as the plain ``tools`` rejection test, plus an ``auto`` tool choice.
+        weather_tool = openai_function_tool_to_anthropic(WEATHER_TOOL)
+        payload = {
+            'model': deployed_model_name,
+            'max_tokens': 64,
+            'temperature': 0,
+            'messages': [{'role': 'user', 'content': 'What is the weather in Dallas, TX?'}],
+            'tools': [weather_tool],
+            'tool_choice': {'type': 'auto'},
+        }
+
+        resp = requests.post(_MESSAGES_URL, headers=_anthropic_headers(), json=payload, timeout=120)
+        # Status and message checks live in the shared helper.
+        _assert_tool_parser_required_message(resp)
diff --git a/autotest/interface/restful/test_restful_chat_completions_v1.py b/autotest/interface/restful/test_restful_chat_completions_v1.py
index d3e9e7c119..63727793cc 100644
--- a/autotest/interface/restful/test_restful_chat_completions_v1.py
+++ b/autotest/interface/restful/test_restful_chat_completions_v1.py
@@ -2,7 +2,7 @@
 
 import pytest
 from openai import OpenAI
-from utils.constant import BACKEND_LIST, RESTFUL_MODEL_LIST
+from utils.constant import BACKEND_LIST, BASE_URL, RESTFUL_MODEL_LIST
 from utils.restful_return_check import (
     assert_chat_completions_batch_return,
     assert_chat_completions_stream_return,
@@ -17,10 +17,7 @@
 
 from lmdeploy.serve.openai.api_client import APIClient, get_model_list
 
-BASE_HTTP_URL = 'http://localhost'
-DEFAULT_PORT = 23333
 MODEL = 'internlm/Intern-S1'
-BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)])
 
 
 @pytest.mark.order(8)
diff --git a/autotest/interface/restful/test_restful_completions_v1.py b/autotest/interface/restful/test_restful_completions_v1.py
index 8c187aa09d..5012e06285 100644
--- a/autotest/interface/restful/test_restful_completions_v1.py
+++ b/autotest/interface/restful/test_restful_completions_v1.py
@@ -1,13 +1,10 @@
 import pytest
-from utils.constant import BACKEND_LIST, RESTFUL_BASE_MODEL_LIST
+from utils.constant import BACKEND_LIST, BASE_URL, RESTFUL_BASE_MODEL_LIST
 from utils.restful_return_check import assert_completions_batch_return, assert_completions_stream_return
 
 from lmdeploy.serve.openai.api_client import APIClient
 
-BASE_HTTP_URL = 'http://localhost'
-DEFAULT_PORT = 23333
 MODEL = 'internlm/internlm2_5-20b'
-BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)])
 
 
 @pytest.mark.parametrize('backend', BACKEND_LIST)
diff --git a/autotest/interface/restful/test_restful_generate.py b/autotest/interface/restful/test_restful_generate.py
index 6babb550e7..5a08ba445a 100644
--- a/autotest/interface/restful/test_restful_generate.py
+++ b/autotest/interface/restful/test_restful_generate.py
@@ -9,15 +9,11 @@
 import pytest
 import requests
 from transformers import AutoTokenizer
-from utils.constant import BACKEND_LIST, DEFAULT_SERVER, RESTFUL_MODEL_LIST
+from utils.constant import BACKEND_LIST, BASE_URL, RESTFUL_MODEL_LIST
 from utils.toolkit import encode_text, parse_sse_stream
 
 from lmdeploy.serve.openai.api_client import APIClient
 
-BASE_HTTP_URL = f'http://{DEFAULT_SERVER}'
-DEFAULT_PORT = 23333
-BASE_URL = ':'.join([BASE_HTTP_URL, str(DEFAULT_PORT)])
-
 
 @pytest.mark.parametrize('backend', BACKEND_LIST)
 @pytest.mark.parametrize('model_name', RESTFUL_MODEL_LIST)
diff --git a/autotest/interface/restful/tool_parser/test_tool_call_anthropic_sdk.py b/autotest/interface/restful/tool_parser/test_tool_call_anthropic_sdk.py
new file mode 100644
index 0000000000..65b9861e41
--- /dev/null
+++ b/autotest/interface/restful/tool_parser/test_tool_call_anthropic_sdk.py
@@ -0,0 +1,920 @@
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+from functools import lru_cache
+
+import pytest
+import requests
+from utils.config_utils import get_config
+from utils.constant import BASE_URL
+from utils.tool_reasoning_definitions import (
+    SEARCH_TOOL,
+    WEATHER_TOOL,
+    WEATHER_TOOL_SINGLE_LOCATION_ANTHROPIC,
+    get_async_anthropic_client_and_model,
+    openai_chat_messages_to_anthropic_kwargs,
+    openai_function_tool_to_anthropic,
+)
+
+from lmdeploy.serve.openai.api_client import APIClient
+
+from .conftest import MESSAGES_ASKING_FOR_WEATHER, _apply_marks, _ToolCallTestBase
+
+ANTHROPIC_VERSION = '2023-06-01'  # sent as the ``anthropic-version`` header by ``_http_headers``
+
+_EVAL_IMAGE_TIGER = 'tiger.jpeg'  # file name looked up under the configured ``resource_path``
+_TINY_PNG_BASE64 = (  # base64 PNG payload; presumably the one-pixel image of the prompt below - TODO confirm
+    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=='
+)
+
+_SOLID_COLOR_VLM_PROMPT = (  # asks the model to name the colour of a solid-colour image
+    'The image is a single solid color (one pixel). '
+    'Reply with at most three words: name that color only (e.g. red).'
+)
+
+_REDISH_COLOR_KEYWORDS = (  # lower-case substrings accepted by ``_assert_redish_color_in_text``
+    'red',
+    'crimson',
+    'scarlet',
+    'maroon',
+    'ruby',
+    'vermilion',
+    '红',
+    '赤',
+    '朱',
+    '绯',
+)
+
+
+@lru_cache(maxsize=1)
+def _eval_resource_path() -> str:
+    """Return the configured ``resource_path`` directory without trailing slashes (result is cached)."""
+    cfg_path = get_config().get('resource_path')
+    assert isinstance(cfg_path, str) and cfg_path, 'resource_path must be set in autotest config (e.g. config_h.yml)'
+    root = cfg_path.rstrip('/')
+    assert os.path.isdir(root), f'resource_path is not a directory: {root!r}'
+    return root
+
+
+def _eval_resource_file(filename: str) -> str:  # ``filename`` under the resource dir; asserts the file exists
+    candidate = os.path.join(_eval_resource_path(), filename)
+    assert os.path.isfile(candidate), f'missing offline eval resource {filename!r}: {candidate}'
+    return candidate
+
+
+def _model_likely_supports_anthropic_vlm(model_name: str) -> bool:
+    """Heuristic: True when the upper-cased model name contains a known VLM family marker."""
+    upper_name = model_name.upper()
+    markers = (
+        'VL',
+        'INTERNVL',
+        'INTERN-VL',
+        'QWEN-VL',
+        'QWEN2-VL',
+        'QWEN2.5-VL',
+        'QWEN3.5',
+        'MINICPM-V',
+        'LLAVA',
+        'COGVLM',
+        'XCOMPOSER',
+        'INTERNXCOMPOSER',
+        'INTERNS', 'INTERN-S',  # both spellings; the hyphenated one covers names like ``Intern-S1``
+    )
+    return any(marker in upper_name for marker in markers)
+
+
+def _http_headers() -> dict[str, str]:
+    """Return the JSON ``Content-Type`` and ``anthropic-version`` headers used by the HTTP tests."""
+    headers = {'Content-Type': 'application/json'}
+    headers['anthropic-version'] = ANTHROPIC_VERSION
+    return headers
+
+
+def _parse_anthropic_sse(raw: str) -> list[tuple[str | None, dict]]:
+    """Parse an SSE body into ``(event name or None, decoded JSON data)`` pairs, in stream order."""
+    pairs: list[tuple[str | None, dict]] = []
+    current_event: str | None = None
+    # Split on CR/LF only: ``str.splitlines`` would also cut at U+2028 and friends inside JSON text.
+    for line in raw.replace('\r\n', '\n').replace('\r', '\n').split('\n'):
+        if line.startswith('event:'):
+            current_event = line[len('event:'):].strip()
+        elif line.startswith('data:'):
+            data_str = line[len('data:'):].strip()
+            if data_str:  # an empty ``data:`` line keeps the pending event name
+                pairs.append((current_event, json.loads(data_str)))
+                current_event = None
+    return pairs
+
+
+def _aggregate_stream_text(events: list[tuple[str | None, dict]]) -> str:
+    """Concatenate the ``text`` of all ``text_delta`` payloads found in
+    ``content_block_delta`` events, in stream order."""
+    deltas = (
+        obj.get('delta') or {}
+        for _, obj in events
+        if obj.get('type') == 'content_block_delta'
+    )
+    return ''.join(d.get('text') or '' for d in deltas if d.get('type') == 'text_delta')
+
+
+def _sse_tool_use_names(raw: str) -> list[str]:
+    """Return the non-empty ``name`` of every ``tool_use`` block announced by a
+    ``content_block_start`` event in the SSE body ``raw``."""
+    started_blocks = (
+        obj.get('content_block') or {}
+        for _, obj in _parse_anthropic_sse(raw)
+        if obj.get('type') == 'content_block_start'
+    )
+    return [cb['name'] for cb in started_blocks if cb.get('type') == 'tool_use' and cb.get('name')]
+
+
+def _assert_redish_color_in_text(assembled: str, *, ctx: str) -> None:
+    """Assert ``assembled`` is non-blank and contains one of ``_REDISH_COLOR_KEYWORDS`` (case-insensitive)."""
+    assert len(assembled.strip()) > 0, (
+        f'{ctx}: no text_delta content in stream (prefix {assembled[:300]!r}). '
+        'Reasoning models may stream long thinking_delta first; if max_tokens is too low, '
+        'the run can end before any visible text block is emitted.'
+    )
+    al = assembled.lower()
+    # Keep the message a plain string so the failure output is not rendered as a tuple.
+    assert any(k in al for k in _REDISH_COLOR_KEYWORDS), f'{ctx}: expected red-ish color in reply: {assembled[:500]!r}'
+
+
+def _assistant_text_from_messages_json(data: dict) -> str:  # joined ``text`` of all ``text`` content blocks
+    return ''.join(block.get('text', '') for block in data.get('content', []) if block.get('type') == 'text')
+
+
+def _http_tool_use_blocks(data: dict) -> list[dict]:  # dict content blocks whose ``type`` is ``tool_use``
+    return [blk for blk in data.get('content', []) if isinstance(blk, dict) and blk.get('type') == 'tool_use']
+
+
+def _sdk_tool_use_blocks(msg) -> list:  # ``msg.content`` blocks whose ``type`` attribute is ``tool_use``
+    return [blk for blk in msg.content if getattr(blk, 'type', None) == 'tool_use']
+
+
+def _assert_weather_tool_city_state(inp: dict, *, ctx: str = '') -> None:
+    """Assert ``inp`` is a dict whose ``city`` and ``state`` values are non-empty strings
+    (``get_current_weather`` OpenAI-style args after parser mapping)."""
+
+    assert isinstance(inp, dict), (ctx, type(inp))
+    for key in ('city', 'state'):
+        value = inp.get(key)
+        assert isinstance(value, str) and len(value) > 0, (ctx, inp)
+
+
+def _log_append(path: str, text: str) -> None:  # best-effort append of one line; ``OSError`` is ignored
+    try:
+        with open(path, mode='a', encoding='utf-8') as log:
+            log.write(text + '\n')
+    except OSError:
+        return
+
+
+def _trace_anthropic_http(
+    log_file: str,
+    case: str,
+    *,
+    url: str,
+    http_status: int,
+    request_json: dict | None,
+    response_text: str,
+    max_chars: int = 24000,
+) -> None:
+    """Append one JSON line to ``tool_calls/*.log``.
+
+    The record carries ``case``, the URL, the HTTP status, the request JSON and the response
+    text; a response longer than ``max_chars`` is cut and marked as truncated. Same tree as
+    ``test_tool_call_advanced`` (``setup_log_file``).
+    """
+
+    # Cap the stored response text at ``max_chars`` characters plus a marker.
+    logged_response = response_text
+    if len(logged_response) > max_chars:
+        logged_response = logged_response[:max_chars] + '\n…[truncated]'
+
+    record = {
+        'anthropic_http_trace': case,
+        'url': url,
+        'status': http_status,
+        'request': request_json,
+        'response': logged_response,
+    }
+
+    # ``ensure_ascii=False`` writes non-ASCII text unescaped; ``default=str`` stringifies any
+    # value the JSON encoder cannot serialise itself.
+    line = json.dumps(record, ensure_ascii=False, default=str)
+    _log_append(log_file, line)
+
+
+# --- HTTP: ``tool_parser`` / ``--tool-call-parser`` jobs only ----------------------------
+
+
+@_apply_marks
+class TestAnthropicHttpToolMessages(_ToolCallTestBase):
+    """``POST /v1/messages`` over HTTP when api_server is launched *with*
+    ``--tool-call-parser``.
+
+    Adapter block semantics (``lmdeploy.serve.anthropic.adapter``):
+
+    * ``tool_use`` (assistant): prior tool call replay (``id``, ``name``, ``input``).
+    * ``tool_result`` (user): plain-text result for ``tool_use_id``.
+    * ``thinking`` / ``redacted_thinking``: replayed reasoning segments (e.g. Claude Code style).
+    """
+
+    def test_http_stream_tool_choice_force_named_tool(self, backend, model_case):
+        """Forcing ``tool_choice`` to the weather tool must surface a ``tool_use`` block
+        with that name in the SSE stream."""
+
+        model_name = APIClient(BASE_URL).available_models[0]
+        url = f'{BASE_URL}/v1/messages'
+        req_json = {
+            'model': model_name,
+            'max_tokens': 512,
+            'temperature': 0,
+            'stream': True,
+            'tool_choice': {
+                'type': 'tool',
+                'name': WEATHER_TOOL['function']['name'],
+            },
+            'messages': [{'role': 'user', 'content': 'What is the weather in Dallas, TX?'}],
+            'tools': [
+                openai_function_tool_to_anthropic(WEATHER_TOOL),
+                openai_function_tool_to_anthropic(SEARCH_TOOL),
+            ],
+        }
+        resp = requests.post(url, headers=_http_headers(), json=req_json, stream=True, timeout=180)
+        assert resp.status_code == 200, resp.text
+        # Join the bytes before decoding: a multi-byte UTF-8 character may straddle two chunks,
+        # and decoding every chunk on its own would then raise ``UnicodeDecodeError``.
+        raw = b''.join(resp.iter_content(chunk_size=None)).decode('utf-8')
+        _trace_anthropic_http(
+            self._log_file,
+            'test_http_stream_tool_choice_force_named_tool',
+            url=url,
+            http_status=resp.status_code,
+            request_json=req_json,
+            response_text=raw,
+        )
+        # Names come from ``content_block_start`` events whose block type is ``tool_use``.
+        names = _sse_tool_use_names(raw)
+        assert names, f'no tool_use content_block_start in SSE (first 800 chars): {raw[:800]!r}'
+        assert WEATHER_TOOL['function']['name'] in names, names
+
+    def test_http_stream_single_location_weather_tool(self, backend, model_case):
+        """With only the single-location weather tool offered, the SSE stream must
+        contain a ``tool_use`` block named ``get_current_weather``."""
+
+        model_name = APIClient(BASE_URL).available_models[0]
+        url = f'{BASE_URL}/v1/messages'
+        req_json = {
+            'model': model_name,
+            'max_tokens': 512,
+            'temperature': 0,
+            'stream': True,
+            'messages': [{'role': 'user', 'content': "What's the weather like in New York today?"}],
+            'tools': [WEATHER_TOOL_SINGLE_LOCATION_ANTHROPIC],
+        }
+        resp = requests.post(url, headers=_http_headers(), json=req_json, stream=True, timeout=180)
+        assert resp.status_code == 200, resp.text
+        # Join the bytes before decoding: a multi-byte UTF-8 character may straddle two chunks,
+        # and decoding every chunk on its own would then raise ``UnicodeDecodeError``.
+        raw = b''.join(resp.iter_content(chunk_size=None)).decode('utf-8')
+        _trace_anthropic_http(
+            self._log_file,
+            'test_http_stream_single_location_weather_tool',
+            url=url,
+            http_status=resp.status_code,
+            request_json=req_json,
+            response_text=raw,
+        )
+        # Names come from ``content_block_start`` events whose block type is ``tool_use``.
+        names = _sse_tool_use_names(raw)
+        assert names, f'no tool_use content_block_start in SSE (first 800 chars): {raw[:800]!r}'
+        assert 'get_current_weather' in names, names
+
+    def test_http_history_tool_use_and_tool_result_blocks(self, backend, model_case):
+        """Replay a prior ``tool_use`` / ``tool_result`` exchange as history and expect a
+        non-empty text answer to the follow-up question."""
+
+        model_name = APIClient(BASE_URL).available_models[0]
+        url = f'{BASE_URL}/v1/messages'
+
+        # Assistant block replaying the earlier tool call.
+        prior_tool_use = {
+            'type': 'tool_use',
+            'id': 'toolu_hist_01',
+            'name': 'get_current_weather',
+            'input': {'location': 'San Francisco'},
+        }
+        # User block carrying the plain-text result for the same call id.
+        prior_tool_result = {
+            'type': 'tool_result',
+            'tool_use_id': 'toolu_hist_01',
+            'content': '72F and sunny.',
+        }
+
+        req_json = {
+            'model': model_name,
+            'max_tokens': 8192,
+            'temperature': 0.01,
+            'messages': [
+                {'role': 'user', 'content': 'What is the weather in San Francisco?'},
+                {'role': 'assistant', 'content': [prior_tool_use]},
+                {'role': 'user', 'content': [prior_tool_result]},
+                {'role': 'user', 'content': 'In one short phrase, was it warm? Answer yes or no.'},
+            ],
+        }
+        resp = requests.post(
+            url,
+            headers=_http_headers(),
+            json=req_json,
+            timeout=120,
+        )
+        assert resp.status_code == 200, resp.text
+        data = resp.json()
+        _trace_anthropic_http(
+            self._log_file,
+            'test_http_history_tool_use_and_tool_result_blocks',
+            url=url,
+            http_status=resp.status_code,
+            request_json=req_json,
+            response_text=json.dumps(data, ensure_ascii=False, default=str),
+        )
+
+        text = _assistant_text_from_messages_json(data)
+        assert len(text) > 0, (
+            'expected non-empty assistant text blocks; '
+            f'stop_reason={data.get("stop_reason")!r} content={data.get("content")!r}'
+        )
+
+    def test_http_history_thinking_block_replay(self, backend, model_case):
+        """Replay an assistant turn that contains a ``thinking`` block and expect a
+        non-empty text answer to the follow-up request."""
+
+        model_name = APIClient(BASE_URL).available_models[0]
+        url = f'{BASE_URL}/v1/messages'
+
+        # Earlier assistant turn: a reasoning segment followed by the visible reply, sent back
+        # to the server as conversation history.
+        replayed_assistant_turn = [
+            {'type': 'thinking', 'thinking': '(internal scratchpad)'},
+            {'type': 'text', 'text': 'Hello — how can I help?'},
+        ]
+        req_json = {
+            'model': model_name,
+            'max_tokens': 8192,
+            'temperature': 0.01,
+            'messages': [
+                {'role': 'user', 'content': 'Hi.'},
+                {'role': 'assistant', 'content': replayed_assistant_turn},
+                {'role': 'user', 'content': 'Reply with exactly: ACK'},
+            ],
+        }
+        resp = requests.post(url, headers=_http_headers(), json=req_json, timeout=120)
+        assert resp.status_code == 200, resp.text
+        data = resp.json()
+        _trace_anthropic_http(
+            self._log_file,
+            'test_http_history_thinking_block_replay',
+            url=url,
+            http_status=resp.status_code,
+            request_json=req_json,
+            response_text=json.dumps(data, ensure_ascii=False, default=str),
+        )
+
+        text = _assistant_text_from_messages_json(data)
+        assert len(text) > 0, (
+            'expected non-empty assistant text blocks; '
+            f'stop_reason={data.get("stop_reason")!r} content={data.get("content")!r}'
+        )
+
+    def test_http_non_stream_tools_with_user_image_url(self, backend, model_case):
+        """``tools`` + user ``content`` blocks with ``image`` (VLM matrix only;
+        same tool contract as text-only)."""
+
+        model_name = APIClient(BASE_URL).available_models[0]
+        if not _model_likely_supports_anthropic_vlm(model_name):
+            pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test')
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        url = f'{BASE_URL}/v1/messages'
+        # The prompt tells the model to ignore the picture and call a tool.
+        question = (
+            'What is the weather in Dallas, TX? '
+            'Use the tools; the attached image is unrelated decoration.'
+        )
+        # The resource file path is passed in the ``url`` field of the image source.
+        user_content = [
+            {'type': 'text', 'text': question},
+            {'type': 'image', 'source': {'type': 'url', 'url': image_path}},
+        ]
+        req_json = {
+            'model': model_name,
+            'max_tokens': 512,
+            'temperature': 0,
+            'messages': [{'role': 'user', 'content': user_content}],
+            'tools': [
+                openai_function_tool_to_anthropic(WEATHER_TOOL),
+                openai_function_tool_to_anthropic(SEARCH_TOOL),
+            ],
+        }
+        resp = requests.post(url, headers=_http_headers(), json=req_json, timeout=180)
+        assert resp.status_code == 200, resp.text
+        data = resp.json()
+        _trace_anthropic_http(
+            self._log_file,
+            'test_http_non_stream_tools_with_user_image_url',
+            url=url,
+            http_status=resp.status_code,
+            request_json=req_json,
+            response_text=json.dumps(data, ensure_ascii=False, default=str),
+        )
+
+        # The first ``tool_use`` block must be the weather tool with ``city`` / ``state`` input.
+        assert data.get('stop_reason') == 'tool_use', data
+        blocks = _http_tool_use_blocks(data)
+        assert len(blocks) >= 1, data.get('content')
+        assert blocks[0].get('name') == WEATHER_TOOL['function']['name']
+        inp = blocks[0].get('input')
+        assert isinstance(inp, dict), inp
+        _assert_weather_tool_city_state(inp, ctx='test_http_non_stream_tools_with_user_image_url')
+
+    def test_http_stream_tools_with_user_image_url(self, backend, model_case):
+        """Streaming ``tools`` + user image URL (VLM): SSE must still surface
+        ``tool_use`` for the weather tool."""
+
+        model_name = APIClient(BASE_URL).available_models[0]
+        if not _model_likely_supports_anthropic_vlm(model_name):
+            pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test')
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        url = f'{BASE_URL}/v1/messages'
+        user_content = [
+            {'type': 'text', 'text': 'What is the weather in Dallas, TX? Use tools.'},
+            {'type': 'image', 'source': {'type': 'url', 'url': image_path}},
+        ]
+        req_json = {
+            'model': model_name,
+            'max_tokens': 512,
+            'temperature': 0,
+            'stream': True,
+            'messages': [{'role': 'user', 'content': user_content}],
+            'tools': [
+                openai_function_tool_to_anthropic(WEATHER_TOOL),
+                openai_function_tool_to_anthropic(SEARCH_TOOL),
+            ],
+        }
+        resp = requests.post(url, headers=_http_headers(), json=req_json, stream=True, timeout=180)
+        assert resp.status_code == 200, resp.text
+        # Join the bytes before decoding: a multi-byte UTF-8 character may straddle two chunks,
+        # and decoding every chunk on its own would then raise ``UnicodeDecodeError``.
+        raw = b''.join(resp.iter_content(chunk_size=None)).decode('utf-8')
+        _trace_anthropic_http(
+            self._log_file,
+            'test_http_stream_tools_with_user_image_url',
+            url=url,
+            http_status=resp.status_code,
+            request_json=req_json,
+            response_text=raw,
+        )
+        names = _sse_tool_use_names(raw)
+        assert names, f'no tool_use in SSE (first 800 chars): {raw[:800]!r}'
+        assert WEATHER_TOOL['function']['name'] in names, names
+
+    def test_http_stream_user_image_base64_solid_color_vlm(self, backend, model_case):
+        """Align with RESTful ``test_messages_user_image_base64_stream``: the SSE stream must
+        contain ``message_start``/``message_stop`` and its text must name the solid color."""
+
+        model_name = APIClient(BASE_URL).available_models[0]
+        if not _model_likely_supports_anthropic_vlm(model_name):
+            pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test')
+
+        url = f'{BASE_URL}/v1/messages'
+        req_json = {
+            'model': model_name,
+            'max_tokens': 16384,
+            'temperature': 0.01,
+            'stream': True,
+            'messages': [{
+                'role': 'user',
+                'content': [
+                    {'type': 'text', 'text': _SOLID_COLOR_VLM_PROMPT},
+                    {
+                        'type': 'image',
+                        'source': {
+                            'type': 'base64',
+                            'media_type': 'image/png',
+                            'data': _TINY_PNG_BASE64,
+                        },
+                    },
+                ],
+            }],
+        }
+        resp = requests.post(url, headers=_http_headers(), json=req_json, stream=True, timeout=180)
+        assert resp.status_code == 200, resp.text
+        raw = b''.join(resp.iter_content(chunk_size=None)).decode('utf-8')  # decode once; chars may span chunks
+        _trace_anthropic_http(
+            self._log_file,
+            'test_http_stream_user_image_base64_solid_color_vlm',
+            url=url,
+            http_status=resp.status_code,
+            request_json=req_json,
+            response_text=raw,
+        )
+        events = _parse_anthropic_sse(raw)
+        types = [obj.get('type') for _, obj in events]  # each parsed event is unpacked as a pair; only the object is used
+        assert 'message_start' in types
+        assert 'message_stop' in types
+        assembled = _aggregate_stream_text(events)
+        _assert_redish_color_in_text(assembled, ctx='test_http_stream_user_image_base64_solid_color_vlm')
+
+
+def _event_to_dict(event) -> dict:
+    """Return ``event.model_dump()`` if available, a dict *event* as-is, else ``{'repr': ...}``."""
+    if hasattr(event, 'model_dump'):
+        return event.model_dump()
+    # No ``model_dump``: pass dicts through untouched and wrap anything else in a one-key dict.
+    return event if isinstance(event, dict) else {'repr': repr(event)}
+
+
+async def _async_weather_tool_single_location_non_stream(log_file: str):
+    """Ask about New York weather offering only the single-``location`` tool; log and return the reply."""
+
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    reply = await sdk_client.messages.create(
+        tools=[WEATHER_TOOL_SINGLE_LOCATION_ANTHROPIC],
+        messages=[{'role': 'user', 'content': "What's the weather like in New York today?"}],
+        model=served_model, max_tokens=1024, temperature=0,
+    )
+    try:
+        log_line = reply.model_dump_json()
+    except Exception:  # logging is best-effort: degrade to plainer dumps
+        try:
+            log_line = json.dumps(reply.model_dump())
+        except Exception:
+            log_line = repr(reply)
+    _log_append(log_file, log_line)
+    return reply
+
+
+async def _async_tool_choice_force_named_tool(log_file: str):
+    """Non-stream call with ``tool_choice`` pinned to the weather tool by name
+    (weather + search tools offered); logs the reply and returns the SDK message."""
+
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    request_kwargs = openai_chat_messages_to_anthropic_kwargs(MESSAGES_ASKING_FOR_WEATHER)
+    offered_tools = [openai_function_tool_to_anthropic(spec) for spec in (WEATHER_TOOL, SEARCH_TOOL)]
+    reply = await sdk_client.messages.create(
+        model=served_model,
+        max_tokens=1024,
+        temperature=0,
+        tools=offered_tools,
+        tool_choice={'type': 'tool', 'name': WEATHER_TOOL['function']['name']},
+        **request_kwargs,
+    )
+    try:
+        log_line = reply.model_dump_json()
+    except Exception:  # logging is best-effort: degrade to plainer dumps
+        try:
+            log_line = json.dumps(reply.model_dump())
+        except Exception:
+            log_line = repr(reply)
+    _log_append(log_file, log_line)
+    return reply
+
+
+async def _async_tool_choice_any(log_file: str):
+    """Non-stream call with ``tool_choice={'type': 'any'}`` over the weather and
+    search tools; logs the serialized reply and returns the SDK message."""
+
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    request_kwargs = openai_chat_messages_to_anthropic_kwargs(MESSAGES_ASKING_FOR_WEATHER)
+    offered_tools = [openai_function_tool_to_anthropic(spec) for spec in (WEATHER_TOOL, SEARCH_TOOL)]
+    reply = await sdk_client.messages.create(
+        model=served_model,
+        max_tokens=1024,
+        temperature=0,
+        tools=offered_tools,
+        tool_choice={'type': 'any'},
+        **request_kwargs,
+    )
+    try:
+        log_line = reply.model_dump_json()
+    except Exception:  # logging is best-effort: degrade to plainer dumps
+        try:
+            log_line = json.dumps(reply.model_dump())
+        except Exception:
+            log_line = repr(reply)
+    _log_append(log_file, log_line)
+    return reply
+
+
+async def _async_messages_tool_non_stream_with_user_image(log_file: str, image_url: str):
+    """Non-stream weather request whose user turn also carries an image by URL.
+
+    The prompt asks for Dallas, TX weather and calls the image unrelated; the
+    weather and search tools are offered. The serialized reply is passed to
+    ``_log_append`` and the SDK message is returned.
+    """
+
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    offered_tools = [openai_function_tool_to_anthropic(spec) for spec in (WEATHER_TOOL, SEARCH_TOOL)]
+    prompt = 'What is the weather in Dallas, TX? Use tools; the image is unrelated context.'
+    user_turn = {
+        'role': 'user',
+        'content': [
+            {'type': 'text', 'text': prompt},
+            {'type': 'image', 'source': {'type': 'url', 'url': image_url}},
+        ],
+    }
+    reply = await sdk_client.messages.create(
+        model=served_model,
+        max_tokens=1024,
+        temperature=0,
+        tools=offered_tools,
+        messages=[user_turn],
+    )
+    try:
+        log_line = reply.model_dump_json()
+    except Exception:  # logging is best-effort: degrade to plainer dumps
+        try:
+            log_line = json.dumps(reply.model_dump())
+        except Exception:
+            log_line = repr(reply)
+    _log_append(log_file, log_line)
+    return reply
+
+
+async def _async_messages_tool_non_stream_with_user_image_base64(log_file: str):
+    """Non-stream weather request whose user turn also carries an inline base64 PNG.
+
+    Offers the weather and search tools, logs the serialized reply via
+    ``_log_append`` and returns the SDK message."""
+
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    offered_tools = [openai_function_tool_to_anthropic(spec) for spec in (WEATHER_TOOL, SEARCH_TOOL)]
+    image_block = {
+        'type': 'image',
+        'source': {'type': 'base64', 'media_type': 'image/png', 'data': _TINY_PNG_BASE64},
+    }
+    user_turn = {
+        'role': 'user',
+        'content': [
+            {'type': 'text', 'text': 'What is the weather in Dallas, TX? Use tools.'},
+            image_block,
+        ],
+    }
+    reply = await sdk_client.messages.create(
+        model=served_model,
+        max_tokens=1024,
+        temperature=0,
+        tools=offered_tools,
+        messages=[user_turn],
+    )
+    try:
+        log_line = reply.model_dump_json()
+    except Exception:  # logging is best-effort: degrade to plainer dumps
+        try:
+            log_line = json.dumps(reply.model_dump())
+        except Exception:
+            log_line = repr(reply)
+    _log_append(log_file, log_line)
+    return reply
+
+
+async def _async_messages_tool_non_stream(log_file: str):
+    """Non-stream ``MESSAGES_ASKING_FOR_WEATHER`` request with the weather and search
+    tools and no ``tool_choice``; logs the serialized reply and returns the SDK message."""
+
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    request_kwargs = openai_chat_messages_to_anthropic_kwargs(MESSAGES_ASKING_FOR_WEATHER)
+    offered_tools = [openai_function_tool_to_anthropic(spec) for spec in (WEATHER_TOOL, SEARCH_TOOL)]
+    reply = await sdk_client.messages.create(
+        model=served_model,
+        max_tokens=1024,
+        temperature=0,
+        tools=offered_tools,
+        **request_kwargs,
+    )
+    try:
+        log_line = reply.model_dump_json()
+    except Exception:  # logging is best-effort: degrade to plainer dumps
+        try:
+            log_line = json.dumps(reply.model_dump())
+        except Exception:
+            log_line = repr(reply)
+    _log_append(log_file, log_line)
+    return reply
+
+
+async def _async_messages_tool_stream(log_file: str):
+    """Stream the weather/tools request via ``messages.create(stream=True)`` and collect every event.
+
+    Returns ``('final', message, events)`` when the stream object yields a final message,
+    otherwise ``('raw', json_blob_of_events, events)``; either payload is logged first."""
+
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    request_kwargs = openai_chat_messages_to_anthropic_kwargs(MESSAGES_ASKING_FOR_WEATHER)
+    offered_tools = [openai_function_tool_to_anthropic(spec) for spec in (WEATHER_TOOL, SEARCH_TOOL)]
+    stream = await sdk_client.messages.create(
+        model=served_model,
+        max_tokens=1024,
+        temperature=0,
+        tools=offered_tools,
+        stream=True,
+        **request_kwargs,
+    )
+    events = [event async for event in stream]
+    _log_append(log_file, f'anthropic_stream_event_count={len(events)}')
+
+    final_msg = None
+    getter = getattr(stream, 'get_final_message', None)  # NOTE(review): may be absent on this stream type — confirm
+    if callable(getter):
+        try:
+            final_msg = await getter()
+        except Exception as err:  # noqa: BLE001 — SDK / server variance
+            _log_append(log_file, f'get_final_message_failed: {err!r}')
+
+    if final_msg is None:
+        blob = json.dumps([_event_to_dict(e) for e in events], default=str)
+        _log_append(log_file, blob[:16000])  # only the logged copy is truncated; the returned blob is whole
+        return 'raw', blob, events
+
+    try:
+        log_line = final_msg.model_dump_json()
+    except Exception:  # logging is best-effort: degrade to plainer dumps
+        try:
+            log_line = json.dumps(final_msg.model_dump())
+        except Exception:
+            log_line = repr(final_msg)
+    _log_append(log_file, log_line)
+    return 'final', final_msg, events
+
+
+async def _async_vlm_base64_solid_color_stream(log_file: str) -> tuple[str, str]:
+    """Stream the solid-color VLM prompt plus an inline base64 PNG through the SDK.
+
+    Returns ``(kind, text_or_blob)``: ``kind`` is ``'final'`` with the joined
+    text blocks when the stream object yields a final message, otherwise
+    ``'raw'`` with the JSON-serialized list of collected events.
+    """
+
+    sdk_client, served_model = get_async_anthropic_client_and_model()
+    image_block = {
+        'type': 'image',
+        'source': {
+            'type': 'base64',
+            'media_type': 'image/png',
+            'data': _TINY_PNG_BASE64,
+        },
+    }
+    stream = await sdk_client.messages.create(
+        model=served_model,
+        max_tokens=16384,
+        temperature=0.01,
+        stream=True,
+        messages=[{
+            'role': 'user',
+            'content': [{'type': 'text', 'text': _SOLID_COLOR_VLM_PROMPT}, image_block],
+        }],
+    )
+    events: list = [event async for event in stream]
+    _log_append(log_file, f'vlm_color_sdk_stream_events={len(events)}')
+
+    final_msg = None
+    getter = getattr(stream, 'get_final_message', None)
+    if callable(getter):
+        try:
+            final_msg = await getter()
+        except Exception as err:  # noqa: BLE001
+            _log_append(log_file, f'get_final_message_failed: {err!r}')
+
+    if final_msg is None:
+        blob = json.dumps([_event_to_dict(e) for e in events], default=str)
+        _log_append(log_file, blob[:16000])
+        return 'raw', blob
+
+    answer = ''.join(
+        (getattr(block, 'text', None) or '')
+        for block in final_msg.content
+        if getattr(block, 'type', None) == 'text'
+    )
+    try:
+        _log_append(log_file, final_msg.model_dump_json())
+    except Exception:
+        _log_append(log_file, repr(final_msg))
+    return 'final', answer
+
+
+@_apply_marks
+class TestAnthropicSdkToolCall(_ToolCallTestBase):
+    """Anthropic Messages + tools via official async SDK (end-to-end
+    integration); each test drives one ``_async_*`` helper with ``asyncio.run``."""
+
+    @pytest.fixture(autouse=True)
+    def _require_anthropic_sdk(self):
+        pytest.importorskip('anthropic')  # skip, rather than error, when the SDK is not installed
+
+    def test_tool_non_stream_weather(self, backend, model_case):
+        msg = asyncio.run(_async_messages_tool_non_stream(self._log_file))
+
+        assert msg.stop_reason == 'tool_use'
+        assert msg.role == 'assistant'
+        tool_blocks = _sdk_tool_use_blocks(msg)
+        assert len(tool_blocks) >= 1
+        block = tool_blocks[0]
+        assert block.name == WEATHER_TOOL['function']['name']
+
+        _assert_weather_tool_city_state(block.input, ctx='test_tool_non_stream_weather')
+
+        assert msg.usage is not None
+        assert msg.usage.input_tokens > 0
+        assert msg.usage.output_tokens > 0
+
+    def test_tool_stream_weather(self, backend, model_case):
+        kind, payload, events = asyncio.run(_async_messages_tool_stream(self._log_file))
+
+        assert len(events) > 0, 'expected at least one stream event'
+
+        if kind == 'final':
+            assert payload.stop_reason == 'tool_use'
+            tool_blocks = _sdk_tool_use_blocks(payload)
+            assert len(tool_blocks) >= 1
+            assert tool_blocks[0].name == WEATHER_TOOL['function']['name']
+            _assert_weather_tool_city_state(tool_blocks[0].input, ctx='test_tool_stream_weather/final')
+            return
+        # Raw fallback: tool arguments may arrive as ``input_json_delta`` fragments, so also search them rejoined.
+        args = ''.join(getattr(getattr(ev, 'delta', None), 'partial_json', None) or '' for ev in events)
+        assert WEATHER_TOOL['function']['name'] in payload
+        assert 'tool_use' in payload
+        assert 'dallas' in f'{payload}\n{args}'.lower()
+
+    def test_tool_non_stream_weather_single_location_schema(self, backend, model_case):
+        msg = asyncio.run(_async_weather_tool_single_location_non_stream(self._log_file))
+        assert msg.stop_reason == 'tool_use'
+        tool_blocks = _sdk_tool_use_blocks(msg)
+        assert len(tool_blocks) >= 1
+        assert tool_blocks[0].name == 'get_current_weather'
+        inp = tool_blocks[0].input
+        assert isinstance(inp, dict)
+        loc = inp.get('location', '')
+        assert isinstance(loc, str) and len(loc) > 0
+        loc_low = loc.lower()
+        assert 'new york' in loc_low or 'nyc' in loc_low
+
+    def test_tool_non_stream_tool_choice_force_named(self, backend, model_case):
+        msg = asyncio.run(_async_tool_choice_force_named_tool(self._log_file))
+        assert msg.stop_reason == 'tool_use'
+        tool_blocks = _sdk_tool_use_blocks(msg)
+        assert len(tool_blocks) >= 1
+        assert tool_blocks[0].name == WEATHER_TOOL['function']['name']
+
+    def test_tool_non_stream_tool_choice_any(self, backend, model_case):
+        msg = asyncio.run(_async_tool_choice_any(self._log_file))
+        assert msg.stop_reason == 'tool_use'
+        tool_blocks = _sdk_tool_use_blocks(msg)
+        assert len(tool_blocks) >= 1
+        names = {b.name for b in tool_blocks}  # any position is accepted here, not only the first block
+        assert WEATHER_TOOL['function']['name'] in names, names
+
+    def test_tool_non_stream_weather_with_user_image_url(self, backend, model_case):
+        model_name = APIClient(BASE_URL).available_models[0]
+        if not _model_likely_supports_anthropic_vlm(model_name):
+            pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test')
+
+        image_path = _eval_resource_file(_EVAL_IMAGE_TIGER)
+        msg = asyncio.run(_async_messages_tool_non_stream_with_user_image(self._log_file, image_path))
+        assert msg.stop_reason == 'tool_use'
+        tool_blocks = _sdk_tool_use_blocks(msg)
+        assert len(tool_blocks) >= 1
+        assert tool_blocks[0].name == WEATHER_TOOL['function']['name']
+        _assert_weather_tool_city_state(tool_blocks[0].input, ctx='test_tool_non_stream_weather_with_user_image_url')
+
+    def test_tool_non_stream_weather_with_user_image_base64(self, backend, model_case):
+        model_name = APIClient(BASE_URL).available_models[0]
+        if not _model_likely_supports_anthropic_vlm(model_name):
+            pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test')
+
+        msg = asyncio.run(_async_messages_tool_non_stream_with_user_image_base64(self._log_file))
+        assert msg.stop_reason == 'tool_use'
+        tool_blocks = _sdk_tool_use_blocks(msg)
+        assert len(tool_blocks) >= 1
+        assert tool_blocks[0].name == WEATHER_TOOL['function']['name']
+        _assert_weather_tool_city_state(
+            tool_blocks[0].input,
+            ctx='test_tool_non_stream_weather_with_user_image_base64',
+        )
+
+    def test_sdk_stream_vlm_user_image_base64_solid_color(self, backend, model_case):
+        """SDK streaming + 1×1 red PNG: final text (or raw event blob) should
+        mention a red-ish color."""
+
+        model_name = APIClient(BASE_URL).available_models[0]
+        if not _model_likely_supports_anthropic_vlm(model_name):
+            pytest.skip(f'model {model_name!r} is not treated as vision-capable for this test')
+
+        kind, payload = asyncio.run(_async_vlm_base64_solid_color_stream(self._log_file))
+        ctx = f'test_sdk_stream_vlm_user_image_base64_solid_color/{kind}'
+        _assert_redish_color_in_text(payload, ctx=ctx)
diff --git a/autotest/utils/constant.py b/autotest/utils/constant.py
index 87e2759395..fbc389d9c1 100644
--- a/autotest/utils/constant.py
+++ b/autotest/utils/constant.py
@@ -4,8 +4,9 @@
 DEFAULT_SERVER = os.getenv('MASTER_ADDR', '127.0.0.1')
 PROXY_PORT = 8000
 
-# Scalar presets for export/normalize fallback. Model-specific sampling (reasoning-effort,
-# top-k, chat-template-kwargs, …) live in per-model ``autotest/configs/**/gen_config``.
+BASE_HTTP_URL = f'http://{DEFAULT_SERVER}'  # scheme + host only, no port
+BASE_URL = f'{BASE_HTTP_URL}:{os.getenv("LMDEPLOY_PORT", str(DEFAULT_PORT))}'  # LMDEPLOY_PORT env overrides DEFAULT_PORT
+
 EVAL_CONFIGS = {
     'default': {
         'query_per_second': 4,
@@ -236,6 +237,7 @@ def _deps_profile_is_legacy() -> bool:
     'unsloth/gpt-oss-20b-BF16',
     'Qwen/Qwen2.5-7B-Instruct',
     'internlm/Intern-S1-Pro-FP8',
+    'internlm/interns2-preview-0509',
 ]
 
 TOOL_REASONING_MODEL_LIST_LEGACY = [
diff --git a/autotest/utils/tool_reasoning_definitions.py b/autotest/utils/tool_reasoning_definitions.py
index df4fa3ba04..a6a886605b 100644
--- a/autotest/utils/tool_reasoning_definitions.py
+++ b/autotest/utils/tool_reasoning_definitions.py
@@ -3,11 +3,7 @@
 import re
 
 from openai import OpenAI
-from utils.constant import DEFAULT_PORT
-
-BASE_HTTP_URL = f"http://{os.getenv('MASTER_ADDR', 'localhost')}"
-PORT = os.getenv('LMDEPLOY_PORT', str(DEFAULT_PORT))
-BASE_URL = f'{BASE_HTTP_URL}:{PORT}'
+from utils.constant import BASE_URL
 
 #: Think-tag delimiters used by DeepSeek-R1 and QwenQwQ parsers
 THINK_START_TOKEN = '<think>'
@@ -61,6 +57,22 @@
     },
 }
 
+# Anthropic ``tools[]`` entry (Messages API style) taking one required string argument, ``location``.
+WEATHER_TOOL_SINGLE_LOCATION_ANTHROPIC = {
+    'name': 'get_current_weather',
+    'description': 'Useful for querying the weather in a specified city.',
+    'input_schema': {
+        'type': 'object',
+        'properties': {
+            'location': {
+                'type': 'string',
+                'description': 'City or region, for example: New York, London, Tokyo, etc.',
+            },
+        },
+        'required': ['location'],
+    },
+}
+
 CALCULATOR_TOOL = {
     'type': 'function',
     'function': {
@@ -191,6 +203,64 @@ def get_client_and_model(base_url=None):
     return client, model_name
 
 
+def openai_function_tool_to_anthropic(openai_style_tool: dict) -> dict:
+    """Convert OpenAI ``{'type':'function','function':{...}}`` to an Anthropic ``tools[]`` item.
+    A missing/empty ``parameters`` becomes an empty object schema; ``description`` defaults to ``''``."""
+
+    fn = openai_style_tool['function']
+    return {
+        'name': fn['name'],
+        'description': fn.get('description') or '',
+        'input_schema': fn.get('parameters') or {'type': 'object', 'properties': {}},
+    }
+
+
+def openai_chat_messages_to_anthropic_kwargs(messages: list[dict]) -> dict:
+    """Build Anthropic ``messages`` (+ optional ``system``) kwargs from OpenAI-style *messages*:
+    system strings are joined with blank lines, user/assistant turns pass through unchanged."""
+
+    turns: list[dict] = []
+    system_parts: list[str] = []
+    for message in messages:
+        role, content = message['role'], message['content']
+        if role in ('user', 'assistant'):
+            turns.append({'role': role, 'content': content})
+            continue
+        if role != 'system':
+            raise ValueError(f'Unsupported message role for Anthropic: {role!r}')
+        if not isinstance(content, str):
+            raise TypeError('Anthropic path expects string system message content.')
+        system_parts.append(content)
+    result: dict = {'messages': turns}
+    # ``system`` is only emitted when at least one system message was present.
+    if system_parts:
+        result['system'] = '\n\n'.join(system_parts)
+    return result
+
+
+def get_async_anthropic_client_and_model(base_url: str | None = None):
+    """Return ``(AsyncAnthropic, model_name)`` for LMDeploy (Anthropic routes on server root).
+    *base_url* falls back to ``BASE_URL``; raises ``RuntimeError`` if the server lists no models."""
+
+    import anthropic
+
+    from lmdeploy.serve.openai.api_client import get_model_list
+
+    url = base_url or BASE_URL
+    model_names = get_model_list(f'{url}/v1/models')
+    if not model_names:
+        raise RuntimeError(f'No models returned from {url}/v1/models')
+    model_name = model_names[0]  # the first listed model is the one every test targets
+    client = anthropic.AsyncAnthropic(
+        api_key=os.getenv('ANTHROPIC_API_KEY', 'YOUR_API_KEY'),  # placeholder key when the env var is unset
+        base_url=url,
+        max_retries=0,  # no SDK-level retries
+        timeout=600.0,
+        default_headers={'anthropic-version': '2023-06-01'},
+    )
+    return client, model_name
+
+
 # -- Logging / client helpers ------------------------------------------------
 
 
@@ -388,7 +458,9 @@ def collect_stream_reasoning(stream):
         finish_reason       – last non-None finish_reason
         finish_reason_count – how many chunks carried a non-None finish_reason
         role                – first non-None role value
-        role_count          – how many chunks carried a non-None role
+        role_count          – number of *distinct* role values in stream order;
+                              consecutive chunks repeating the same ``delta.role``
+                              count once (some LMDeploy backends resend ``role`` every chunk)
         chunk_count         – total number of chunks received
         reasoning_chunks    – number of chunks containing reasoning
         content_chunks      – number of chunks containing content
@@ -406,6 +478,7 @@ def collect_stream_reasoning(stream):
         'content_chunks': 0,
     }
 
+    last_distinct_role = None
     for chunk in stream:
         result['chunk_count'] += 1
         if not chunk.choices:
@@ -418,8 +491,11 @@ def collect_stream_reasoning(stream):
 
         delta = choice.delta
         if delta.role:
-            result['role'] = delta.role
-            result['role_count'] += 1
+            if result['role'] is None:
+                result['role'] = delta.role
+            if last_distinct_role != delta.role:
+                result['role_count'] += 1
+                last_distinct_role = delta.role
 
         # -- reasoning_content (lmdeploy extension field) -------------------
         rc = getattr(delta, 'reasoning_content', None)
diff --git a/requirements/test.txt b/requirements/test.txt
index 6061aaafde..3580dfc179 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1,4 +1,5 @@
 allure-pytest
+anthropic>=0.39.0
 coverage
 jsonschema
 matplotlib