
Commit 4c104f9

feat(server): add Anthropic Messages API endpoint (/v1/messages)
Enables Claude Code and other Anthropic-compatible tools to use ATOM as a
backend. Translates between the Anthropic Messages format and ATOM's internal
OpenAI format.

Supports:
- Non-streaming and streaming responses
- System messages, multi-turn conversations
- Thinking/reasoning content separation (via ReasoningFilter)
- Anthropic SSE event format (message_start, content_block_delta, etc.)
- Tool definitions translation (Anthropic → OpenAI format)

Usage with Claude Code:

    ANTHROPIC_BASE_URL=http://localhost:8000 \
    ANTHROPIC_AUTH_TOKEN=dummy \
    ANTHROPIC_MODEL=MiniMax-M2.7 \
    claude
1 parent a8f75b8 commit 4c104f9
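
For a direct test of the endpoint without Claude Code, a minimal sketch of a non-streaming request (the local URL and model name are assumptions taken from the usage snippet above; the response shape follows the public Anthropic Messages format):

    # Illustrative only: minimal non-streaming call against the new endpoint.
    # Assumes a local ATOM server on port 8000.
    import requests

    resp = requests.post(
        "http://localhost:8000/v1/messages",
        json={
            "model": "MiniMax-M2.7",
            "max_tokens": 256,
            "system": "You are a terse assistant.",
            "messages": [{"role": "user", "content": "Say hello."}],
        },
    )
    resp.raise_for_status()
    # The body carries a list of content blocks: a "thinking" block first
    # when the model emits reasoning, then a "text" block.
    for block in resp.json()["content"]:
        print(block["type"], block.get("text") or block.get("thinking", ""))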

3 files changed

Lines changed: 776 additions & 0 deletions

File tree

atom/entrypoints/openai/api_server.py

Lines changed: 152 additions & 0 deletions
@@ -44,6 +44,18 @@
     stream_chat_response,
     stream_chat_response_fanout,
 )
+from .serving_anthropic import (
+    AnthropicMessagesRequest,
+    anthropic_to_openai_messages,
+    anthropic_to_openai_tools,
+    build_anthropic_response,
+    stream_content_block_delta,
+    stream_content_block_start,
+    stream_content_block_stop,
+    stream_message_delta,
+    stream_message_start,
+    stream_message_stop,
+)
 from .serving_completion import (
     build_completion_response,
     build_completion_response_multi,
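
The helpers imported here live in serving_anthropic.py, which is part of this commit but not shown in this excerpt. As a rough, hypothetical sketch of the message translation (dict-shaped messages and text-only content blocks are assumptions, not the confirmed implementation):

    # Hypothetical sketch of anthropic_to_openai_messages. Assumes each
    # message is a dict whose "content" is either a plain string or a list
    # of Anthropic content blocks.
    def anthropic_to_openai_messages(messages, system=None):
        openai_messages = []
        if system:
            # Anthropic carries the system prompt as a top-level field;
            # OpenAI-style chat expects it as the first message.
            openai_messages.append({"role": "system", "content": system})
        for msg in messages:
            content = msg["content"]
            if isinstance(content, list):
                # Flatten block lists, keeping only text blocks.
                content = "".join(
                    block.get("text", "")
                    for block in content
                    if block.get("type") == "text"
                )
            openai_messages.append({"role": msg["role"], "content": content})
        return openai_messages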
@@ -773,6 +785,146 @@ async def completions(request: CompletionRequest):
         raise HTTPException(status_code=500, detail=str(e))
 
 
+@app.post("/v1/messages")
+async def anthropic_messages(request: AnthropicMessagesRequest, raw_request: Request):
+    """Handle Anthropic Messages API requests.
+
+    Translates Anthropic format to OpenAI format internally, runs inference,
+    and returns Anthropic-formatted responses. Enables Claude Code and other
+    Anthropic-compatible tools to use ATOM as a backend.
+    """
+    global engine, tokenizer, model_name
+
+    try:
+        # Convert Anthropic messages to OpenAI format
+        openai_messages = anthropic_to_openai_messages(request.messages, request.system)
+
+        # Apply chat template
+        from .protocol import ChatMessage
+
+        messages = [ChatMessage(**m) for m in openai_messages]
+
+        merged_kwargs = dict(default_chat_template_kwargs)
+        prompt = apply_chat_template(
+            tokenizer,
+            custom_message_encoder,
+            [msg.to_template_dict() for msg in messages],
+            tools=None,
+            **merged_kwargs,
+        )
+
+        sampling_params = _build_sampling_params(
+            temperature=request.temperature or 1.0,
+            max_tokens=request.max_tokens,
+            stop_strings=request.stop_sequences,
+            top_k=request.top_k,
+            top_p=request.top_p,
+        )
+
+        request_id = uuid.uuid4().hex[:24]
+        input_tokens = len(tokenizer.encode(prompt))
+
+        if request.stream:
+            # Streaming response
+            seq_id, stream_queue = await setup_streaming_request(
+                prompt, sampling_params, request_id
+            )
+
+            async def generate_anthropic_stream():
+                from .reasoning import ReasoningFilter
+
+                reasoning_filter = ReasoningFilter()
+                block_index = 0
+                started_text = False
+                started_thinking = False
+                output_tokens = 0
+
+                yield stream_message_start(request_id, model_name, input_tokens)
+
+                try:
+                    while True:
+                        chunk_data = await stream_queue.get()
+                        new_text = chunk_data["text"]
+                        output_tokens += len(chunk_data.get("token_ids", []))
+                        finished = chunk_data.get("finished", False)
+
+                        segments = reasoning_filter.process(new_text)
+                        if finished:
+                            segments.extend(reasoning_filter.flush())
+
+                        for field, text in segments:
+                            if not text:
+                                continue
+
+                            if field == "reasoning_content":
+                                if not started_thinking:
+                                    yield stream_content_block_start(block_index, "thinking")
+                                    started_thinking = True
+                                yield stream_content_block_delta(block_index, text, "thinking")
+                            else:
+                                if started_thinking and not started_text:
+                                    yield stream_content_block_stop(block_index)
+                                    block_index += 1
+                                if not started_text:
+                                    yield stream_content_block_start(block_index, "text")
+                                    started_text = True
+                                yield stream_content_block_delta(block_index, text, "text")
+
+                        if finished:
+                            if started_thinking and not started_text:
+                                yield stream_content_block_stop(block_index)
+                                block_index += 1
+                                yield stream_content_block_start(block_index, "text")
+                                yield stream_content_block_delta(block_index, "", "text")
+                            yield stream_content_block_stop(block_index)
+                            yield stream_message_delta("end_turn", output_tokens)
+                            yield stream_message_stop()
+                            break
+                finally:
+                    cleanup_streaming_request(seq_id)
+
+            return StreamingResponse(
+                generate_anthropic_stream(),
+                media_type="text/event-stream",
+                headers={
+                    "anthropic-version": "2023-06-01",
+                    "x-request-id": request_id,
+                },
+            )
+
+        # Non-streaming response
+        from .reasoning import separate_reasoning
+
+        final_output = None
+        async for output in generate_async(prompt, sampling_params, request_id):
+            final_output = output
+        if final_output is None:
+            raise RuntimeError("No output generated")
+
+        raw_text = final_output["text"]
+        reasoning_content, content = separate_reasoning(raw_text)
+        output_tokens = len(tokenizer.encode(raw_text))
+
+        return build_anthropic_response(
+            request_id=request_id,
+            model=model_name,
+            content_text=content,
+            reasoning_content=reasoning_content,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+        )
+
+    except Exception as e:
+        logger.error(f"Error in anthropic_messages: {e}", exc_info=True)
+        return JSONResponse(
+            status_code=500,
+            content={
+                "type": "error",
+                "error": {"type": "api_error", "message": str(e)},
+            },
+        )
+
+
 @app.get("/v1/models")
 async def list_models():
     """List available models."""
