Skip to content

Commit aa5f79f

Browse files
author
Sayan Shaw
committed
initial harmony changes
1 parent aad7b67 commit aa5f79f

2 files changed

Lines changed: 457 additions & 7 deletions

File tree

shared/api/chat_template.cc

Lines changed: 36 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -413,28 +413,57 @@ OrtxStatus TokenizerImpl::ApplyChatTemplate(const char* template_str, const char
413413
// Check Phi-4-mini tool call case for quote normalization
414414
bool phi_4_mini = false;
415415

416+
// Determine whether to skip tool normalization based on template content.
417+
// GPT-OSS/Harmony templates access `tool.function` directly (they expect the raw OpenAI format),
418+
// so NormalizeTools() would break them by unwrapping the function object.
419+
// Other templates (Phi-4, Qwen) either use a flat format or access `tool_call.function`.
420+
bool skip_tool_normalization = false;
421+
{
422+
std::string tmpl_str(activated_str);
423+
// Look for an occurrence of "tool.function" that is not immediately preceded by "call."
424+
size_t pos = 0;
425+
while ((pos = tmpl_str.find("tool.function", pos)) != std::string::npos) {
426+
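// Skip matches immediately preceded by "call." (NOTE(review): "tool_call.function" does not contain the substring "tool.function", so find() never matches inside it and this guard does not filter that pattern; confirm the intended exclusion)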
427+
if (pos < 5 || tmpl_str.substr(pos - 5, 5) != "call.") {
428+
skip_tool_normalization = true;
429+
break;
430+
}
431+
pos += 13;
432+
}
433+
}
434+
416435
// Case 1: Check if tools are inside messages (for Phi-4-mini)
417436
if (actual_messages.is_array()) {
418437
for (auto& message_obj : actual_messages) {
419438
if (message_obj.contains("tools")) {
420439
// Set flag for Phi-4 tools to true
421440
phi_4_mini = true;
422441

423-
// Normalize the tools inside the message
424-
json tools_json = NormalizeTools(message_obj["tools"].get<std::string>().c_str());
425-
426-
// Update the tools in the message
427-
message_obj["tools"] = tools_json;
442+
if (skip_tool_normalization) {
443+
// GPT-OSS/Harmony: parse tools as-is without normalization
444+
json tools_json = json::parse(message_obj["tools"].get<std::string>().c_str());
445+
message_obj["tools"] = tools_json;
446+
} else {
447+
// Normalize the tools inside the message
448+
json tools_json = NormalizeTools(message_obj["tools"].get<std::string>().c_str());
449+
message_obj["tools"] = tools_json;
450+
}
428451
}
429452
}
430453
}
431454

432455
// Case 2: Check if we received tools separately (for Qwen or others)
433456
if (tools && *tools) {
434457
std::string tools_str = minja::normalize_newlines(tools);
435-
json tools_json = NormalizeTools(tools_str.c_str());
458+
json tools_json;
459+
if (skip_tool_normalization) {
460+
// GPT-OSS/Harmony: pass raw tools without normalization
461+
tools_json = json::parse(tools_str.c_str());
462+
} else {
463+
tools_json = NormalizeTools(tools_str.c_str());
464+
}
436465

437-
// Add normalized tools to the context if tools are passed separately
466+
// Add tools to the context
438467
context = minja::Context::make(json({
439468
{"messages", actual_messages},
440469
{"tools", tools_json},

0 commit comments

Comments
 (0)