Skip to content

Commit e6e24ff

Browse files
authored
server: Enable transcriptions API for LFM2-Audio (ggml-org#22000)
1 parent d313b6b commit e6e24ff

5 files changed

Lines changed: 43 additions & 14 deletions

File tree

common/chat.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,26 @@ bool common_chat_templates_was_explicit(const struct common_chat_templates * tmp
544544
return tmpls->has_explicit_template;
545545
}
546546

547+
// LFM2 format detection: the chat template wraps the tool list in
// <|tool_list_start|>[...]<|tool_list_end|> and each tool call in
// <|tool_call_start|>[...]<|tool_call_end|>; the presence of the
// tool-list sentinel pair is enough to identify the family.
static bool is_lfm2_template(const std::string & src) {
    const bool has_list_start = src.find("<|tool_list_start|>") != std::string::npos;
    const bool has_list_end   = src.find("<|tool_list_end|>")   != std::string::npos;
    return has_list_start && has_list_end;
}
553+
554+
// Build the per-model prompt preset for the ASR (transcriptions) endpoint.
// Default: no system message, generic "Transcribe audio to text" user prompt.
// LFM2-Audio templates instead expect the instruction as the system message
// ("Perform ASR.") with an empty user prompt.
common_chat_prompt_preset common_chat_get_asr_prompt(const common_chat_templates * chat_templates) {
    common_chat_prompt_preset preset;
    preset.system = "";
    preset.user   = "Transcribe audio to text";

    const bool has_default = chat_templates && chat_templates->template_default;
    if (has_default && is_lfm2_template(chat_templates->template_default->source())) {
        // LFM2 convention: task instruction goes into the system slot
        preset.system = "Perform ASR.";
        preset.user   = "";
    }

    return preset;
}
566+
547567
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
548568
if (!variant.empty()) {
549569
if (variant == "tool_use") {
@@ -2053,10 +2073,7 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
20532073
return common_chat_params_init_kimi_k2(tmpl, params);
20542074
}
20552075

2056-
// LFM2 format detection: template uses <|tool_list_start|>[...]<|tool_list_end|> around the tool list
2057-
// and <|tool_call_start|>[...]<|tool_call_end|> around each tool call
2058-
if (src.find("<|tool_list_start|>") != std::string::npos &&
2059-
src.find("<|tool_list_end|>") != std::string::npos) {
2076+
if (is_lfm2_template(src)) {
20602077
LOG_DBG("Using specialized template: LFM2\n");
20612078
return common_chat_params_init_lfm2(tmpl, params);
20622079
}
@@ -2365,4 +2382,3 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
23652382
GGML_ASSERT(chat_templates->template_default != nullptr);
23662383
return chat_templates->template_default->caps.to_map();
23672384
}
2368-

common/chat.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,3 +274,11 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
274274
const common_chat_template & tmpl,
275275
const std::string & src,
276276
autoparser::generation_params & params);
277+
278+
// specialized per-task preset
// Pair of prompt strings used to seed a chat for a specific task
// (currently ASR via common_chat_get_asr_prompt); an empty string
// means the corresponding message is omitted.
struct common_chat_prompt_preset {
    std::string system; // content of the system message ("" = no system message)
    std::string user;   // default content of the user message
};
283+
284+
common_chat_prompt_preset common_chat_get_asr_prompt(const common_chat_templates * chat_templates);

tools/server/server-chat.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@ json server_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
535535

536536
json convert_transcriptions_to_chatcmpl(
537537
const json & inp_body,
538+
const common_chat_templates * tmpls,
538539
const std::map<std::string, raw_buffer> & in_files,
539540
std::vector<raw_buffer> & out_files) {
540541
// TODO @ngxson : this function may need to be improved in the future
@@ -548,27 +549,29 @@ json convert_transcriptions_to_chatcmpl(
548549
}
549550

550551
// handle input data
551-
std::string prompt = json_value(inp_body, "prompt", std::string());
552-
std::string language = json_value(inp_body, "language", std::string());
552+
std::string prompt = json_value(inp_body, "prompt", std::string());
553+
std::string language = json_value(inp_body, "language", std::string());
553554
std::string response_format = json_value(inp_body, "response_format", std::string("json"));
554555
if (response_format != "json") {
555556
throw std::invalid_argument("Only 'json' response_format is supported for transcription");
556557
}
558+
const common_chat_prompt_preset preset = common_chat_get_asr_prompt(tmpls);
557559
if (prompt.empty()) {
558-
prompt = "Transcribe audio to text";
560+
prompt = preset.user;
559561
}
560562
if (!language.empty()) {
561563
prompt += string_format(" (language: %s)", language.c_str());
562564
}
563565
prompt += get_media_marker();
564566

567+
json messages = json::array();
568+
if (!preset.system.empty()) {
569+
messages.push_back({{"role", "system"}, {"content", preset.system}});
570+
}
571+
messages.push_back({{"role", "user"}, {"content", prompt}});
572+
565573
json chatcmpl_body = inp_body; // copy all fields
566-
chatcmpl_body["messages"] = json::array({
567-
{
568-
{"role", "user"},
569-
{"content", prompt},
570-
},
571-
});
574+
chatcmpl_body["messages"] = messages;
572575

573576
// because input from form-data, everything is string, we need to correct the types here
574577
std::string stream = json_value(inp_body, "stream", std::string("false"));

tools/server/server-chat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ json server_chat_convert_anthropic_to_oai(const json & body);
1818
// convert OpenAI transcriptions API format to OpenAI Chat Completions API format
1919
json convert_transcriptions_to_chatcmpl(
2020
const json & body,
21+
const common_chat_templates * tmpls,
2122
const std::map<std::string, raw_buffer> & in_files,
2223
std::vector<raw_buffer> & out_files);
2324

tools/server/server-context.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3807,6 +3807,7 @@ void server_routes::init_routes() {
38073807
std::vector<raw_buffer> files;
38083808
json body = convert_transcriptions_to_chatcmpl(
38093809
json::parse(req.body),
3810+
meta->chat_params.tmpls.get(),
38103811
req.files,
38113812
files);
38123813
SRV_DBG("%s\n", "Request converted: OpenAI Transcriptions -> OpenAI Chat Completions");

0 commit comments

Comments
 (0)