-
Notifications
You must be signed in to change notification settings - Fork 1k
Expand file tree
/
Copy pathtext_llm_worker.cpp
More file actions
62 lines (51 loc) · 2.32 KB
/
Copy pathtext_llm_worker.cpp
File metadata and controls
62 lines (51 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
// Generic model-execution worker for standard .pte TextLLM models.
//
// All model execution lives here in C++ (via TextLLMEngine / TextLLMSession,
// the stable serving abstraction) — no Python model code, no pybind, no
// in-process Python serving. The OpenAI control plane (Python) spawns this
// process and drives it over JSONL on stdin/stdout (see worker_client.py). The
// JSONL protocol, session management, and the decode loop are shared across all
// workers in worker_loop.h; this file only constructs the engine/tokenizer.
// TextLLMEngine hosts a single session, so the worker serves anonymous requests
// via the shared loop's scratch session and reports no named sessions.
#include <gflags/gflags.h>
#include <executorch/extension/llm/runner/llm_runner_helper.h>
#include <executorch/extension/llm/server/cpp/worker_loop.h>
#include <executorch/runtime/platform/log.h>
#include <optional>
// Command-line flags (gflags). Both are mandatory: main() logs an error and
// exits with status 1 when either one is left at its empty default.
DEFINE_string(model_path, "", "Self-contained model .pte file path.");
DEFINE_string(tokenizer_path, "", "HuggingFace tokenizer.json path.");
namespace {
// File-local shorthand for the ExecuTorch LLM extension namespace.
namespace llm = ::executorch::extension::llm;
} // namespace
int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_path.empty() || FLAGS_tokenizer_path.empty()) {
ET_LOG(
Error, "text_llm_worker: --model_path and --tokenizer_path required");
return 1;
}
// TextLLMEngine requires a self-contained .pte: external .ptd weights are not
// supported for shared sessions (a model-specific worker handles that path).
auto engine = llm::TextLLMEngine::create(
FLAGS_model_path, FLAGS_tokenizer_path, std::nullopt);
if (!engine) {
ET_LOG(Error, "text_llm_worker: failed to create engine");
return 1;
}
// The session decodes token ids to text internally; this tokenizer encodes
// the rendered prompt to ids. Same tokenizer.json -> same vocabulary.
auto tokenizer = llm::load_tokenizer(FLAGS_tokenizer_path);
if (!tokenizer) {
ET_LOG(Error, "text_llm_worker: failed to load tokenizer");
return 1;
}
return llm::run_worker_stdio_loop(*engine, *tokenizer, engine->metadata());
}