Commit f18db3e

cli: add option to connect to server via http(s)

1 parent c3c1505

6 files changed, 774 additions & 107 deletions

common/arg.cpp

Lines changed: 9 additions & 2 deletions
```diff
@@ -600,9 +600,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
     }
 
-    // model is required (except for server)
+    // model is required (except for server, or when using --endpoint in CLI)
     // TODO @ngxson : maybe show a list of available models in CLI in this case
-    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !skip_model_download && !params.usage && !params.completion) {
+    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !skip_model_download && !params.usage && !params.completion && params.endpoint.empty()) {
         throw std::invalid_argument("error: --model is required\n");
     }
 
@@ -1398,6 +1398,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.show_timings = value;
         }
     ).set_examples({LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_SHOW_TIMINGS"));
+    add_opt(common_arg(
+        {"--endpoint"}, "URL",
+        string_format("connect to a running llama-server at URL instead of loading a model locally (e.g. http://localhost:8080)"),
+        [](common_params & params, const std::string & value) {
+            params.endpoint = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_ENDPOINT"));
     add_opt(common_arg(
         {"-f", "--file"}, "FNAME",
         "a file containing the prompt (default: none)",
```

common/common.h

Lines changed: 4 additions & 0 deletions
```diff
@@ -555,6 +555,10 @@ struct common_params {
 
     bool single_turn = false; // single turn chat conversation
 
+    // remote server endpoint for CLI (e.g. "http://localhost:8080")
+    // when set, CLI connects to a running server instead of loading a model
+    std::string endpoint = ""; // NOLINT
+
     ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
     ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
 
```
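
The field defaults to empty, so local model loading remains the default path. A minimal sketch of how the CLI entry point might branch on it, assuming a `run_remote_cli` helper exported by the new cli-remote.cpp (the helper name and dispatch shape are assumptions, not the commit's actual code):

```cpp
#include "common.h"

// assumed to live in cli-remote.cpp; the real entry point may differ
int run_remote_cli(const common_params & params);

static int dispatch(common_params & params) {
    if (!params.endpoint.empty()) {
        // --endpoint given: skip local model loading entirely and
        // talk to the running llama-server over HTTP(S) instead
        return run_remote_cli(params);
    }
    // ... otherwise the existing local path: load params.model and run ...
    return 0;
}
```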

tools/cli/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,6 +1,6 @@
 set(TARGET llama-cli)
-add_executable(${TARGET} cli.cpp)
-target_link_libraries(${TARGET} PRIVATE server-context PUBLIC llama-common ${CMAKE_THREAD_LIBS_INIT})
+add_executable(${TARGET} cli.cpp cli-remote.cpp)
+target_link_libraries(${TARGET} PRIVATE server-context cpp-httplib PUBLIC llama-common ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
 
 include_directories(../server)
```
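
The new `cpp-httplib` link dependency suggests the remote path speaks plain HTTP(S) to the server. A rough sketch of what cli-remote.cpp could look like under that assumption; `/health` and `/completion` are llama-server's existing routes, but everything else here is illustrative, not the commit's implementation:

```cpp
// cli-remote.cpp -- illustrative sketch only, assuming cpp-httplib
#include "common.h"
#include "httplib.h"

#include <cstdio>
#include <string>

int run_remote_cli(const common_params & params) {
    // cpp-httplib's client accepts a scheme-qualified base URL, which
    // covers both http:// and https:// (the latter needs a TLS-enabled build)
    httplib::Client cli(params.endpoint);

    // confirm the server is reachable before sending any work
    if (auto res = cli.Get("/health"); !res || res->status != 200) {
        fprintf(stderr, "error: cannot reach server at %s\n", params.endpoint.c_str());
        return 1;
    }

    // send the prompt to the server's /completion endpoint; a real
    // implementation would build this with a JSON library rather than
    // string concatenation (no escaping is done here)
    const std::string body = "{\"prompt\": \"" + params.prompt + "\", \"n_predict\": 128}";
    auto res = cli.Post("/completion", body, "application/json");
    if (!res || res->status != 200) {
        fprintf(stderr, "error: completion request failed\n");
        return 1;
    }
    printf("%s\n", res->body.c_str());
    return 0;
}
```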
