Skip to content

Commit 2f6513e

Browse files
taronaeo authored and iamwavecut committed
llama-server: fix model params not propagated (ggml-org#21509)
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
1 parent cf900a1 commit 2f6513e

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

tools/server/server-context.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ struct server_context_impl {
632632

633633
// load the model and initialize llama_context
634634
// this may also be called to resume from sleeping state
635-
bool load_model(const common_params & params) {
635+
bool load_model(common_params & params) {
636636
bool is_resume = sleeping;
637637

638638
SRV_INF("loading model '%s'\n", params.model.path.c_str());
@@ -641,6 +641,9 @@ struct server_context_impl {
641641

642642
llama_init = common_init_from_params(params_base);
643643

644+
// propagate model-metadata sampling defaults back to caller
645+
params.sampling = params_base.sampling;
646+
644647
model = llama_init->model();
645648
ctx = llama_init->context();
646649

@@ -2978,7 +2981,7 @@ struct server_context_impl {
29782981
server_context::server_context() : impl(new server_context_impl()) {}
29792982
server_context::~server_context() = default;
29802983

2981-
bool server_context::load_model(const common_params & params) {
2984+
bool server_context::load_model(common_params & params) {
29822985
return impl->load_model(params);
29832986
}
29842987

tools/server/server-context.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ struct server_context {
5656

5757
// load the model and initialize llama_context
5858
// returns true on success
59-
bool load_model(const common_params & params);
59+
bool load_model(common_params & params);
6060

6161
// this function will block main thread until termination
6262
void start_loop();

0 commit comments

Comments (0)