Skip to content

Commit ae1f5b8

Browse files
yychyo authored and ArberSephirotheca committed
server: rename --clear-idle to --cache-idle-slots (ggml-org#21741)
1 parent 251c899 commit ae1f5b8

6 files changed

Lines changed: 16 additions & 16 deletions

File tree

common/arg.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,13 +1316,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
13161316
}
13171317
).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED, LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
13181318
add_opt(common_arg(
1319-
{"--clear-idle"},
1320-
{"--no-clear-idle"},
1319+
{"--cache-idle-slots"},
1320+
{"--no-cache-idle-slots"},
13211321
"save and clear idle slots on new task (default: enabled, requires unified KV and cache-ram)",
13221322
[](common_params & params, bool value) {
1323-
params.clear_idle = value;
1323+
params.cache_idle_slots = value;
13241324
}
1325-
).set_env("LLAMA_ARG_CLEAR_IDLE").set_examples({LLAMA_EXAMPLE_SERVER}));
1325+
).set_env("LLAMA_ARG_CACHE_IDLE_SLOTS").set_examples({LLAMA_EXAMPLE_SERVER}));
13261326
add_opt(common_arg(
13271327
{"--context-shift"},
13281328
{"--no-context-shift"},

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ struct common_params {
567567
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
568568
int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
569569
bool cache_prompt = true; // whether to enable prompt caching
570-
bool clear_idle = true; // save and clear idle slots upon starting a new task
570+
bool cache_idle_slots = true; // save and clear idle slots upon starting a new task
571571
int32_t n_ctx_checkpoints = 32; // max number of context checkpoints per slot
572572
int32_t checkpoint_every_nt = 8192; // make a checkpoint every n tokens during prefill
573573
int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc.

tools/server/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ For the full list of features, please refer to [server's changelog](https://gith
167167
| `-cpent, --checkpoint-every-n-tokens N` | create a checkpoint every n tokens during prefill (processing), -1 to disable (default: 8192)<br/>(env: LLAMA_ARG_CHECKPOINT_EVERY_NT) |
168168
| `-cram, --cache-ram N` | set the maximum cache size in MiB (default: 8192, -1 - no limit, 0 - disable)[(more info)](https://github.com/ggml-org/llama.cpp/pull/16391)<br/>(env: LLAMA_ARG_CACHE_RAM) |
169169
| `-kvu, --kv-unified, -no-kvu, --no-kv-unified` | use single unified KV buffer shared across all sequences (default: enabled if number of slots is auto)<br/>(env: LLAMA_ARG_KV_UNIFIED) |
170-
| `--clear-idle, --no-clear-idle` | save and clear idle slots on new task (default: enabled, requires unified KV and cache-ram)<br/>(env: LLAMA_ARG_CLEAR_IDLE) |
170+
| `--cache-idle-slots, --no-cache-idle-slots` | save and clear idle slots on new task (default: enabled, requires unified KV and cache-ram)<br/>(env: LLAMA_ARG_CACHE_IDLE_SLOTS) |
171171
| `--context-shift, --no-context-shift` | whether to use context shift on infinite text generation (default: disabled)<br/>(env: LLAMA_ARG_CONTEXT_SHIFT) |
172172
| `-r, --reverse-prompt PROMPT` | halt generation at PROMPT, return control in interactive mode |
173173
| `-sp, --special` | special tokens output enabled (default: false) |

tools/server/server-context.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -987,13 +987,13 @@ struct server_context_impl {
987987

988988
metrics.init();
989989

990-
if (params_base.clear_idle) {
990+
if (params_base.cache_idle_slots) {
991991
if (!params_base.kv_unified) {
992-
SRV_WRN("%s: --clear-idle requires --kv-unified, disabling\n", __func__);
993-
params_base.clear_idle = false;
992+
SRV_WRN("%s: --cache-idle-slots requires --kv-unified, disabling\n", __func__);
993+
params_base.cache_idle_slots = false;
994994
} else if (params_base.cache_ram_mib == 0) {
995-
SRV_WRN("%s: --clear-idle requires --cache-ram, disabling\n", __func__);
996-
params_base.clear_idle = false;
995+
SRV_WRN("%s: --cache-idle-slots requires --cache-ram, disabling\n", __func__);
996+
params_base.cache_idle_slots = false;
997997
} else {
998998
SRV_INF("%s: idle slots will be saved to prompt cache and cleared upon starting a new task\n", __func__);
999999
SRV_DBG("%s", "__TEST_TAG_CLEAR_IDLE_ENABLED__\n");
@@ -1886,7 +1886,7 @@ struct server_context_impl {
18861886
break; // drop the task
18871887
}
18881888

1889-
if (params_base.clear_idle) {
1889+
if (params_base.cache_idle_slots) {
18901890
for (auto & s : slots) {
18911891
if (!s.is_processing()) {
18921892
slot_save_and_clear(s);

tools/server/tests/unit/test_kv_keep_only_active.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def test_clear_and_restore():
9191

9292
def test_disabled_with_flag():
9393
global server
94-
server.no_clear_idle = True
94+
server.no_cache_idle_slots = True
9595
server.start()
9696
log = LogReader(server.log_path)
9797

tools/server/tests/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class ServerProcess:
103103
media_path: str | None = None
104104
sleep_idle_seconds: int | None = None
105105
cache_ram: int | None = None
106-
no_clear_idle: bool = False
106+
no_cache_idle_slots: bool = False
107107
log_path: str | None = None
108108
webui_mcp_proxy: bool = False
109109

@@ -242,8 +242,8 @@ def start(self, timeout_seconds: int = DEFAULT_HTTP_TIMEOUT) -> None:
242242
server_args.extend(["--sleep-idle-seconds", self.sleep_idle_seconds])
243243
if self.cache_ram is not None:
244244
server_args.extend(["--cache-ram", self.cache_ram])
245-
if self.no_clear_idle:
246-
server_args.append("--no-clear-idle")
245+
if self.no_cache_idle_slots:
246+
server_args.append("--no-cache-idle-slots")
247247
if self.webui_mcp_proxy:
248248
server_args.append("--webui-mcp-proxy")
249249

0 commit comments

Comments
 (0)