@@ -622,10 +622,6 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
622622 for (auto & seq_breaker : params.sampling .dry_sequence_breakers ) {
623623 string_process_escapes (seq_breaker);
624624 }
625- for (auto & pair : params.speculative .draft .replacements ) {
626- string_process_escapes (pair.first );
627- string_process_escapes (pair.second );
628- }
629625 }
630626
631627 if (!params.kv_overrides .empty ()) {
@@ -3518,13 +3514,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
35183514 params.speculative .draft .p_min = std::stof (value);
35193515 }
35203516 ).set_spec ().set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env (" LLAMA_ARG_SPEC_DRAFT_P_MIN" ));
3521- add_opt (common_arg (
3522- {" --spec-draft-ctx-size" , " -cd" , " --ctx-size-draft" }, " N" ,
3523- string_format (" size of the prompt context for the draft model (default: %d, 0 = loaded from model)" , params.speculative .draft .n_ctx ),
3524- [](common_params & params, int value) {
3525- params.speculative .draft .n_ctx = value;
3526- }
3527- ).set_spec ().set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env (" LLAMA_ARG_SPEC_DRAFT_CTX_SIZE" ));
35283517 add_opt (common_arg (
35293518 {" --spec-draft-device" , " -devd" , " --device-draft" }, " <dev1,dev2,..>" ,
35303519 " comma-separated list of devices to use for offloading the draft model (none = don't offload)\n "
@@ -3561,32 +3550,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
35613550 }
35623551 ).set_spec ().set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env (" LLAMA_ARG_SPEC_DRAFT_MODEL" ));
35633552 add_opt (common_arg (
3564- {" --spec-draft-replace" , " --spec-replace" }, " TARGET" , " DRAFT" ,
3565- " translate the string in TARGET into DRAFT if the draft model and main model are not compatible" ,
3566- [](common_params & params, const std::string & tgt, const std::string & dft) {
3567- params.speculative .draft .replacements .push_back ({ tgt, dft });
3568- }
3569- ).set_spec ().set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
3570- add_opt (common_arg (
3571- {" --spec-type" }, " [none|ngram-cache|ngram-simple|ngram-map-k|ngram-map-k4v|ngram-mod]" ,
3553+ {" --spec-type" }, common_speculative_all_types_str (),
35723554 string_format (" type of speculative decoding to use when no draft model is provided (default: %s)\n " ,
3573- common_speculative_type_to_str (params.speculative .type ).c_str ()),
3555+ common_speculative_type_name_str (params.speculative .types ).c_str ()),
35743556 [](common_params & params, const std::string & value) {
3575- if (value == " none" ) {
3576- params.speculative .type = COMMON_SPECULATIVE_TYPE_NONE;
3577- } else if (value == " ngram-cache" ) {
3578- params.speculative .type = COMMON_SPECULATIVE_TYPE_NGRAM_CACHE;
3579- } else if (value == " ngram-simple" ) {
3580- params.speculative .type = COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE;
3581- } else if (value == " ngram-map-k" ) {
3582- params.speculative .type = COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K;
3583- } else if (value == " ngram-map-k4v" ) {
3584- params.speculative .type = COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V;
3585- } else if (value == " ngram-mod" ) {
3586- params.speculative .type = COMMON_SPECULATIVE_TYPE_NGRAM_MOD;
3587- } else {
3588- throw std::invalid_argument (" unknown speculative decoding type without draft model" );
3589- }
3557+ const auto enabled_types = string_split<std::string>(value, ' ,' );
3558+ params.speculative .types = common_speculative_types_from_names (enabled_types);
35903559 }
35913560 ).set_spec ().set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env (" LLAMA_ARG_SPEC_TYPE" ));
35923561 add_opt (common_arg (
@@ -4075,7 +4044,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
40754044 {" --spec-default" },
40764045 string_format (" enable default speculative decoding config" ),
40774046 [](common_params & params) {
4078- params.speculative .type = COMMON_SPECULATIVE_TYPE_NGRAM_MOD;
4047+ params.speculative .types = { COMMON_SPECULATIVE_TYPE_NGRAM_MOD } ;
40794048 params.speculative .ngram_mod .n_match = 24 ;
40804049 params.speculative .ngram_mod .n_min = 48 ;
40814050 params.speculative .ngram_mod .n_max = 64 ;
0 commit comments