@@ -108,12 +108,12 @@ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
108108 }
109109}
110110
111- std::string addon_model_token_to_piece (const struct llama_model * model, llama_token token) {
111+ std::string addon_model_token_to_piece (const struct llama_model * model, llama_token token, bool specialTokens ) {
112112 std::vector<char > result (8 , 0 );
113- const int n_tokens = llama_token_to_piece (model, token, result.data (), result.size ());
113+ const int n_tokens = llama_token_to_piece (model, token, result.data (), result.size (), specialTokens );
114114 if (n_tokens < 0 ) {
115115 result.resize (-n_tokens);
116- int check = llama_token_to_piece (model, token, result.data (), result.size ());
116+ int check = llama_token_to_piece (model, token, result.data (), result.size (), specialTokens );
117117 GGML_ASSERT (check == -n_tokens);
118118 } else {
119119 result.resize (n_tokens);
@@ -378,13 +378,16 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
378378 }
379379
380380 Napi::Uint32Array tokens = info[0 ].As <Napi::Uint32Array>();
381+ bool decodeSpecialTokens = info.Length () > 0
382+ ? info[1 ].As <Napi::Boolean>().Value ()
383+ : false ;
381384
382385 // Create a stringstream for accumulating the decoded string.
383386 std::stringstream ss;
384387
385388 // Decode each token and accumulate the result.
386389 for (size_t i = 0 ; i < tokens.ElementLength (); i++) {
387- const std::string piece = addon_model_token_to_piece (model, (llama_token)tokens[i]);
390+ const std::string piece = addon_model_token_to_piece (model, (llama_token)tokens[i], decodeSpecialTokens );
388391
389392 if (piece.empty ()) {
390393 continue ;
@@ -534,6 +537,20 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
534537
535538 return Napi::Number::From (info.Env (), int32_t (tokenType));
536539 }
540+ Napi::Value IsEogToken (const Napi::CallbackInfo& info) {
541+ if (disposed) {
542+ Napi::Error::New (info.Env (), " Model is disposed" ).ThrowAsJavaScriptException ();
543+ return info.Env ().Undefined ();
544+ }
545+
546+ if (info[0 ].IsNumber () == false ) {
547+ return Napi::Boolean::New (info.Env (), false );
548+ }
549+
550+ int token = info[0 ].As <Napi::Number>().Int32Value ();
551+
552+ return Napi::Boolean::New (info.Env (), llama_token_is_eog (model, token));
553+ }
537554 Napi::Value GetVocabularyType (const Napi::CallbackInfo& info) {
538555 if (disposed) {
539556 Napi::Error::New (info.Env (), " Model is disposed" ).ThrowAsJavaScriptException ();
@@ -581,6 +598,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
581598 InstanceMethod (" eotToken" , &AddonModel::EotToken),
582599 InstanceMethod (" getTokenString" , &AddonModel::GetTokenString),
583600 InstanceMethod (" getTokenType" , &AddonModel::GetTokenType),
601+ InstanceMethod (" isEogToken" , &AddonModel::IsEogToken),
584602 InstanceMethod (" getVocabularyType" , &AddonModel::GetVocabularyType),
585603 InstanceMethod (" shouldPrependBosToken" , &AddonModel::ShouldPrependBosToken),
586604 InstanceMethod (" getModelSize" , &AddonModel::GetModelSize),
@@ -1054,6 +1072,30 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
10541072 return info.Env ().Undefined ();
10551073 }
10561074
1075+ Napi::Value CanBeNextTokenForGrammarEvaluationState (const Napi::CallbackInfo& info) {
1076+ AddonGrammarEvaluationState* grammar_evaluation_state =
1077+ Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap (info[0 ].As <Napi::Object>());
1078+ llama_token tokenId = info[1 ].As <Napi::Number>().Int32Value ();
1079+
1080+ if ((grammar_evaluation_state)->grammar != nullptr ) {
1081+ std::vector<llama_token_data> candidates;
1082+ candidates.reserve (1 );
1083+ candidates.emplace_back (llama_token_data { tokenId, 1 , 0 .0f });
1084+
1085+ llama_token_data_array candidates_p = { candidates.data (), candidates.size (), false };
1086+
1087+ llama_sample_grammar (ctx, &candidates_p, (grammar_evaluation_state)->grammar );
1088+
1089+ if (candidates_p.size == 0 || candidates_p.data [0 ].logit == -INFINITY) {
1090+ return Napi::Boolean::New (info.Env (), false );
1091+ }
1092+
1093+ return Napi::Boolean::New (info.Env (), true );
1094+ }
1095+
1096+ return Napi::Boolean::New (info.Env (), false );
1097+ }
1098+
10571099 Napi::Value GetEmbedding (const Napi::CallbackInfo& info) {
10581100 if (disposed) {
10591101 Napi::Error::New (info.Env (), " Context is disposed" ).ThrowAsJavaScriptException ();
@@ -1118,6 +1160,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
11181160 InstanceMethod (" decodeBatch" , &AddonContext::DecodeBatch),
11191161 InstanceMethod (" sampleToken" , &AddonContext::SampleToken),
11201162 InstanceMethod (" acceptGrammarEvaluationStateToken" , &AddonContext::AcceptGrammarEvaluationStateToken),
1163+ InstanceMethod (" canBeNextTokenForGrammarEvaluationState" , &AddonContext::CanBeNextTokenForGrammarEvaluationState),
11211164 InstanceMethod (" getEmbedding" , &AddonContext::GetEmbedding),
11221165 InstanceMethod (" getStateSize" , &AddonContext::GetStateSize),
11231166 InstanceMethod (" printTimings" , &AddonContext::PrintTimings),
@@ -1442,7 +1485,6 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
14421485 // Select the best prediction.
14431486 auto logits = llama_get_logits_ith (ctx->ctx , batchLogitIndex);
14441487 auto n_vocab = llama_n_vocab (ctx->model ->model );
1445- auto eos_token = llama_token_eos (ctx->model ->model );
14461488
14471489 std::vector<llama_token_data> candidates;
14481490 candidates.reserve (n_vocab);
@@ -1455,7 +1497,7 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
14551497 if (hasTokenBias) {
14561498 auto logitBias = tokenBiases.at (token_id);
14571499 if (logitBias == -INFINITY || logitBias < -INFINITY) {
1458- if (token_id != eos_token ) {
1500+ if (! llama_token_is_eog (ctx-> model -> model , token_id) ) {
14591501 logit = -INFINITY;
14601502 }
14611503 } else {
@@ -1513,7 +1555,7 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
15131555 new_token_id = llama_sample_token (ctx->ctx , &candidates_p);
15141556 }
15151557
1516- if (new_token_id != eos_token && use_grammar && (grammar_evaluation_state)->grammar != nullptr ) {
1558+ if (! llama_token_is_eog (ctx-> model -> model , new_token_id) && use_grammar && (grammar_evaluation_state)->grammar != nullptr ) {
15171559 llama_grammar_accept_token (ctx->ctx , (grammar_evaluation_state)->grammar , new_token_id);
15181560 }
15191561
0 commit comments