// server-loop-guard.h
// Forked from ggml-org/llama.cpp.
#pragma once
#include "common.h"
#include "llama.h"
#include <cstdint>
#include <string>
#include <vector>
// Selects which of the two token streams a server_loop_guard call refers to.
// Kept as an unscoped enum so existing unqualified uses keep compiling.
enum server_loop_guard_region {
    SERVER_LOOP_REGION_REASONING = 0, // the "reasoning" stream
    SERVER_LOOP_REGION_VISIBLE   = 1, // the "visible" stream
};
// Outcome of a loop-guard check. A default-constructed value reports
// "not triggered" with an empty kind and zeroed metrics.
struct server_loop_guard_result {
    // True when the check fired; false by default.
    bool triggered{false};

    // Label attached to the result; presumably names which detector fired
    // (TODO confirm against the implementation).
    std::string kind{};

    // Detection metrics. Their exact meaning and units are defined by the
    // implementation, which is not visible from this header.
    int32_t period{0};
    int32_t coverage{0};
    float   score{0.0f};
};
// Loop guard that keeps separate token tails and counters for the reasoning
// and visible regions (see server_loop_guard_region).
//
// NOTE(review): only declarations are visible in this header. Behavioral
// notes below marked "presumably" are inferred from names and signatures and
// must be confirmed against the implementation file.
class server_loop_guard {
public:
// Constructs a guard from `params`; when omitted, a value-initialized
// parameter set (`{}`) is used.
explicit server_loop_guard(common_reasoning_loop_guard_params params = {});
// Supplies a new parameter set. Presumably replaces the stored `params`;
// whether accumulated state is also cleared is not visible here.
void configure(common_reasoning_loop_guard_params params);
// Presumably clears the per-region tails and counters (TODO confirm).
void reset();
// Feeds one token attributed to `region` into the guard.
void accept(llama_token token, server_loop_guard_region region);
// Reports whether check() should be run for `region`, given whether the
// current token is end-of-generation and whether the end of reasoning is
// being forced. The exact policy lives in the implementation.
bool should_check(server_loop_guard_region region, bool token_is_eog, bool forcing_reasoning_end) const;
// Runs loop detection for `region` and returns the result; does not modify
// the guard (const member).
server_loop_guard_result check(server_loop_guard_region region) const;
// Returns a per-region counter; presumably the number of tokens accepted for
// `region` (TODO confirm).
int32_t seen(server_loop_guard_region region) const;
private:
// Configuration supplied via the constructor / configure().
common_reasoning_loop_guard_params params;
// Per-region token buffers.
std::vector<llama_token> reasoning_tail;
std::vector<llama_token> visible_tail;
// Per-region counters, zero-initialized.
int32_t reasoning_seen = 0;
int32_t visible_seen = 0;
// Mutable / const access to a token buffer for `region`; presumably selects
// reasoning_tail or visible_tail.
std::vector<llama_token> & tail(server_loop_guard_region region);
const std::vector<llama_token> & tail(server_loop_guard_region region) const;
// Individual detectors; each inspects `tokens` and returns its own result.
// Presumably combined by check() (TODO confirm order and precedence).
server_loop_guard_result check_periodic_tail(const std::vector<llama_token> & tokens) const;
server_loop_guard_result check_ngram_dominance(const std::vector<llama_token> & tokens) const;
server_loop_guard_result check_low_entropy(const std::vector<llama_token> & tokens) const;
};