|
18 | 18 | #ifndef THIRD_PARTY_GEMMA_CPP_UTIL_APP_H_ |
19 | 19 | #define THIRD_PARTY_GEMMA_CPP_UTIL_APP_H_ |
20 | 20 |
|
| 21 | +#include <iterator> |
21 | 22 | #if HWY_OS_LINUX |
22 | 23 | #include <sched.h> |
23 | 24 |
|
| 25 | +#include <cctype> |
24 | 26 | #include <cerrno> // IDE does not recognize errno.h as providing errno. |
| 27 | +#include <string> |
25 | 28 | #endif |
26 | 29 | #include <stddef.h> |
27 | 30 | #include <stdio.h> |
28 | 31 |
|
29 | 32 | #include <algorithm> // std::clamp |
30 | 33 | #include <thread> // NOLINT> |
31 | 34 |
|
| 35 | +// copybara:import_next_line:gemma_cpp |
| 36 | +#include "configs.h" |
| 37 | +// copybara:end |
| 38 | + |
| 39 | +// copybara:import_next_line:gemma_cpp |
| 40 | +#include "gemma.h" |
| 41 | +// copybara:end |
| 42 | + |
32 | 43 | // copybara:import_next_line:gemma_cpp |
33 | 44 | #include "util/args.h" |
34 | 45 | // copybara:end |
@@ -116,6 +127,124 @@ class AppArgs : public ArgsBase<AppArgs> { |
116 | 127 | } |
117 | 128 | }; |
118 | 129 |
|
| 130 | +struct LoaderArgs : public ArgsBase<LoaderArgs> { |
| 131 | + LoaderArgs(int argc, char* argv[]) { InitAndParse(argc, argv); } |
| 132 | + |
| 133 | + static std::string ToLower(const std::string& text) { |
| 134 | + std::string result = text; |
| 135 | + std::transform(begin(result), end(result), begin(result), |
| 136 | + [](unsigned char c) { return std::tolower(c); }); |
| 137 | + return result; |
| 138 | + } |
| 139 | + |
| 140 | + gcpp::Model ModelType() const { |
| 141 | + const std::string model_type_lc = ToLower(model_type); |
| 142 | + if (model_type_lc == "2b-pt" || model_type_lc == "2b-it") { |
| 143 | + return gcpp::Model::GEMMA_2B; |
| 144 | + } else { |
| 145 | + return gcpp::Model::GEMMA_7B; |
| 146 | + } |
| 147 | + } |
| 148 | + |
| 149 | + gcpp::ModelTraining ModelTraining() const { |
| 150 | + const std::string model_type_lc = ToLower(model_type); |
| 151 | + if (model_type_lc == "7b-pt" || model_type_lc == "2b-pt") { |
| 152 | + return gcpp::ModelTraining::GEMMA_PT; |
| 153 | + } else { |
| 154 | + return gcpp::ModelTraining::GEMMA_IT; |
| 155 | + } |
| 156 | + } |
| 157 | + |
| 158 | + // Returns error string or nullptr if OK. |
| 159 | + const char* Validate() const { |
| 160 | + const std::string model_type_lc = ToLower(model_type); |
| 161 | + if (model_type.empty()) { |
| 162 | + return "Missing --model flag, need to specify either 2b-pt, 7b-pt, " |
| 163 | + "2b-it, or 7b-it."; |
| 164 | + } |
| 165 | + if (model_type_lc != "2b-pt" && model_type_lc != "7b-pt" && |
| 166 | + model_type_lc != "2b-it" && model_type_lc != "7b-it") { |
| 167 | + return "Model type must be 2b-pt, 7b-pt, 2b-it, or " |
| 168 | + "7b-it."; |
| 169 | + } |
| 170 | + if (tokenizer.path.empty()) { |
| 171 | + return "Missing --tokenizer flag, a file for the tokenizer is required."; |
| 172 | + } |
| 173 | + if (compressed_weights.path.empty()) { |
| 174 | + return "Missing --compressed_weights flag, a file for the compressed " |
| 175 | + "model."; |
| 176 | + } |
| 177 | + return nullptr; |
| 178 | + } |
| 179 | + |
| 180 | + Path tokenizer; |
| 181 | + Path weights; // uncompressed weights file location |
| 182 | + Path compressed_weights; // compressed weights file location |
| 183 | + std::string model_type; |
| 184 | + |
| 185 | + template <class Visitor> |
| 186 | + void ForEach(const Visitor& visitor) { |
| 187 | + visitor(tokenizer, "tokenizer", Path(), |
| 188 | + "Path name of tokenizer model file.\n Required argument."); |
| 189 | + visitor( |
| 190 | + compressed_weights, "compressed_weights", Path(), |
| 191 | + "Path name of compressed weights file, regenerated from `--weights` " |
| 192 | + "file if " |
| 193 | + "the compressed weights file does not exist.\n Required argument."); |
| 194 | + visitor(model_type, "model", std::string(), |
| 195 | + "Model type\n 2b-it = 2B parameters, instruction-tuned\n " |
| 196 | + "2b-pt = 2B parameters, pretrained\n 7b-it = 7B parameters " |
| 197 | + "instruction-tuned\n 7b-pt = 7B parameters, pretrained\n" |
| 198 | + " Required argument."); |
| 199 | + visitor(weights, "weights", Path(), |
| 200 | + "Path name of model weights (.sbs) file. Only required if " |
| 201 | + "compressed_weights file is not present and needs to be " |
| 202 | + "regenerated. This parameter is only required for compressing" |
| 203 | + "new model weight exports, otherwise it is not needed."); |
| 204 | + } |
| 205 | +}; |
| 206 | + |
| 207 | +struct InferenceArgs : public ArgsBase<InferenceArgs> { |
| 208 | + InferenceArgs(int argc, char* argv[]) { InitAndParse(argc, argv); } |
| 209 | + |
| 210 | + size_t max_tokens; |
| 211 | + size_t max_generated_tokens; |
| 212 | + |
| 213 | + float temperature; |
| 214 | + bool deterministic; |
| 215 | + bool multiturn; |
| 216 | + |
| 217 | + // Returns error string or nullptr if OK. |
| 218 | + const char* Validate() const { |
| 219 | + if (max_tokens > gcpp::kSeqLen) { |
| 220 | + return "max_tokens is larger than the maximum sequence length (see " |
| 221 | + "configs.h)."; |
| 222 | + } |
| 223 | + if (max_generated_tokens > max_tokens) { |
| 224 | + return "Maximum number of generated tokens is larger than the maximum " |
| 225 | + "total tokens."; |
| 226 | + } |
| 227 | + return nullptr; |
| 228 | + } |
| 229 | + |
| 230 | + template <class Visitor> |
| 231 | + void ForEach(const Visitor& visitor) { |
| 232 | + visitor(max_tokens, "max_tokens", size_t{3072}, |
| 233 | + "Maximum number of tokens in prompt + generation."); |
| 234 | + visitor(max_generated_tokens, "max_generated_tokens", size_t{2048}, |
| 235 | + "Maximum number of tokens to generate."); |
| 236 | + |
| 237 | + visitor(temperature, "temperature", 1.0f, "Temperature for top-K", 2); |
| 238 | + visitor(deterministic, "deterministic", false, |
| 239 | + "Make top-k sampling deterministic", 2); |
| 240 | + visitor(multiturn, "multiturn", false, |
| 241 | + "Multiturn mode\n 0 = clear KV cache after every " |
| 242 | + "interaction\n 1 = continue KV cache after every interaction\n " |
| 243 | + " Default : 0 (conversation " |
| 244 | + "resets every turn)"); |
| 245 | + } |
| 246 | +}; |
| 247 | + |
119 | 248 | } // namespace gcpp |
120 | 249 |
|
121 | 250 | #endif // THIRD_PARTY_GEMMA_CPP_UTIL_APP_H_ |
0 commit comments