-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathLlamaApp.java
More file actions
50 lines (40 loc) · 1.98 KB
/
Copy pathLlamaApp.java
File metadata and controls
50 lines (40 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
package org.beehive.gpullama3;
import org.beehive.gpullama3.auxiliary.RunMetrics;
import org.beehive.gpullama3.inference.sampler.Sampler;
import org.beehive.gpullama3.model.Model;
import java.io.IOException;
import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler;
import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel;
/**
 * Command-line launcher: parses options, loads a model, builds a sampler and then runs
 * either the interactive loop or a single instruction.
 */
public class LlamaApp {
    // Configuration flags for hardware acceleration and optimizations

    /**
     * Enables the Java Vector API for CPU acceleration.
     *
     * <p>Read once, at class initialization, from the system property {@code llama.VectorAPI}.
     * Defaults to {@code true}; any value other than a case-insensitive {@code "true"} disables it.</p>
     */
    public static final boolean USE_VECTOR_API = Boolean.parseBoolean(System.getProperty("llama.VectorAPI", "true"));

    /**
     * Controls whether performance metrics are printed after a run.
     *
     * <p>Read once, at class initialization, from the system property {@code llama.ShowPerfInteractive}.
     * Defaults to {@code true}. Within this class the flag is consulted only by
     * {@link #runSingleInstruction(Model, Sampler, Options)}.</p>
     */
    public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(System.getProperty("llama.ShowPerfInteractive", "true"));

    /**
     * Runs one instruction against the model and prints the response to standard output.
     *
     * <p>When {@link #SHOW_PERF_INTERACTIVE} is set, the collected run metrics are printed
     * after the response.</p>
     *
     * @param model
     *         model used to produce the response.
     * @param sampler
     *         sampler handed to the model for this run.
     * @param options
     *         parsed command-line options handed to the model for this run.
     */
    private static void runSingleInstruction(Model model, Sampler sampler, Options options) {
        String response = model.runInstructOnce(sampler, options);
        System.out.println(response);
        if (SHOW_PERF_INTERACTIVE) {
            RunMetrics.printMetrics();
        }
    }

    /**
     * Entry point for running the model with the provided command-line arguments.
     *
     * <p>Parses the arguments into {@code Options}, loads the model, creates the sampler, and
     * then runs either interactive mode or a single instruction depending on
     * {@code options.interactive()}.</p>
     *
     * <p>Declared {@code public} so that the standard {@code java} launcher accepts it on every
     * JDK release; a package-private {@code main} is only recognized by launchers that support
     * instance/non-public main methods.</p>
     *
     * @param args
     *         command-line arguments, forwarded unchanged to {@code Options.parseOptions}.
     * @throws IOException
     *         if model loading or file operations fail.
     */
    public static void main(String[] args) throws IOException {
        Options options = Options.parseOptions(args);
        Model model = loadModel(options);
        Sampler sampler = createSampler(model, options);
        if (options.interactive()) {
            model.runInteractive(sampler, options);
        } else {
            runSingleInstruction(model, sampler, options);
        }
    }
}