-package org.beehive.gpullama3.cli;
-
-import org.beehive.gpullama3.Options;
-import org.beehive.gpullama3.auxiliary.LastRunMetrics;
-import org.beehive.gpullama3.inference.sampler.Sampler;
-import org.beehive.gpullama3.model.Model;
-
-import java.io.IOException;
-
-import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler;
-import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel;
-
-/**
- * LlamaTornadoCli - Pure Java CLI for running llama-tornado models
- *
- * This class provides a standalone command-line interface for running LLaMA models
- * with TornadoVM acceleration. This version is compiled as part of the Maven build.
- *
- * For JBang usage, use the LlamaTornadoCli.java file in the root directory.
- *
- * Usage as compiled application:
- *   java --enable-preview --add-modules jdk.incubator.vector \
- *        -cp target/gpu-llama3-0.3.1.jar \
- *        org.beehive.gpullama3.cli.LlamaTornadoCli \
- *        --model path/to/model.gguf --prompt "Your prompt here"
- *
- * Examples:
- *   # Interactive chat mode
- *   java -cp target/gpu-llama3-0.3.1.jar \
- *        org.beehive.gpullama3.cli.LlamaTornadoCli \
- *        -m model.gguf --interactive
- *
- *   # Single instruction mode
- *   java -cp target/gpu-llama3-0.3.1.jar \
- *        org.beehive.gpullama3.cli.LlamaTornadoCli \
- *        -m model.gguf -p "Explain quantum computing"
- *
- *   # With TornadoVM acceleration (requires TornadoVM runtime setup)
- *   java -cp target/gpu-llama3-0.3.1.jar \
- *        org.beehive.gpullama3.cli.LlamaTornadoCli \
- *        -m model.gguf -p "Hello" --use-tornadovm true
- */
-public class LlamaTornadoCli {
-
-    // Configuration flags
-    public static final boolean USE_VECTOR_API = Boolean.parseBoolean(
-            System.getProperty("llama.VectorAPI", "true"));
-    public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(
-            System.getProperty("llama.ShowPerfInteractive", "true"));
-
-    /**
-     * Run a single instruction and display the response
-     */
-    private static void runSingleInstruction(Model model, Sampler sampler, Options options) {
-        String response = model.runInstructOnce(sampler, options);
-        System.out.println(response);
-        if (SHOW_PERF_INTERACTIVE) {
-            LastRunMetrics.printMetrics();
-        }
-    }
-
-    /**
-     * Main entry point for the CLI application
-     *
-     * @param args command-line arguments (see Options.parseOptions for details)
-     * @throws IOException if model loading fails
-     */
-    public static void main(String[] args) throws IOException {
-        // Print banner
-        printBanner();
-
-        // Check if help requested
-        if (args.length == 0 || hasHelpFlag(args)) {
-            Options.printUsage(System.out);
-            System.exit(0);
-        }
-
-        try {
-            // Parse options
-            Options options = Options.parseOptions(args);
-
-            // Load model
-            System.out.println("Loading model from: " + options.modelPath());
-            Model model = loadModel(options);
-            System.out.println("Model loaded successfully!");
-
-            // Create sampler
-            Sampler sampler = createSampler(model, options);
-
-            // Run in interactive or single-instruction mode
-            if (options.interactive()) {
-                System.out.println("Starting interactive chat mode...");
-                System.out.println("Type your messages below (Ctrl+C to exit):");
-                System.out.println();
-                model.runInteractive(sampler, options);
-            } else {
-                runSingleInstruction(model, sampler, options);
-            }
-        } catch (Exception e) {
-            System.err.println("Error: " + e.getMessage());
-            e.printStackTrace();
-            System.exit(1);
-        }
-    }
-
-    /**
-     * Check if help flag is present in arguments
-     */
-    private static boolean hasHelpFlag(String[] args) {
-        for (String arg : args) {
-            if (arg.equals("--help") || arg.equals("-h")) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    /**
-     * Print ASCII banner
-     */
-    private static void printBanner() {
-        System.out.println("""
-                ╔══════════════════════════════════════════════════════════╗
-                ║     Llama-Tornado CLI - GPU-Accelerated LLM              ║
-                ║     Powered by TornadoVM & Java 21                       ║
-                ╚══════════════════════════════════════════════════════════╝
-                """);
-    }
-}
+//package org.beehive.gpullama3.cli;
+//
+//import org.beehive.gpullama3.Options;
+//import org.beehive.gpullama3.auxiliary.LastRunMetrics;
+//import org.beehive.gpullama3.inference.sampler.Sampler;
+//import org.beehive.gpullama3.model.Model;
+//
+//import java.io.IOException;
+//
+//import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler;
+//import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel;
+//
+///**
+// * LlamaTornadoCli - Pure Java CLI for running llama-tornado models
+// *
+// * This class provides a standalone command-line interface for running LLaMA models
+// * with TornadoVM acceleration. This version is compiled as part of the Maven build.
+// *
+// * For JBang usage, use the LlamaTornadoCli.java file in the root directory.
+// *
+// * Usage as compiled application:
+// *   java --enable-preview --add-modules jdk.incubator.vector \
+// *        -cp target/gpu-llama3-0.3.1.jar \
+// *        org.beehive.gpullama3.cli.LlamaTornadoCli \
+// *        --model path/to/model.gguf --prompt "Your prompt here"
+// *
+// * Examples:
+// *   # Interactive chat mode
+// *   java -cp target/gpu-llama3-0.3.1.jar \
+// *        org.beehive.gpullama3.cli.LlamaTornadoCli \
+// *        -m model.gguf --interactive
+// *
+// *   # Single instruction mode
+// *   java -cp target/gpu-llama3-0.3.1.jar \
+// *        org.beehive.gpullama3.cli.LlamaTornadoCli \
+// *        -m model.gguf -p "Explain quantum computing"
+// *
+// *   # With TornadoVM acceleration (requires TornadoVM runtime setup)
+// *   java -cp target/gpu-llama3-0.3.1.jar \
+// *        org.beehive.gpullama3.cli.LlamaTornadoCli \
+// *        -m model.gguf -p "Hello" --use-tornadovm true
+// */
+//public class LlamaTornadoCli {
+//
+//    // Configuration flags
+//    public static final boolean USE_VECTOR_API = Boolean.parseBoolean(
+//            System.getProperty("llama.VectorAPI", "true"));
+//    public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(
+//            System.getProperty("llama.ShowPerfInteractive", "true"));
+//
+//    /**
+//     * Run a single instruction and display the response
+//     */
+//    private static void runSingleInstruction(Model model, Sampler sampler, Options options) {
+//        String response = model.runInstructOnce(sampler, options);
+//        System.out.println(response);
+//        if (SHOW_PERF_INTERACTIVE) {
+//            LastRunMetrics.printMetrics();
+//        }
+//    }
+//
+//    /**
+//     * Main entry point for the CLI application
+//     *
+//     * @param args command-line arguments (see Options.parseOptions for details)
+//     * @throws IOException if model loading fails
+//     */
+//    public static void main(String[] args) throws IOException {
+//        // Print banner
+//        printBanner();
+//
+//        // Check if help requested
+//        if (args.length == 0 || hasHelpFlag(args)) {
+////            Options.printUsage(System.out);
+//            System.exit(0);
+//        }
+//
+//        try {
+//            // Parse options
+//            Options options = Options.parseOptions(args);
+//
+//            // Load model
+//            System.out.println("Loading model from: " + options.modelPath());
+//            Model model = loadModel(options);
+//            System.out.println("Model loaded successfully!");
+//
+//            // Create sampler
+//            Sampler sampler = createSampler(model, options);
+//
+//            // Run in interactive or single-instruction mode
+//            if (options.interactive()) {
+//                System.out.println("Starting interactive chat mode...");
+//                System.out.println("Type your messages below (Ctrl+C to exit):");
+//                System.out.println();
+//                model.runInteractive(sampler, options);
+//            } else {
+//                runSingleInstruction(model, sampler, options);
+//            }
+//        } catch (Exception e) {
+//            System.err.println("Error: " + e.getMessage());
+//            e.printStackTrace();
+//            System.exit(1);
+//        }
+//    }
+//
+//    /**
+//     * Check if help flag is present in arguments
+//     */
+//    private static boolean hasHelpFlag(String[] args) {
+//        for (String arg : args) {
+//            if (arg.equals("--help") || arg.equals("-h")) {
+//                return true;
+//            }
+//        }
+//        return false;
+//    }
+//
+//    /**
+//     * Print ASCII banner
+//     */
+//    private static void printBanner() {
+//        System.out.println("""
+//                ╔══════════════════════════════════════════════════════════╗
+//                ║     Llama-Tornado CLI - GPU-Accelerated LLM              ║
+//                ║     Powered by TornadoVM & Java 21                       ║
+//                ╚══════════════════════════════════════════════════════════╝
+//                """);
+//    }
+//}
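
For context, the two configuration flags at the top of the class are read from plain JVM system properties (llama.VectorAPI and llama.ShowPerfInteractive, both defaulting to "true"), so before this change commented the class out they could be toggled at launch without recompiling. A minimal sketch, reusing the jar, class name, and CLI flags from the Javadoc above; the model path and property values are illustrative:

    java -Dllama.VectorAPI=false -Dllama.ShowPerfInteractive=false \
         --enable-preview --add-modules jdk.incubator.vector \
         -cp target/gpu-llama3-0.3.1.jar \
         org.beehive.gpullama3.cli.LlamaTornadoCli \
         -m model.gguf -p "Hello"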