-
Notifications
You must be signed in to change notification settings - Fork 32
Add jbang script and configuration to make easy to run
#90
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
7e57e5a
7494855
5f03970
5e5c0b1
80040bf
5ff078f
f539fe8
b624d57
7b8bd75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,147 @@ | ||||||
| //JAVA 21 | ||||||
| //PREVIEW | ||||||
| //DEPS io.github.beehive-lab:gpu-llama3:0.3.1 | ||||||
| //DEPS io.github.beehive-lab:tornado-api:2.1.0 | ||||||
| //DEPS io.github.beehive-lab:tornado-runtime:2.1.0 | ||||||
|
|
||||||
| //SOURCES TornadoFlags.java | ||||||
| // === Set to not get annoying warnings about annotation processing | ||||||
| //JAVAC_OPTIONS -proc:full | ||||||
|
|
||||||
| // Compiler options | ||||||
| //JAVAC_OPTIONS --enable-preview | ||||||
| //JAVAC_OPTIONS --add-modules=jdk.incubator.vector | ||||||
|
|
||||||
| // JVM options for basic setup | ||||||
| //JAVA_OPTIONS --enable-preview | ||||||
| //JAVA_OPTIONS --add-modules=jdk.incubator.vector | ||||||
|
|
||||||
| package org.beehive.gpullama3.cli; | ||||||
|
|
||||||
| import org.beehive.gpullama3.Options; | ||||||
| import org.beehive.gpullama3.auxiliary.LastRunMetrics; | ||||||
| import org.beehive.gpullama3.inference.sampler.Sampler; | ||||||
| import org.beehive.gpullama3.model.Model; | ||||||
|
|
||||||
| import java.io.IOException; | ||||||
|
|
||||||
| import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler; | ||||||
| import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel; | ||||||
|
|
||||||
| /** | ||||||
| * LlamaTornadoCli - Pure Java CLI for running llama-tornado models | ||||||
| * | ||||||
| * This class provides a standalone command-line interface for running LLaMA models | ||||||
| * with TornadoVM acceleration. It can be executed directly with JBang or as a | ||||||
| * compiled Java application. | ||||||
| * | ||||||
| * Usage with JBang: | ||||||
| * jbang LlamaTornadoCli.java --model path/to/model.gguf --prompt "Your prompt here" | ||||||
| * | ||||||
| * Usage as compiled application: | ||||||
| * java --enable-preview --add-modules jdk.incubator.vector \ | ||||||
| * -cp target/gpu-llama3-0.3.1.jar \ | ||||||
| * org.beehive.gpullama3.cli.LlamaTornadoCli \ | ||||||
| * --model path/to/model.gguf --prompt "Your prompt here" | ||||||
| * | ||||||
| * Examples: | ||||||
| * # Interactive chat mode | ||||||
| * jbang LlamaTornadoCli.java -m model.gguf --interactive | ||||||
| * | ||||||
| * # Single instruction mode | ||||||
| * jbang LlamaTornadoCli.java -m model.gguf -p "Explain quantum computing" | ||||||
| * | ||||||
| * # With TornadoVM acceleration | ||||||
| * jbang LlamaTornadoCli.java -m model.gguf -p "Hello" --use-tornadovm true | ||||||
| * | ||||||
| * # Custom temperature and sampling | ||||||
| * jbang LlamaTornadoCli.java -m model.gguf -p "Tell me a story" \ | ||||||
| * --temperature 0.7 --top-p 0.9 --max-tokens 512 | ||||||
| */ | ||||||
| public class LlamaTornadoCli { | ||||||
|
|
||||||
    // Configuration flags (read once at class-load time from JVM system properties).
    // Enables the jdk.incubator.vector fast path; defaults to true, override with
    // -Dllama.VectorAPI=false. NOTE(review): not referenced within this file —
    // presumably consulted by the inference code; verify against callers.
    public static final boolean USE_VECTOR_API = Boolean.parseBoolean(
        System.getProperty("llama.VectorAPI", "true"));
    // When true, prints last-run performance metrics after a single-instruction run
    // (see runSingleInstruction); defaults to true, override with
    // -Dllama.ShowPerfInteractive=false.
    public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(
        System.getProperty("llama.ShowPerfInteractive", "true"));
|
|
||||||
| /** | ||||||
| * Run a single instruction and display the response | ||||||
| */ | ||||||
| private static void runSingleInstruction(Model model, Sampler sampler, Options options) { | ||||||
| String response = model.runInstructOnce(sampler, options); | ||||||
| System.out.println(response); | ||||||
| if (SHOW_PERF_INTERACTIVE) { | ||||||
| LastRunMetrics.printMetrics(); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Main entry point for the CLI application | ||||||
| * | ||||||
| * @param args command-line arguments (see Options.parseOptions for details) | ||||||
| * @throws IOException if model loading fails | ||||||
| */ | ||||||
| public static void main(String[] args) throws IOException { | ||||||
| // Print banner | ||||||
| printBanner(); | ||||||
|
|
||||||
| // Check if help requested | ||||||
| if (args.length == 0 || hasHelpFlag(args)) { | ||||||
| // Options.printUsage(System.out); | ||||||
| System.exit(0); | ||||||
| } | ||||||
|
|
||||||
| try { | ||||||
| // Parse options | ||||||
| Options options = Options.parseOptions(args); | ||||||
|
|
||||||
| // Load model | ||||||
| System.out.println("Loading model from: " + options.modelPath()); | ||||||
| Model model = loadModel(options); | ||||||
| System.out.println("Model loaded successfully!"); | ||||||
|
|
||||||
| // Create sampler | ||||||
| Sampler sampler = createSampler(model, options); | ||||||
|
|
||||||
| // Run in interactive or single-instruction mode | ||||||
| if (options.interactive()) { | ||||||
| System.out.println("Starting interactive chat mode..."); | ||||||
| System.out.println("Type your messages below (Ctrl+C to exit):"); | ||||||
| System.out.println(); | ||||||
| model.runInteractive(sampler, options); | ||||||
| } else { | ||||||
| runSingleInstruction(model, sampler, options); | ||||||
| } | ||||||
| } catch (Exception e) { | ||||||
| System.err.println("Error: " + e.getMessage()); | ||||||
| e.printStackTrace(); | ||||||
| System.exit(1); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Check if help flag is present in arguments | ||||||
| */ | ||||||
| private static boolean hasHelpFlag(String[] args) { | ||||||
| for (String arg : args) { | ||||||
| if (arg.equals("--help") || arg.equals("-h")) { | ||||||
| return true; | ||||||
| } | ||||||
| } | ||||||
| return false; | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Print ASCII banner | ||||||
| */ | ||||||
| private static void printBanner() { | ||||||
| System.out.println(""" | ||||||
| ╔══════════════════════════════════════════════════════════╗ | ||||||
| ║ Llama-Tornado CLI - GPU-Accelerated LLM ║ | ||||||
| ║ Powered by TornadoVM & Java 21 ║ | ||||||
|
||||||
| ║ Powered by TornadoVM & Java 21 ║ | |
| ║ Powered by TornadoVM & Java 21 ║ |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -242,6 +242,73 @@ llama-tornado --gpu --model beehive-llama-3.2-1b-instruct-fp16.gguf --prompt "te | |||||
| The above model can be swapped with one of the other models, such as `beehive-llama-3.2-3b-instruct-fp16.gguf` or `beehive-llama-3.2-8b-instruct-fp16.gguf`, depending on your needs. | ||||||
| Check models below. | ||||||
|
|
||||||
| ----------- | ||||||
|
|
||||||
| ## 🚀 Running with JBang (Pure Java CLI) | ||||||
|
|
||||||
| You can run llama-tornado as a pure Java script using [JBang](https://www.jbang.dev/) without building or installing anything. This provides a simple, script-like experience similar to [Jlama's CLI](https://github.com/tjake/Jlama). | ||||||
|
|
||||||
| ### Prerequisites for JBang | ||||||
|
|
||||||
| 1. **Install JBang**: Follow the [JBang installation guide](https://www.jbang.dev/download/) | ||||||
| 2. **TornadoVM SDK**: You still need TornadoVM installed and `TORNADO_SDK` environment variable set (see Setup section above) | ||||||
|
|
||||||
| ### Quick Start with JBang | ||||||
|
|
||||||
| ```bash | ||||||
| # Basic usage - interactive chat mode | ||||||
| jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf --interactive | ||||||
|
|
||||||
| # Single instruction mode | ||||||
| jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf -p "Explain quantum computing" | ||||||
|
|
||||||
| # With TornadoVM GPU acceleration | ||||||
| jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \ | ||||||
| -p "Tell me a joke" --use-tornadovm true | ||||||
|
|
||||||
| # Custom generation parameters | ||||||
| jbang LlamaTornadoCli.java -m beehive-llama-3.2-1b-instruct-fp16.gguf \ | ||||||
| -p "Write a short story" \ | ||||||
| --temperature 0.7 \ | ||||||
| --top-p 0.9 \ | ||||||
| --max-tokens 512 | ||||||
| ``` | ||||||
|
|
||||||
| ### JBang vs llama-tornado Script | ||||||
|
|
||||||
| | Feature | JBang CLI | llama-tornado Script | | ||||||
| |---------|-----------|---------------------| | ||||||
| | **Installation** | No build required | Requires `mvn package` | | ||||||
| | **Dependencies** | Auto-downloaded | Included in fat JAR | | ||||||
| | **TornadoVM Setup** | Basic (via dependencies) | Full (via tornado command wrapper) | | ||||||
| | **GPU Acceleration** | Limited | Full support with all TornadoVM optimizations | | ||||||
| | **Use Case** | Quick experimentation, CPU inference | Production use, full GPU acceleration | | ||||||
|
|
||||||
| ### How It Works | ||||||
|
|
||||||
| The `LlamaTornadoCli.java` file includes special JBang directives at the top: | ||||||
|
|
||||||
| ```java | ||||||
| //JAVA 21 | ||||||
| //PREVIEW | ||||||
| //DEPS io.github.beehive-lab:gpu-llama3:0.3.1 | ||||||
| //DEPS io.github.beehive-lab:tornado-api:2.1.0 | ||||||
| //DEPS io.github.beehive-lab:tornado-runtime:2.1.0 | ||||||
|
|
||||||
| //SOURCES TornadoFlags.java | ||||||
| ``` | ||||||
|
|
||||||
| These directives tell JBang to: | ||||||
| - Use Java 21 with preview features | ||||||
| - Download the required Maven dependencies automatically | ||||||
| - Load TornadoVM configuration from `TornadoFlags.java` | ||||||
|
|
||||||
| The `TornadoFlags.java` file contains all TornadoVM-specific JVM configuration (module exports, runtime settings, etc.), keeping the main CLI file clean and maintainable. This follows the same pattern as the [TornadoVM JBang examples](https://gist.github.com/maxandersen/14ecdc03c7c57fc59dfeb7ba37dd4c9c). | ||||||
|
||||||
| The `TornadoFlags.java` file contains all TornadoVM-specific JVM configuration (module exports, runtime settings, etc.), keeping the main CLI file clean and maintainable. This follows the same pattern as the [TornadoVM JBang examples](https://gist.github.com/maxandersen/14ecdc03c7c57fc59dfeb7ba37dd4c9c). | |
| The `TornadoFlags.java` file is provided as a template for TornadoVM-specific JVM configuration (module exports, runtime settings, etc.), keeping the main CLI file clean and maintainable. By default, all configuration directives in this file are commented out; you can uncomment or add your own settings as needed. This follows the same pattern as the [TornadoVM JBang examples](https://gist.github.com/maxandersen/14ecdc03c7c57fc59dfeb7ba37dd4c9c). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The help usage message is commented out, which means when users run the CLI with no arguments or with --help/-h flags, they will see no usage information before the program exits. This makes the CLI difficult to use. The Options.printUsage method was made public in this PR specifically to be callable here, so this line should be uncommented to actually display the usage information to users.