beehive-lab
diff --git a/‎.github/workflows/build-and-run.yml‎
Lines changed: 29 additions & 0 deletions b/‎.github/workflows/build-and-run.yml‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎.github/workflows/deploy-maven-central.yml‎
Lines changed: 1 addition & 2 deletions b/‎.github/workflows/deploy-maven-central.yml‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 16 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎CITATION.cff‎
Lines changed: 2 additions & 2 deletions b/‎CITATION.cff‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎LlamaTornadoCli.java‎
Lines changed: 145 additions & 0 deletions b/‎LlamaTornadoCli.java‎
Lines changed: 145 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 4 additions & 1 deletion b/‎Makefile‎
Lines changed: 4 additions & 1 deletion
@@ -128,6 +128,20 @@ jobs:
           ./llama-tornado --gpu --${{ matrix.backend.name }} \
             --model /$MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \
             --prompt "Say hello"
+      - name: FP16 - Run Granite-3.2-2b-instruct-f16.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model /$MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \
+            --prompt "Say hello"
+      - name: FP16 - Run Granite-4.0-1b-F16.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model /$MODELS_DIR/granite-4.0-1b-F16.gguf \
+            --prompt "Say hello"
       - name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf
         run: |
           cd ${{ github.workspace }}
@@ -163,3 +177,18 @@ jobs:
           ./llama-tornado --gpu --${{ matrix.backend.name }} \
             --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \
             --prompt "Say hello"
+      - name: Q8 - Run Granite-3.2-2b-instruct-Q8_0.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model /$MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \
+            --prompt "Say hello"
+      - name: Q8 - Run Granite-4.0-1b-Q8_0.gguf
+        run: |
+          cd ${{ github.workspace }}
+          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
+          ./llama-tornado --gpu --${{ matrix.backend.name }} \
+            --model /$MODELS_DIR/granite-4.0-1b-Q8_0.gguf \
+            --prompt "Say hello"
+
@@ -3,8 +3,7 @@ name: Deploy to Maven Central
 on:
   push:
     tags:
-      - 'v*'
-      - '[0-9]+.[0-9]+.[0-9]+*'
+      - 'v[0-9]+.[0-9]+.[0-9]+'
   workflow_run:
     workflows: ["Finalize GPULlama3 Release"]
     types: [completed]
 
@@ -2,6 +2,22 @@
 
 All notable changes to GPULlama3.java will be documented in this file.
 
+## [0.3.3] - 2025-12-19
+
+<!-- TODO: Add changes manually -->
+
+## [0.3.2] - 2025-12-18
+
+### Model Support
+
+- [models] Support for IBM Granite Models 3.2, 3.3 & 4.0 with FP16 and Q8 ([#92](https://github.com/beehive-lab/GPULlama3.java/pull/92))
+
+### Other Changes
+
+- [docs] Update docs to use SDKMAN! and point to TornadoVM 2.2.0 ([#93](https://github.com/beehive-lab/GPULlama3.java/pull/93))
+- Add JBang catalog and local usage examples to README.md ([#91](https://github.com/beehive-lab/GPULlama3.java/pull/91))
+- Add `jbang` script and configuration to make it easy to run ([#90](https://github.com/beehive-lab/GPULlama3.java/pull/90))
+
 ## [0.3.1] - 2025-12-11
 
 ### Model Support
 
@@ -15,6 +15,6 @@ authors:
   given-names: "Christos"
 title: "GPULlama3.java"
 license: MIT License
-version: 0.3.1
-date-released: 2025-12-11
+version: 0.3.3
+date-released: 2025-12-19
 url: "https://github.com/beehive-lab/GPULlama3.java"
@@ -0,0 +1,145 @@
+//JAVA 21
+//PREVIEW
+//DEPS io.github.beehive-lab:gpu-llama3:0.3.2-dev
+//DEPS io.github.beehive-lab:tornado-api:2.2.0
+//DEPS io.github.beehive-lab:tornado-runtime:2.2.0
+
+//SOURCES TornadoFlags.java
+// === Set to not get annoying warnings about annotation processing
+//JAVAC_OPTIONS -proc:full
+
+// Compiler options
+//JAVAC_OPTIONS --enable-preview
+//JAVAC_OPTIONS --add-modules=jdk.incubator.vector
+
+// JVM options for basic setup
+//JAVA_OPTIONS --enable-preview
+//JAVA_OPTIONS --add-modules=jdk.incubator.vector
+
+package org.beehive.gpullama3.cli;
+
+import org.beehive.gpullama3.Options;
+import org.beehive.gpullama3.auxiliary.LastRunMetrics;
+import org.beehive.gpullama3.inference.sampler.Sampler;
+import org.beehive.gpullama3.model.Model;
+
+import java.io.IOException;
+
+import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler;
+import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel;
+
+/**
+ * LlamaTornadoCli - Pure Java CLI for running llama-tornado models
+ *
+ * This class provides a standalone command-line interface for running LLaMA models
+ * with TornadoVM acceleration. It can be executed directly with JBang or as a
+ * compiled Java application.
+ *
+ * Usage with JBang:
+ *   jbang LlamaTornadoCli.java --model path/to/model.gguf --prompt "Your prompt here"
+ *
+ * Usage as compiled application (replace VERSION with the version of the jar you built):
+ *   java --enable-preview --add-modules jdk.incubator.vector \
+ *        -cp target/gpu-llama3-VERSION.jar \
+ *        org.beehive.gpullama3.cli.LlamaTornadoCli \
+ *        --model path/to/model.gguf --prompt "Your prompt here"
+ *
+ * Examples:
+ *   # Interactive chat mode
+ *   jbang LlamaTornadoCli.java -m model.gguf --interactive
+ *
+ *   # Single instruction mode
+ *   jbang LlamaTornadoCli.java -m model.gguf -p "Explain quantum computing"
+ *
+ *   # With TornadoVM acceleration
+ *   jbang LlamaTornadoCli.java -m model.gguf -p "Hello" --use-tornadovm true
+ *
+ *   # Custom temperature and sampling
+ *   jbang LlamaTornadoCli.java -m model.gguf -p "Tell me a story" \
+ *        --temperature 0.7 --top-p 0.9 --max-tokens 512
+ */
+public class LlamaTornadoCli {
+
+    // JVM system-property flags, each defaulting to true; USE_VECTOR_API is declared here but not read anywhere in this class.
+    public static final boolean USE_VECTOR_API = Boolean.parseBoolean(
+        System.getProperty("llama.VectorAPI", "true"));
+    public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(
+        System.getProperty("llama.ShowPerfInteractive", "true"));
+
+    /**
+     * Calls model.runInstructOnce, prints the returned response to stdout, then prints the last-run metrics when SHOW_PERF_INTERACTIVE is true.
+     */
+    private static void runSingleInstruction(Model model, Sampler sampler, Options options) {
+        String response = model.runInstructOnce(sampler, options);
+        System.out.println(response);
+        if (SHOW_PERF_INTERACTIVE) {
+            LastRunMetrics.printMetrics();
+        }
+    }
+
+    /**
+     * CLI entry point: prints the banner, shows usage and exits with status 0 when no arguments or a help flag is given,
+     * otherwise parses options, loads the model, creates the sampler and runs interactive or single-instruction mode.
+     * @param args command-line arguments (passed to Options.parseOptions)
+     * @throws IOException declared on the signature; any exception raised inside the try block is caught, reported on stderr and ends the process with exit status 1
+     */
+    public static void main(String[] args) throws IOException {
+        // The banner is printed unconditionally, including before the usage text
+        printBanner();
+
+        // No arguments or an explicit help flag: show usage and stop with a success status
+        if (args.length == 0 || hasHelpFlag(args)) {
+            Options.printUsage(System.out);
+            System.exit(0);
+        }
+
+        try {
+            // Turn the raw arguments into an Options instance
+            Options options = Options.parseOptions(args);
+
+            // Load the model described by the parsed options
+            Model model = loadModel(options);
+
+            // Build the sampler from the loaded model and the options
+            Sampler sampler = createSampler(model, options);
+
+            // options.interactive() selects chat mode; otherwise a single instruction is run
+            if (options.interactive()) {
+                System.out.println("Starting interactive chat mode...");
+                System.out.println("Type your messages below (Ctrl+C to exit):");
+                System.out.println();
+                model.runInteractive(sampler, options);
+            } else {
+                runSingleInstruction(model, sampler, options);
+            }
+        } catch (Exception e) {
+            System.err.println("Error: " + e.getMessage());
+            e.printStackTrace();
+            System.exit(1);
+        }
+    }
+
+    /**
+     * Returns true when any argument is exactly "--help" or "-h", false otherwise.
+     */
+    private static boolean hasHelpFlag(String[] args) {
+        for (String arg : args) {
+            if (arg.equals("--help") || arg.equals("-h")) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Prints the boxed title banner (drawn with Unicode box-drawing characters) to stdout.
+     */
+    private static void printBanner() {
+        System.out.println("""
+            ╔══════════════════════════════════════════════════════════╗
+            ║        Llama-Tornado CLI - GPU-Accelerated LLM           ║
+            ║           Powered by TornadoVM & Java 21                 ║
+            ╚══════════════════════════════════════════════════════════╝
+            """);
+    }
+}
@@ -5,7 +5,7 @@
 MVN = ./mvnw
 
 # Default target
-all: package
+all: install
 
 # Build the project (clean and package without tests)
 build: clean package
@@ -14,6 +14,9 @@ build: clean package
 clean:
 	$(MVN) clean
 
+install:
+	$(MVN) install -DskipTests
+
 # Package the project without running tests
 package:
 	$(MVN) package -DskipTests