Skip to content

Commit 3400a85

Browse files
committed
Merge branch 'feat/jdk25-n-tornadovm-upd' of github.com:beehive-lab/GPULlama3.java into feat/jdk25-n-tornadovm-upd
2 parents 87cca38 + a000e36 commit 3400a85

33 files changed

+3252
-64
lines changed

.github/workflows/build-and-run.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,20 @@ jobs:
128128
./llama-tornado --gpu --${{ matrix.backend.name }} \
129129
--model /$MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \
130130
--prompt "Say hello"
131+
- name: FP16 - Run Granite-3.2-2b-instruct-f16.gguf
132+
run: |
133+
cd ${{ github.workspace }}
134+
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
135+
./llama-tornado --gpu --${{ matrix.backend.name }} \
136+
--model /$MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \
137+
--prompt "Say hello"
138+
- name: FP16 - Run Granite-4.0-1b-F16.gguf
139+
run: |
140+
cd ${{ github.workspace }}
141+
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
142+
./llama-tornado --gpu --${{ matrix.backend.name }} \
143+
--model /$MODELS_DIR/granite-4.0-1b-F16.gguf \
144+
--prompt "Say hello"
131145
- name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf
132146
run: |
133147
cd ${{ github.workspace }}
@@ -163,3 +177,18 @@ jobs:
163177
./llama-tornado --gpu --${{ matrix.backend.name }} \
164178
--model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \
165179
--prompt "Say hello"
180+
- name: Q8 - Run Granite-3.2-2b-instruct-Q8.gguf
181+
run: |
182+
cd ${{ github.workspace }}
183+
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
184+
./llama-tornado --gpu --${{ matrix.backend.name }} \
185+
--model /$MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \
186+
--prompt "Say hello"
187+
- name: Q8 - Run Granite-4.0-1b-Q8_0.gguf
188+
run: |
189+
cd ${{ github.workspace }}
190+
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
191+
./llama-tornado --gpu --${{ matrix.backend.name }} \
192+
--model /$MODELS_DIR/granite-4.0-1b-Q8_0.gguf \
193+
--prompt "Say hello"
194+

.github/workflows/deploy-maven-central.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ name: Deploy to Maven Central
33
on:
44
push:
55
tags:
6-
- 'v*'
7-
- '[0-9]+.[0-9]+.[0-9]+*'
6+
- 'v[0-9]+.[0-9]+.[0-9]+'
87
workflow_run:
98
workflows: ["Finalize GPULlama3 Release"]
109
types: [completed]

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,22 @@
22

33
All notable changes to GPULlama3.java will be documented in this file.
44

5+
## [0.3.3] - 2025-12-19
6+
7+
<!-- TODO: Add changes manually -->
8+
9+
## [0.3.2] - 2025-12-18
10+
11+
### Model Support
12+
13+
- [models] Support for IBM Granite Models 3.2, 3.3 & 4.0 with FP16 and Q8 ([#92](https://github.com/beehive-lab/GPULlama3.java/pull/92))
14+
15+
### Other Changes
16+
17+
- [docs] Update docs to use SDKMAN! and point to TornadoVM 2.2.0 ([#93](https://github.com/beehive-lab/GPULlama3.java/pull/93))
18+
- Add JBang catalog and local usage examples to README.md ([#91](https://github.com/beehive-lab/GPULlama3.java/pull/91))
19+
- Add `jbang` script and configuration to make it easy to run ([#90](https://github.com/beehive-lab/GPULlama3.java/pull/90))
20+
521
## [0.3.1] - 2025-12-11
622

723
### Model Support

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@ authors:
1515
given-names: "Christos"
1616
title: "GPULlama3.java"
1717
license: MIT License
18-
version: 0.3.1
19-
date-released: 2025-12-11
18+
version: 0.3.3
19+
date-released: 2025-12-19
2020
url: "https://github.com/beehive-lab/GPULlama3.java"

LlamaTornadoCli.java

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
//JAVA 21
//PREVIEW
//DEPS io.github.beehive-lab:gpu-llama3:0.3.2-dev
//DEPS io.github.beehive-lab:tornado-api:2.2.0
//DEPS io.github.beehive-lab:tornado-runtime:2.2.0

//SOURCES TornadoFlags.java
// === Set to not get annoying warnings about annotation processing
//JAVAC_OPTIONS -proc:full

// Compiler options
//JAVAC_OPTIONS --enable-preview
//JAVAC_OPTIONS --add-modules=jdk.incubator.vector

// JVM options for basic setup
//JAVA_OPTIONS --enable-preview
//JAVA_OPTIONS --add-modules=jdk.incubator.vector

package org.beehive.gpullama3.cli;

import org.beehive.gpullama3.Options;
import org.beehive.gpullama3.auxiliary.LastRunMetrics;
import org.beehive.gpullama3.inference.sampler.Sampler;
import org.beehive.gpullama3.model.Model;

import java.io.IOException;

import static org.beehive.gpullama3.inference.sampler.Sampler.createSampler;
import static org.beehive.gpullama3.model.loader.ModelLoader.loadModel;

/**
 * LlamaTornadoCli - Pure Java CLI for running llama-tornado models.
 *
 * This class provides a standalone command-line interface for running LLaMA models
 * with TornadoVM acceleration. It can be executed directly with JBang or as a
 * compiled Java application.
 *
 * Usage with JBang:
 *   jbang LlamaTornadoCli.java --model path/to/model.gguf --prompt "Your prompt here"
 *
 * Usage as compiled application:
 *   java --enable-preview --add-modules jdk.incubator.vector \
 *        -cp target/gpu-llama3-0.3.1.jar \
 *        org.beehive.gpullama3.cli.LlamaTornadoCli \
 *        --model path/to/model.gguf --prompt "Your prompt here"
 *
 * Examples:
 *   # Interactive chat mode
 *   jbang LlamaTornadoCli.java -m model.gguf --interactive
 *
 *   # Single instruction mode
 *   jbang LlamaTornadoCli.java -m model.gguf -p "Explain quantum computing"
 *
 *   # With TornadoVM acceleration
 *   jbang LlamaTornadoCli.java -m model.gguf -p "Hello" --use-tornadovm true
 *
 *   # Custom temperature and sampling
 *   jbang LlamaTornadoCli.java -m model.gguf -p "Tell me a story" \
 *        --temperature 0.7 --top-p 0.9 --max-tokens 512
 */
public class LlamaTornadoCli {

    // Configuration flags, overridable via -Dllama.VectorAPI / -Dllama.ShowPerfInteractive.
    // USE_VECTOR_API is public so other project classes can read it.
    public static final boolean USE_VECTOR_API = Boolean.parseBoolean(
            System.getProperty("llama.VectorAPI", "true"));
    public static final boolean SHOW_PERF_INTERACTIVE = Boolean.parseBoolean(
            System.getProperty("llama.ShowPerfInteractive", "true"));

    /** Static entry-point class; not meant to be instantiated. */
    private LlamaTornadoCli() {
    }

    /**
     * Run a single instruction and display the response, plus timing metrics
     * when {@link #SHOW_PERF_INTERACTIVE} is enabled.
     */
    private static void runSingleInstruction(Model model, Sampler sampler, Options options) {
        String response = model.runInstructOnce(sampler, options);
        System.out.println(response);
        if (SHOW_PERF_INTERACTIVE) {
            LastRunMetrics.printMetrics();
        }
    }

    /**
     * Main entry point for the CLI application.
     *
     * @param args command-line arguments (see Options.parseOptions for details)
     * @throws IOException if model loading fails
     */
    public static void main(String[] args) throws IOException {
        // Print banner
        printBanner();

        // Check if help requested
        if (args.length == 0 || hasHelpFlag(args)) {
            Options.printUsage(System.out);
            System.exit(0);
        }

        try {
            // Parse options
            Options options = Options.parseOptions(args);

            // Load model
            Model model = loadModel(options);

            // Create sampler
            Sampler sampler = createSampler(model, options);

            // Run in interactive or single-instruction mode
            if (options.interactive()) {
                System.out.println("Starting interactive chat mode...");
                System.out.println("Type your messages below (Ctrl+C to exit):");
                System.out.println();
                model.runInteractive(sampler, options);
            } else {
                runSingleInstruction(model, sampler, options);
            }
        } catch (Exception e) {
            // getMessage() may be null (e.g. bare NullPointerException); fall back to the
            // exception's own toString() so the user never sees "Error: null".
            String message = e.getMessage() != null ? e.getMessage() : e.toString();
            System.err.println("Error: " + message);
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Check if a help flag ({@code --help} or {@code -h}) is present in the arguments.
     */
    private static boolean hasHelpFlag(String[] args) {
        for (String arg : args) {
            if (arg.equals("--help") || arg.equals("-h")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Print the ASCII startup banner.
     */
    private static void printBanner() {
        System.out.println("""
                ╔══════════════════════════════════════════════════════════╗
                ║        Llama-Tornado CLI - GPU-Accelerated LLM           ║
                ║              Powered by TornadoVM & Java 21              ║
                ╚══════════════════════════════════════════════════════════╝
                """);
    }
}

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
MVN = ./mvnw
66

77
# Default target
8-
all: package
8+
all: install
99

1010
# Build the project (clean and package without tests)
1111
build: clean package
@@ -14,6 +14,9 @@ build: clean package
1414
clean:
1515
$(MVN) clean
1616

17+
install:
18+
$(MVN) install -DskipTests
19+
1720
# Package the project without running tests
1821
package:
1922
$(MVN) package -DskipTests

0 commit comments

Comments
 (0)