Skip to content

Commit 5324c09

Browse files
authored
Merge pull request #105 from AdamBien/main
Add llamaTornado Java 25 single-file launcher with Metal backend support
2 parents 8a00ded + 9642299 commit 5324c09

3 files changed

Lines changed: 377 additions & 7 deletions

File tree

README.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,9 @@ We are at the early stages of Java entering the AI world with features added to
119119
| | M4 Pro | 16.77 tokens/s | 8.56 tokens/s | (WIP) |
120120
| **AMD / OpenCL** | Radeon RX | (WIP) | (WIP) | (WIP) |
121121

122-
##### ⚠️ Note on Apple Silicon Performance
122+
##### Apple Silicon Support
123123

124-
TornadoVM currently runs on Apple Silicon via [OpenCL](https://developer.apple.com/opencl/), which has been officially deprecated since macOS 10.14.
125-
126-
Despite being deprecated, OpenCL can still run on Apple Silicon; albeit, with older drivers which do not support all optimizations of TornadoVM. Therefore, the performance is not optimal since TornadoVM does not have a Metal backend yet (it currently has OpenCL, PTX, and SPIR-V backends). We recommend using Apple silicon for development and for performance testing to use OpenCL/PTX compatible Nvidia GPUs for the time being (until we add a Metal backend to TornadoVM and start optimizing it).
124+
TornadoVM 4.0 includes a native [Metal](https://developer.apple.com/metal/) backend, enabling GPU-accelerated inference on Apple Silicon.
127125

128126
-----------
129127
## 📦 Maven Dependency
@@ -313,6 +311,14 @@ Enable GPU acceleration with Q8_0 quantization:
313311
./llama-tornado --gpu --verbose-init --model beehive-llama-3.2-1b-instruct-fp16.gguf --prompt "tell me a joke"
314312
```
315313

314+
#### Running with `llamaTornado` (Java 25 single-file script)
315+
316+
`llamaTornado` is a zero-dependency Java 25 single-file script that replaces the Python launcher. It requires Java 25 or later (`java` on your `PATH`):
317+
318+
```bash
319+
./llamaTornado --gpu --verbose-init --metal --model /Users/abien/work/workspaces/llms/Mistral-7B-Instruct-v0.3.Q8_0.gguf --prompt "what is java"
320+
```
321+
316322
-----------
317323

318324
## 🐳 Docker

llamaTornado

Lines changed: 364 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,364 @@
1+
#!/usr/bin/env -S java --source 25
2+
3+
import module java.logging;
4+
5+
// Launcher display name, derived from the implicitly declared class of this
// compact source file (i.e. the script's own filename).
String name = MethodHandles.lookup().lookupClass().getName();
// Date-based script version: YYYY-MM-DD.revision.
String version = "2026-04-11.1";
7+
8+
// TornadoVM execution backends selectable via --opencl / --ptx / --metal.
enum Backend { OPENCL, PTX, METAL }
9+
10+
// Immutable snapshot of every launcher setting parsed from the command line.
// Defaults are applied in parseArgs; the component groups mirror the sections
// of the --help text (model/sampling, mode, hardware, debug, launcher control).
record Config(
    String modelPath, String prompt, String systemPrompt,
    double temperature, double topP, long seed, int maxTokens,
    boolean stream, boolean echo, boolean interactive, boolean instruct,
    boolean useGpu, Backend backend, String gpuMemory,
    String heapMin, String heapMax,
    boolean debug, boolean profiler, String profilerDumpDir,
    boolean printBytecodes, boolean threads, boolean printKernel,
    boolean fullDump, boolean verboseInit,
    boolean showCommand, boolean executeAfterShow,
    String openclFlags, int maxWaitEvents, boolean verbose
) {}
22+
23+
Config parseArgs(String[] args) {
24+
String modelPath = null;
25+
String prompt = null;
26+
String systemPrompt = null;
27+
double temperature = 0.1;
28+
double topP = 0.95;
29+
long seed = System.currentTimeMillis() / 1000;
30+
int maxTokens = 512;
31+
boolean stream = true;
32+
boolean echo = false;
33+
boolean interactive = false;
34+
boolean instruct = true;
35+
boolean useGpu = false;
36+
Backend backend = Backend.OPENCL;
37+
String gpuMemory = "14GB";
38+
String heapMin = "20g";
39+
String heapMax = "20g";
40+
boolean debug = false;
41+
boolean profiler = false;
42+
String profilerDumpDir = null;
43+
boolean printBytecodes = false;
44+
boolean threads = false;
45+
boolean printKernel = false;
46+
boolean fullDump = false;
47+
boolean verboseInit = false;
48+
boolean showCommand = false;
49+
boolean executeAfterShow = false;
50+
String openclFlags = "-cl-denorms-are-zero -cl-no-signed-zeros -cl-finite-math-only";
51+
int maxWaitEvents = 32000;
52+
boolean verbose = false;
53+
54+
for (int i = 0; i < args.length; i++) {
55+
switch (args[i]) {
56+
case "--model", "-m" -> modelPath = args[++i];
57+
case "--prompt", "-p" -> prompt = args[++i];
58+
case "--system-prompt", "-sp" -> systemPrompt = args[++i];
59+
case "--temperature" -> temperature = Double.parseDouble(args[++i]);
60+
case "--top-p" -> topP = Double.parseDouble(args[++i]);
61+
case "--seed" -> seed = Long.parseLong(args[++i]);
62+
case "--max-tokens", "-n" -> maxTokens = Integer.parseInt(args[++i]);
63+
case "--stream" -> stream = Boolean.parseBoolean(args[++i]);
64+
case "--echo" -> echo = Boolean.parseBoolean(args[++i]);
65+
case "-i", "--interactive" -> { interactive = true; instruct = false; }
66+
case "--instruct" -> instruct = true;
67+
case "--gpu" -> useGpu = true;
68+
case "--opencl" -> backend = Backend.OPENCL;
69+
case "--ptx" -> backend = Backend.PTX;
70+
case "--metal" -> backend = Backend.METAL;
71+
case "--gpu-memory" -> gpuMemory = args[++i];
72+
case "--heap-min" -> heapMin = args[++i];
73+
case "--heap-max" -> heapMax = args[++i];
74+
case "--debug" -> debug = true;
75+
case "--profiler" -> profiler = true;
76+
case "--profiler-dump-dir" -> profilerDumpDir = args[++i];
77+
case "--print-bytecodes" -> printBytecodes = true;
78+
case "--print-threads" -> threads = true;
79+
case "--print-kernel" -> printKernel = true;
80+
case "--full-dump" -> fullDump = true;
81+
case "--verbose-init" -> verboseInit = true;
82+
case "--show-command" -> showCommand = true;
83+
case "--execute-after-show" -> executeAfterShow = true;
84+
case "--opencl-flags" -> openclFlags = args[++i];
85+
case "--max-wait-events" -> maxWaitEvents = Integer.parseInt(args[++i]);
86+
case "--verbose", "-v" -> verbose = true;
87+
default -> {
88+
System.err.println("Unknown option: " + args[i]);
89+
System.exit(1);
90+
}
91+
}
92+
}
93+
94+
if (modelPath == null) {
95+
System.err.println("Error: --model is required");
96+
printUsage();
97+
System.exit(1);
98+
}
99+
100+
if (profilerDumpDir == null) {
101+
profilerDumpDir = System.getenv("LLAMA_ROOT") + "/profiler-log.json";
102+
}
103+
104+
return new Config(modelPath, prompt, systemPrompt, temperature, topP, seed, maxTokens,
105+
stream, echo, interactive, instruct, useGpu, backend, gpuMemory, heapMin, heapMax,
106+
debug, profiler, profilerDumpDir, printBytecodes, threads, printKernel, fullDump,
107+
verboseInit, showCommand, executeAfterShow, openclFlags, maxWaitEvents, verbose);
108+
}
109+
110+
void printUsage() {
111+
IO.println("""
112+
Usage: %s --model <path> [options]
113+
114+
GPU-accelerated LLM runner using TornadoVM
115+
116+
Required:
117+
--model, -m <path> Path to the LLM gguf file
118+
119+
LLaMA Configuration:
120+
--prompt, -p <text> Input prompt
121+
--system-prompt, -sp System prompt
122+
--temperature <val> Sampling temperature (default: 0.1)
123+
--top-p <val> Top-p sampling (default: 0.95)
124+
--seed <val> Random seed (default: current timestamp)
125+
--max-tokens, -n <val> Max tokens to generate (default: 512)
126+
--stream <bool> Enable streaming (default: true)
127+
--echo <bool> Echo input prompt (default: false)
128+
129+
Mode:
130+
-i, --interactive Interactive/chat mode
131+
--instruct Instruction mode (default)
132+
133+
Hardware:
134+
--gpu Enable GPU acceleration
135+
--opencl Use OpenCL backend (default)
136+
--ptx Use PTX/CUDA backend
137+
--metal Use Metal backend (macOS)
138+
--gpu-memory <val> GPU memory allocation (default: 14GB)
139+
--heap-min <val> Min JVM heap (default: 20g)
140+
--heap-max <val> Max JVM heap (default: 20g)
141+
142+
Debug:
143+
--debug Enable debug output
144+
--profiler Enable TornadoVM profiler
145+
--profiler-dump-dir Profiler output directory
146+
--print-bytecodes Print bytecodes
147+
--print-threads Print thread info
148+
--print-kernel Print kernel info
149+
--full-dump Full debug dump
150+
--verbose-init TornadoVM init timing
151+
--show-command Display the full Java command
152+
--execute-after-show Execute after showing command
153+
--verbose, -v Verbose output
154+
155+
-help Show this help
156+
-version Show version
157+
""".formatted(name));
158+
}
159+
160+
/**
 * Locates the gpu-llama3 application JAR under {@code <llamaRoot>/target}.
 * Prefers SNAPSHOT builds (gpu-llama3-*-SNAPSHOT.jar) and falls back to any
 * gpu-llama3-*.jar. Exits with code 1 when no JAR is found or the target
 * directory cannot be read.
 */
String findLlamaJar(String llamaRoot) {
    var targetDir = Path.of(llamaRoot, "target");
    try (var stream = Files.newDirectoryStream(targetDir, "gpu-llama3-*-SNAPSHOT.jar")) {
        var jars = new ArrayList<Path>();
        stream.forEach(jars::add);
        if (jars.isEmpty()) {
            // No snapshot build present: widen the search to release JARs.
            try (var fallback = Files.newDirectoryStream(targetDir, "gpu-llama3-*.jar")) {
                fallback.forEach(jars::add);
            }
        }
        if (jars.isEmpty()) {
            System.err.println("Error: No gpu-llama3 JAR found in " + targetDir);
            System.exit(1);
        }
        // NOTE(review): this is reverse *lexicographic* order, which may not match
        // semantic-version order (e.g. "10" sorts before "9") — confirm this is
        // acceptable for the project's version naming scheme.
        jars.sort(Comparator.reverseOrder());
        return jars.getFirst().toString();
    } catch (IOException e) {
        System.err.println("Error searching for JAR: " + e.getMessage());
        System.exit(1);
        return null; // unreachable: System.exit above never returns
    }
}
182+
183+
String modulePath(String tornadoSdk) {
184+
var sep = System.getProperty("os.name").toLowerCase().contains("win") ? ";" : ":";
185+
return "." + sep + tornadoSdk + "/share/java/tornado";
186+
}
187+
188+
/**
 * Assembles the complete java command line that launches the gpu-llama3
 * application under TornadoVM: JVM tuning flags, TornadoVM system properties,
 * backend-specific export lists and driver modules, the application classpath,
 * and finally the LLaMA CLI arguments. The relative ordering of the segments
 * is significant (argument files and --add-modules must follow the module
 * path), so it is preserved exactly.
 */
List<String> buildCommand(Config cfg, String javaHome, String tornadoSdk, String llamaRoot) {
    var cmd = new ArrayList<String>();

    // Base JVM invocation: JVMCI enabled for TornadoVM's Graal-based compiler,
    // native library path pointing into the TornadoVM SDK.
    cmd.addAll(List.of(
        javaHome + "/bin/java",
        "-server",
        "-XX:+UnlockExperimentalVMOptions",
        "-XX:+EnableJVMCI",
        "-Xms" + cfg.heapMin(),
        "-Xmx" + cfg.heapMax(),
        "--enable-preview",
        "-Djava.library.path=" + tornadoSdk + "/lib",
        "-Djdk.module.showModuleResolution=false",
        "--module-path", modulePath(tornadoSdk)
    ));

    // TornadoVM implementation-loading configuration (fixed class bindings).
    cmd.addAll(List.of(
        "-Dtornado.load.api.implementation=uk.ac.manchester.tornado.runtime.tasks.TornadoTaskGraph",
        "-Dtornado.load.runtime.implementation=uk.ac.manchester.tornado.runtime.TornadoCoreRuntime",
        "-Dtornado.load.tornado.implementation=uk.ac.manchester.tornado.runtime.common.Tornado",
        "-Dtornado.load.annotation.implementation=uk.ac.manchester.tornado.annotation.ASMClassVisitor",
        "-Dtornado.load.annotation.parallel=uk.ac.manchester.tornado.api.annotations.Parallel",
        "-Dtornado.tvm.maxbytecodesize=65536"
    ));

    if (cfg.useGpu()) cmd.add("-Duse.tornadovm=true");
    if (cfg.verboseInit()) cmd.add("-Dllama.EnableTimingForTornadoVMInit=true");

    // Debug flags: always passed; the value mirrors the corresponding CLI switch.
    cmd.add("-Dtornado.debug=" + cfg.debug());
    cmd.add("-Dtornado.threadInfo=" + cfg.threads());
    cmd.add("-Dtornado.fullDebug=" + cfg.fullDump());
    cmd.add("-Dtornado.printKernel=" + cfg.printKernel());
    cmd.add("-Dtornado.print.bytecodes=" + cfg.printBytecodes());

    // Runtime configuration: device memory, profiler output, math optimizations.
    cmd.addAll(List.of(
        "-Dtornado.device.memory=" + cfg.gpuMemory(),
        "-Dtornado.profiler=" + cfg.profiler(),
        "-Dtornado.log.profiler=false",
        "-Dtornado.profiler.dump.dir=" + cfg.profilerDumpDir(),
        "-Dtornado.enable.fastMathOptimizations=true",
        "-Dtornado.enable.mathOptimizations=false",
        "-Dtornado.enable.nativeFunctions=true",
        "-Dtornado.loop.interchange=true",
        "-Dtornado.eventpool.maxwaitevents=" + cfg.maxWaitEvents()
    ));

    // OpenCL compiler flags are only meaningful on the OpenCL backend.
    if (cfg.backend() == Backend.OPENCL) {
        cmd.add("-Dtornado.opencl.compiler.flags=" + cfg.openclFlags());
    }

    // Module configuration shared by all backends (Graal jars + common exports).
    cmd.addAll(List.of(
        "--upgrade-module-path", tornadoSdk + "/share/java/graalJars",
        "@" + tornadoSdk + "/etc/exportLists/common-exports"
    ));

    // Backend-specific export list and driver module selection.
    switch (cfg.backend()) {
        case OPENCL -> {
            cmd.add("@" + tornadoSdk + "/etc/exportLists/opencl-exports");
            cmd.addAll(List.of("--add-modules",
                "ALL-SYSTEM,jdk.incubator.vector,tornado.runtime,tornado.annotation,tornado.drivers.common,tornado.drivers.opencl"));
        }
        case PTX -> {
            cmd.add("@" + tornadoSdk + "/etc/exportLists/ptx-exports");
            cmd.addAll(List.of("--add-modules",
                "ALL-SYSTEM,jdk.incubator.vector,tornado.runtime,tornado.annotation,tornado.drivers.common,tornado.drivers.ptx"));
        }
        case METAL -> {
            cmd.add("@" + tornadoSdk + "/etc/exportLists/metal-exports");
            cmd.addAll(List.of("--add-modules",
                "ALL-SYSTEM,jdk.incubator.vector,tornado.runtime,tornado.annotation,tornado.drivers.common,tornado.drivers.metal"));
        }
    }

    // Application entry point; the JAR is discovered under <llamaRoot>/target.
    cmd.addAll(List.of("-cp", findLlamaJar(llamaRoot), "org.beehive.gpullama3.LlamaApp"));

    // LLaMA application arguments (sampling and streaming settings).
    cmd.addAll(List.of(
        "-m", cfg.modelPath(),
        "--temperature", String.valueOf(cfg.temperature()),
        "--top-p", String.valueOf(cfg.topP()),
        "--seed", String.valueOf(cfg.seed()),
        "--max-tokens", String.valueOf(cfg.maxTokens()),
        "--stream", String.valueOf(cfg.stream()),
        "--echo", String.valueOf(cfg.echo())
    ));

    if (cfg.prompt() != null) cmd.addAll(List.of("-p", cfg.prompt()));
    if (cfg.systemPrompt() != null) cmd.addAll(List.of("-sp", cfg.systemPrompt()));
    // Interactive wins over instruct (parseArgs clears instruct when -i is given).
    if (cfg.interactive()) cmd.add("--interactive");
    else if (cfg.instruct()) cmd.add("--instruct");

    return cmd;
}
285+
286+
String resolveLlamaRoot() {
287+
var envRoot = System.getenv("LLAMA_ROOT");
288+
if (envRoot != null && !envRoot.isBlank()) return envRoot;
289+
290+
// Derive from the script's own location, same as set_paths does
291+
try {
292+
var scriptPath = Path.of(MethodHandles.lookup().lookupClass()
293+
.getProtectionDomain().getCodeSource().getLocation().toURI());
294+
return scriptPath.getParent().toString();
295+
} catch (Exception e) {
296+
System.err.println("Error: LLAMA_ROOT not set and could not determine script location");
297+
System.err.println("Note: check set_path in root dir -> source set_path");
298+
System.exit(1);
299+
return null;
300+
}
301+
}
302+
303+
String requireEnv(String key) {
304+
var value = System.getenv(key);
305+
if (value == null || value.isBlank()) {
306+
System.err.println("Error: " + key + " is not set");
307+
System.err.println("Please ensure JAVA_HOME and TORNADOVM_HOME are defined");
308+
System.exit(1);
309+
}
310+
if (!Files.exists(Path.of(value))) {
311+
System.err.println("Error: " + key + " path does not exist: " + value);
312+
System.exit(1);
313+
}
314+
return value;
315+
}
316+
317+
void main(String... args) {
318+
if (args.length == 0 || args[0].equals("-help")) {
319+
printUsage();
320+
return;
321+
}
322+
if (args[0].equals("-version")) {
323+
IO.println(name + " " + version);
324+
return;
325+
}
326+
327+
var javaHome = requireEnv("JAVA_HOME");
328+
var tornadoSdk = requireEnv("TORNADOVM_HOME");
329+
var llamaRoot = resolveLlamaRoot();
330+
331+
var cfg = parseArgs(args);
332+
var cmd = buildCommand(cfg, javaHome, tornadoSdk, llamaRoot);
333+
334+
if (cfg.showCommand()) {
335+
IO.println("Full Java command:");
336+
IO.println("-".repeat(80));
337+
IO.println(String.join(" ", cmd));
338+
IO.println("-".repeat(80));
339+
IO.println();
340+
if (!cfg.executeAfterShow()) {
341+
IO.println("Command built successfully. Use --execute-after-show to run after displaying.");
342+
return;
343+
}
344+
}
345+
346+
if (cfg.verbose()) {
347+
IO.println("Executing command:");
348+
cmd.forEach(arg -> IO.println(" " + arg));
349+
IO.println();
350+
}
351+
352+
try {
353+
var process = new ProcessBuilder(cmd)
354+
.inheritIO()
355+
.start();
356+
System.exit(process.waitFor());
357+
} catch (InterruptedException e) {
358+
System.err.println("\nOperation cancelled");
359+
System.exit(130);
360+
} catch (IOException e) {
361+
System.err.println("Error: " + e.getMessage());
362+
System.exit(1);
363+
}
364+
}

0 commit comments

Comments
 (0)