forked from kherud/java-llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathLlamaIterator.java
More file actions
81 lines (70 loc) · 2.54 KB
/
Copy pathLlamaIterator.java
File metadata and controls
81 lines (70 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
// SPDX-FileCopyrightText: 2023-2025 Konstantin Herud
//
// SPDX-License-Identifier: MIT
package net.ladenthin.llama;
import net.ladenthin.llama.json.CompletionResponseParser;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
 * Streaming iterator over the outputs of a single completion task.
 *
 * <p>This iterator is used by {@link LlamaModel#generate(InferenceParameters)} and
 * {@link LlamaModel#generateChat(InferenceParameters)}. In addition to implementing
 * {@link Iterator}, it allows cancelling ongoing inference (see {@link #cancel()}).
 *
 * <p>{@link LlamaIterator} implements {@link AutoCloseable}. When used via {@link LlamaIterable}
 * inside a try-with-resources block, {@link #close()} is called automatically on early exit
 * (e.g. {@code break}), preventing the native task slot from leaking.
 *
 * <p>NOTE(review): {@code hasNext} is a plain field with no synchronization; confirm whether
 * {@link #cancel()} is expected to be called from a thread other than the iterating one.
 */
public final class LlamaIterator implements Iterator<LlamaOutput>, AutoCloseable {

    private final LlamaModel model;
    private final int taskId;
    private final CompletionResponseParser completionParser = new CompletionResponseParser();

    /** {@code true} until a stop output has been received or the task has been cancelled. */
    private boolean hasNext = true;

    /**
     * Creates an iterator for a plain (non-chat) completion request.
     *
     * @param model the model the request is submitted to
     * @param parameters the inference parameters; streaming is switched on for this object
     */
    LlamaIterator(LlamaModel model, InferenceParameters parameters) {
        this(model, parameters, false);
    }

    /**
     * Creates an iterator and immediately submits the request to the model.
     *
     * @param model the model the request is submitted to
     * @param parameters the inference parameters; streaming is switched on for this object
     * @param chat {@code true} to submit via {@code requestChatCompletion}, {@code false} to
     *     submit via {@code requestCompletion}
     */
    LlamaIterator(LlamaModel model, InferenceParameters parameters, boolean chat) {
        this.model = model;
        // Mutates the caller's parameters object: the iterator consumes results chunk by chunk.
        parameters.setStream(true);
        taskId = chat
                ? model.requestChatCompletion(parameters.toString())
                : model.requestCompletion(parameters.toString());
    }

    /**
     * Reports whether another output may be fetched.
     *
     * @return {@code false} once a stop output has been returned by {@link #next()} or the
     *     task has been cancelled, {@code true} otherwise
     */
    @Override
    public boolean hasNext() {
        return hasNext;
    }

    /**
     * Fetches and parses the next output of the task.
     *
     * <p>When the parsed output is flagged as the stop output, the task is released on the
     * model and the iterator is marked as exhausted.
     *
     * @return the parsed output
     * @throws NoSuchElementException if the iterator is exhausted or was cancelled
     */
    @Override
    public LlamaOutput next() {
        if (!hasNext) {
            throw new NoSuchElementException();
        }
        String json = model.receiveCompletionJson(taskId);
        LlamaOutput output = completionParser.parse(json);
        hasNext = !output.stop;
        if (output.stop) {
            model.releaseTask(taskId);
        }
        return output;
    }

    /**
     * Cancel the ongoing generation process.
     *
     * <p>Does nothing if the iterator has already finished or was cancelled before, so the
     * model never receives a cancel request for a task that {@link #next()} already released.
     */
    public void cancel() {
        if (!hasNext) {
            return;
        }
        model.cancelCompletion(taskId);
        hasNext = false;
    }

    /**
     * Cancels any in-progress generation if the iterator has not yet reached a stop output.
     * Safe to call multiple times — subsequent calls are no-ops.
     *
     * <p>Prefer using the enclosing {@link LlamaIterable} in a try-with-resources block rather
     * than calling this directly.
     */
    @Override
    public void close() {
        // cancel() is itself guarded by hasNext, so this is idempotent.
        cancel();
    }
}