Skip to content

Commit d4b0db4

Browse files
committed
refactor: split SequenceDecoder into AnsiDecoder and UnicodeDecoder
1 parent d18af7f commit d4b0db4

6 files changed

Lines changed: 423 additions & 332 deletions

File tree

twinkle-screen/src/main/java/org/codejive/twinkle/screen/io/BufferWriter.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
import org.codejive.twinkle.ansi.util.AnsiOutputParser.AnsiSequenceHandler;
88
import org.codejive.twinkle.screen.Buffer;
99
import org.codejive.twinkle.screen.Buffer.LinkPrintOption;
10-
import org.codejive.twinkle.text.SequenceDecoder;
1110
import org.codejive.twinkle.text.Size;
11+
import org.codejive.twinkle.text.UnicodeDecoder;
1212
import org.jspecify.annotations.NonNull;
1313

1414
public class BufferWriter extends Writer {
1515
protected Buffer buffer;
16-
protected SequenceDecoder decoder;
16+
protected UnicodeDecoder decoder;
1717
int cursorX;
1818
int cursorY;
1919
private int savedCursorX;
@@ -26,7 +26,7 @@ public class BufferWriter extends Writer {
2626

2727
public BufferWriter(@NonNull Buffer buffer) {
2828
this.buffer = buffer;
29-
this.decoder = new SequenceDecoder();
29+
this.decoder = new UnicodeDecoder();
3030
this.cursorX = 0;
3131
this.cursorY = 0;
3232
this.savedCursorX = 0;
@@ -73,7 +73,7 @@ public void flush() {
7373
}
7474
decoder.finish();
7575
if (decoder.isReady()) {
76-
if (decoder.state() == SequenceDecoder.State.ANSI_ESCAPE_SEQUENCE) {
76+
if (decoder.state() == UnicodeDecoder.ANSI) {
7777
handleAnsiSequence(decoder.toString());
7878
} else if (decoder.codepoint() == '\n') {
7979
cursorX = 0;
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
package org.codejive.twinkle.ansi.util;
2+
3+
import org.codejive.twinkle.ansi.Constants;
4+
5+
/**
6+
* A base decoder for handling ANSI escape sequences. This class provides the foundation for
7+
* decoding character sequences, with a focus on identifying and parsing ANSI escape sequences.
8+
* Subclasses can extend this to add additional sequence handling.
9+
*
10+
* <p>Characters are pushed into the decoder while its state is INCOMPLETE. Once enough information
11+
* is available to determine the sequence type, the state changes accordingly. Use {@code reset()}
12+
* to clear the decoder and start a new sequence.
13+
*/
14+
public class AnsiDecoder {
15+
// State constants
16+
public static final int INCOMPLETE = 0;
17+
public static final int ANSI = 1;
18+
public static final int ERROR = 2;
19+
20+
protected enum AnsiMode {
21+
NONE,
22+
PREFIX,
23+
CSI,
24+
OSC
25+
}
26+
27+
protected final StringBuilder buffer = new StringBuilder();
28+
protected int state = INCOMPLETE;
29+
protected AnsiMode ansiMode = AnsiMode.NONE;
30+
protected boolean oscSeenEsc = false;
31+
32+
/**
33+
* Pushes a character value (as an int) into the decoder.
34+
*
35+
* <p>Accepts int values to support full Unicode range including supplementary characters. This
36+
* base implementation handles ANSI escape sequences. Subclasses should override {@link
37+
* #handleNonAnsi(int)} to provide additional handling for non-ANSI characters.
38+
*
39+
* @param c the character value to push
40+
*/
41+
public void push(int c) {
42+
if (!canPush(c)) {
43+
state = ERROR;
44+
return;
45+
}
46+
47+
if (Character.isSupplementaryCodePoint(c)) {
48+
buffer.append(Character.toChars(c));
49+
} else {
50+
buffer.append((char) c);
51+
}
52+
53+
if (ansiMode != AnsiMode.NONE) {
54+
char[] chars = Character.toChars(c);
55+
for (int i = 0; i < chars.length; i++) {
56+
pushAnsi(chars[i]);
57+
if (state == ERROR || state == ANSI) {
58+
break;
59+
}
60+
}
61+
return;
62+
}
63+
64+
if (c == Constants.ESC) {
65+
pushAnsi((char) c);
66+
return;
67+
}
68+
69+
handleNonAnsi(c);
70+
}
71+
72+
/**
73+
* Returns true if the given character value can be consumed as part of the currently decoded
74+
* sequence.
75+
*
76+
* <p>This is a non-mutating probe. Callers can use it to detect sequence boundaries without
77+
* relying on completion heuristics.
78+
*
79+
* @param c the character value to check
80+
* @return true if the character can be pushed
81+
*/
82+
public boolean canPush(int c) {
83+
if (state == ERROR || state == ANSI) {
84+
return false;
85+
}
86+
87+
if (!Character.isValidCodePoint(c)) {
88+
return false;
89+
}
90+
91+
if (ansiMode != AnsiMode.NONE) {
92+
return true;
93+
}
94+
95+
if (buffer.length() == 0) {
96+
return true;
97+
}
98+
99+
if (c == Constants.ESC) {
100+
return false;
101+
}
102+
103+
return canPushNonAnsi(c);
104+
}
105+
106+
/**
107+
* Finalizes pending state when no more input is available.
108+
*
109+
* <p>This base implementation resolves unterminated ANSI escapes as ANSI sequences. Subclasses
110+
* should override {@link #finishNonAnsi()} to handle additional finalization logic.
111+
*/
112+
public void finish() {
113+
if (state == ERROR) {
114+
return;
115+
}
116+
if (ansiMode != AnsiMode.NONE) {
117+
ansiMode = AnsiMode.NONE;
118+
state = ANSI;
119+
return;
120+
}
121+
finishNonAnsi();
122+
}
123+
124+
/** Resets the decoder to its initial state, clearing all accumulated data. */
125+
public void reset() {
126+
buffer.setLength(0);
127+
state = INCOMPLETE;
128+
ansiMode = AnsiMode.NONE;
129+
oscSeenEsc = false;
130+
resetNonAnsi();
131+
}
132+
133+
/** Returns true if the decoder has completed a sequence. */
134+
public boolean isComplete() {
135+
return state() != INCOMPLETE;
136+
}
137+
138+
/**
139+
* Returns the current state of the decoder.
140+
*
141+
* @return the current state as an int constant
142+
*/
143+
public int state() {
144+
return state;
145+
}
146+
147+
/** Returns the buffered sequence as a string. */
148+
@Override
149+
public String toString() {
150+
return buffer.toString();
151+
}
152+
153+
/** Handles ANSI escape sequence parsing logic. */
154+
protected void pushAnsi(char ch) {
155+
if (state == ANSI) {
156+
state = ERROR;
157+
return;
158+
}
159+
160+
if (ansiMode == AnsiMode.NONE) {
161+
if (ch == Constants.ESC) {
162+
ansiMode = AnsiMode.PREFIX;
163+
state = INCOMPLETE;
164+
return;
165+
}
166+
state = ERROR;
167+
return;
168+
}
169+
170+
if (ansiMode == AnsiMode.PREFIX) {
171+
if (ch == '[') {
172+
ansiMode = AnsiMode.CSI;
173+
state = INCOMPLETE;
174+
} else if (ch == ']') {
175+
ansiMode = AnsiMode.OSC;
176+
state = INCOMPLETE;
177+
oscSeenEsc = false;
178+
} else {
179+
state = ANSI;
180+
ansiMode = AnsiMode.NONE;
181+
}
182+
return;
183+
}
184+
185+
if (ansiMode == AnsiMode.CSI) {
186+
if (ch >= 0x40 && ch <= 0x7E) {
187+
state = ANSI;
188+
ansiMode = AnsiMode.NONE;
189+
} else {
190+
state = INCOMPLETE;
191+
}
192+
return;
193+
}
194+
195+
if (ansiMode == AnsiMode.OSC) {
196+
if (oscSeenEsc) {
197+
if (ch == '\\') {
198+
state = ANSI;
199+
ansiMode = AnsiMode.NONE;
200+
oscSeenEsc = false;
201+
return;
202+
}
203+
oscSeenEsc = (ch == Constants.ESC);
204+
state = INCOMPLETE;
205+
return;
206+
}
207+
if (ch == 0x07) {
208+
state = ANSI;
209+
ansiMode = AnsiMode.NONE;
210+
return;
211+
}
212+
oscSeenEsc = (ch == Constants.ESC);
213+
state = INCOMPLETE;
214+
}
215+
}
216+
217+
/**
218+
* Hook for subclasses to handle non-ANSI characters. Base implementation sets state to ERROR.
219+
*
220+
* @param c the character value to handle
221+
*/
222+
protected void handleNonAnsi(int c) {
223+
state = ERROR;
224+
}
225+
226+
/**
227+
* Hook for subclasses to check if a non-ANSI character can be pushed. Base implementation
228+
* returns false.
229+
*
230+
* @param c the character value to check
231+
* @return true if the character can be pushed
232+
*/
233+
protected boolean canPushNonAnsi(int c) {
234+
return false;
235+
}
236+
237+
/** Hook for subclasses to perform finalization of non-ANSI sequences. */
238+
protected void finishNonAnsi() {
239+
// Base implementation does nothing
240+
}
241+
242+
/** Hook for subclasses to reset non-ANSI state. */
243+
protected void resetNonAnsi() {
244+
// Base implementation does nothing
245+
}
246+
}

0 commit comments

Comments
 (0)