Skip to content

Commit 812937a

Browse files
committed
Fix parser
1 parent 2ed4745 commit 812937a

2 files changed

Lines changed: 87 additions & 84 deletions

File tree

src/main/java/com/yocto/yoclib/imap/protocol/ProtocolParser.java

Lines changed: 77 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
public final class ProtocolParser {
88

9-
public static ProtocolObject[] parse(String input) {
10-
if (input == null || input.trim().isEmpty()) {
9+
public static ProtocolObject[] parse(byte[] input) {
10+
if (input == null || input.length == 0) {
1111
return new ProtocolObject[0];
1212
}
1313

@@ -27,28 +27,29 @@ private static ProtocolObject parseToken(Tokenizer t) {
2727
t.skipWhitespace();
2828
if (!t.hasMore()) return null;
2929

30-
char c = t.peekCharNoSkip();
30+
byte b = t.peekByteNoSkip();
3131

32-
// Binary literal: ~{n} or ~{n+} ← Only if ~ is immediately followed by {
33-
if (c == '~' && t.isNextCharAfter('~', '{')) {
34-
t.consumeNoSkip('~');
32+
// Binary literal: ~{n} or ~{n+}
33+
if (b == '~' && t.isNextByteAfter((byte) '~', (byte) '{')) {
34+
t.consumeNoSkip((byte) '~');
3535
return parseBinaryLiteral(t);
3636
}
3737

3838
// Regular literal: {n} or {n+}
39-
if (c == '{') {
39+
if (b == '{') {
4040
return parseRegularLiteral(t);
4141
}
4242

43-
if (c == '(') return parseList(t);
44-
if (c == '[') return parseSubordinate(t);
45-
if (c == '"') return parseQuoted(t);
43+
if (b == '(') return parseList(t);
44+
if (b == '[') return parseSubordinate(t);
45+
if (b == '"') return parseQuoted(t);
4646

47-
// Atom (includes lone "~", etc.)
47+
// Atom
4848
String atomValue = t.readAtom();
4949
ProtocolAtom atom = new ProtocolAtom(atomValue);
5050

51-
if (t.hasMore() && (t.isNextCharImmediate('[') || t.isNextCharImmediate('<'))) {
51+
if (t.hasMore() &&
52+
(t.isNextByteImmediate((byte) '[') || t.isNextByteImmediate((byte) '<'))) {
5253
return parseSectionPartial(atom, t);
5354
}
5455

@@ -58,41 +59,40 @@ private static ProtocolObject parseToken(Tokenizer t) {
5859
// ====================== Literal Parsers ======================
5960

6061
private static ProtocolLiteral parseRegularLiteral(Tokenizer t) {
61-
LiteralHeader header = parseLiteralHeader(t, false);
62+
LiteralHeader header = parseLiteralHeader(t);
6263
t.skipCRLF();
63-
String data = t.readExactly(header.size);
64-
return new ProtocolLiteral(data, header.nonSynchronizing);
64+
byte[] data = t.readExactly(header.size);
65+
return new ProtocolLiteral(new String(data, StandardCharsets.ISO_8859_1), header.nonSynchronizing);
6566
}
6667

6768
private static ProtocolBinaryLiteral parseBinaryLiteral(Tokenizer t) {
68-
LiteralHeader header = parseLiteralHeader(t, true);
69+
LiteralHeader header = parseLiteralHeader(t);
6970
t.skipCRLF();
70-
String rawData = t.readExactly(header.size);
71-
byte[] binaryData = rawData.getBytes(StandardCharsets.ISO_8859_1);
72-
return new ProtocolBinaryLiteral(binaryData, header.nonSynchronizing);
71+
byte[] data = t.readExactly(header.size);
72+
return new ProtocolBinaryLiteral(data, header.nonSynchronizing);
7373
}
7474

75-
private static LiteralHeader parseLiteralHeader(Tokenizer t, boolean isBinary) {
76-
t.consumeNoSkip('{');
75+
private static LiteralHeader parseLiteralHeader(Tokenizer t) {
76+
t.consumeNoSkip((byte) '{');
7777

7878
StringBuilder sizeStr = new StringBuilder();
7979
boolean nonSynchronizing = false;
8080

8181
while (t.hasMore()) {
82-
char c = t.peekCharNoSkip();
83-
if (Character.isDigit(c)) {
84-
sizeStr.append(t.nextCharNoSkip());
85-
} else if (c == '+') {
82+
byte b = t.peekByteNoSkip();
83+
if (b >= '0' && b <= '9') {
84+
sizeStr.append((char) t.nextByteNoSkip());
85+
} else if (b == '+') {
8686
nonSynchronizing = true;
87-
t.consumeNoSkip('+');
88-
} else if (c == '}') {
87+
t.consumeNoSkip((byte) '+');
88+
} else if (b == '}') {
8989
break;
9090
} else {
9191
break;
9292
}
9393
}
9494

95-
t.consumeNoSkip('}');
95+
t.consumeNoSkip((byte) '}');
9696

9797
int size = 0;
9898
try {
@@ -107,12 +107,12 @@ private static LiteralHeader parseLiteralHeader(Tokenizer t, boolean isBinary) {
107107
// ====================== Other Parsers ======================
108108

109109
private static ProtocolList parseList(Tokenizer t) {
110-
t.consume('(');
110+
t.consume((byte) '(');
111111
List<ProtocolObject> elements = new ArrayList<>();
112112
while (t.hasMore()) {
113113
t.skipWhitespace();
114-
if (t.peekChar() == ')') {
115-
t.consume(')');
114+
if (t.peekByte() == ')') {
115+
t.consume((byte) ')');
116116
break;
117117
}
118118
elements.add(parseToken(t));
@@ -121,12 +121,12 @@ private static ProtocolList parseList(Tokenizer t) {
121121
}
122122

123123
private static ProtocolSubordinate parseSubordinate(Tokenizer t) {
124-
t.consume('[');
124+
t.consume((byte) '[');
125125
List<ProtocolObject> elements = new ArrayList<>();
126126
while (t.hasMore()) {
127127
t.skipWhitespace();
128-
if (t.peekChar() == ']') {
129-
t.consume(']');
128+
if (t.peekByte() == ']') {
129+
t.consume((byte) ']');
130130
break;
131131
}
132132
elements.add(parseToken(t));
@@ -139,11 +139,11 @@ private static ProtocolSectionPartial parseSectionPartial(ProtocolAtom baseAtom,
139139
Integer offset = null;
140140
Integer length = null;
141141

142-
if (t.isNextCharImmediate('[')) {
142+
if (t.isNextByteImmediate((byte) '[')) {
143143
section = parseSubordinate(t);
144144
}
145145

146-
if (t.hasMore() && t.isNextCharImmediate('<')) {
146+
if (t.hasMore() && t.isNextByteImmediate((byte) '<')) {
147147
PartialData pd = parsePartialData(t);
148148
offset = pd.offset;
149149
length = pd.length;
@@ -153,16 +153,16 @@ private static ProtocolSectionPartial parseSectionPartial(ProtocolAtom baseAtom,
153153
}
154154

155155
private static PartialData parsePartialData(Tokenizer t) {
156-
t.consumeNoSkip('<');
156+
t.consumeNoSkip((byte) '<');
157157
StringBuilder sb = new StringBuilder();
158158

159159
while (t.hasMore()) {
160-
char c = t.nextCharNoSkip();
161-
if (c == '>') {
162-
t.consumeNoSkip('>');
160+
byte b = t.nextByteNoSkip();
161+
if (b == '>') {
162+
t.consumeNoSkip((byte) '>');
163163
break;
164164
}
165-
sb.append(c);
165+
sb.append((char) b);
166166
}
167167

168168
String content = sb.toString().trim();
@@ -188,13 +188,15 @@ private static PartialData parsePartialData(Tokenizer t) {
188188
}
189189

190190
private static ProtocolQuoted parseQuoted(Tokenizer t) {
191-
t.consumeNoSkip('"');
191+
t.consumeNoSkip((byte) '"');
192192

193193
StringBuilder sb = new StringBuilder();
194194
boolean escaped = false;
195195

196-
while (t.pos < t.input.length()) {
197-
char c = t.input.charAt(t.pos++);
196+
while (t.hasMore()) {
197+
byte b = t.nextByteNoSkip();
198+
char c = (char) b;
199+
198200
if (escaped) {
199201
sb.append(c);
200202
escaped = false;
@@ -212,82 +214,83 @@ private static ProtocolQuoted parseQuoted(Tokenizer t) {
212214
// ====================== Tokenizer ======================
213215

214216
private static class Tokenizer {
215-
final String input;
217+
final byte[] input;
216218
int pos = 0;
217219

218-
Tokenizer(String input) {
220+
Tokenizer(byte[] input) {
219221
this.input = input;
220222
}
221223

222224
public boolean hasMore() {
223225
skipWhitespace();
224-
return pos < input.length();
226+
return pos < input.length;
225227
}
226228

227229
public void skipWhitespace() {
228-
while (pos < input.length() && Character.isWhitespace(input.charAt(pos))) {
230+
while (pos < input.length && Character.isWhitespace(input[pos] & 0xFF)) {
229231
pos++;
230232
}
231233
}
232234

233235
public void skipCRLF() {
234-
if (pos < input.length() && input.charAt(pos) == '\r') pos++;
235-
if (pos < input.length() && input.charAt(pos) == '\n') pos++;
236+
if (pos < input.length && input[pos] == '\r') pos++;
237+
if (pos < input.length && input[pos] == '\n') pos++;
236238
}
237239

238-
public char peekChar() {
240+
public byte peekByte() {
239241
skipWhitespace();
240-
return pos < input.length() ? input.charAt(pos) : '\0';
242+
return pos < input.length ? input[pos] : 0;
241243
}
242244

243-
public char peekCharNoSkip() {
244-
return pos < input.length() ? input.charAt(pos) : '\0';
245+
public byte peekByteNoSkip() {
246+
return pos < input.length ? input[pos] : 0;
245247
}
246248

247-
public boolean isNextCharImmediate(char expected) {
248-
return pos < input.length() && input.charAt(pos) == expected;
249+
public boolean isNextByteImmediate(byte expected) {
250+
return pos < input.length && input[pos] == expected;
249251
}
250252

251-
public boolean isNextCharAfter(char current, char expected) {
252-
if (pos >= input.length() || input.charAt(pos) != current) {
253+
public boolean isNextByteAfter(byte current, byte expected) {
254+
if (pos >= input.length || input[pos] != current) {
253255
return false;
254256
}
255-
return pos + 1 < input.length() && input.charAt(pos + 1) == expected;
257+
return pos + 1 < input.length && input[pos + 1] == expected;
256258
}
257259

258260
public String readAtom() {
259261
skipWhitespace();
260262
int start = pos;
261-
while (pos < input.length()) {
262-
char c = input.charAt(pos);
263-
if (Character.isWhitespace(c) || c == '(' || c == ')' || c == '[' || c == ']' ||
264-
c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
263+
while (pos < input.length) {
264+
byte b = input[pos];
265+
if (Character.isWhitespace(b & 0xFF) || b == '(' || b == ')' || b == '[' || b == ']' ||
266+
b == '"' || b == '{' || b == '}' || b == '<' || b == '>') {
265267
break;
266268
}
267269
pos++;
268270
}
269-
return input.substring(start, pos);
271+
return new String(input, start, pos - start, StandardCharsets.US_ASCII);
270272
}
271273

272-
public String readExactly(int count) {
273-
if (count <= 0) return "";
274-
int toRead = Math.min(count, input.length() - pos);
275-
String data = input.substring(pos, pos + toRead);
274+
public byte[] readExactly(int count) {
275+
if (count <= 0) return new byte[0];
276+
int toRead = Math.min(count, input.length - pos);
277+
byte[] data = new byte[toRead];
278+
System.arraycopy(input, pos, data, 0, toRead);
276279
pos += toRead;
277280
return data;
278281
}
279282

280-
public void consume(char expected) {
283+
public void consume(byte expected) {
281284
skipWhitespace();
282-
if (pos < input.length() && input.charAt(pos) == expected) pos++;
285+
if (pos < input.length && input[pos] == expected) pos++;
283286
}
284287

285-
public void consumeNoSkip(char expected) {
286-
if (pos < input.length() && input.charAt(pos) == expected) pos++;
288+
public void consumeNoSkip(byte expected) {
289+
if (pos < input.length && input[pos] == expected) pos++;
287290
}
288291

289-
public char nextCharNoSkip() {
290-
return pos < input.length() ? input.charAt(pos++) : '\0';
292+
public byte nextByteNoSkip() {
293+
return pos < input.length ? input[pos++] : 0;
291294
}
292295
}
293296

src/test/java/com/yocto/yoclib/imap/tests/protocol/ProtocolParserTest.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,54 +16,54 @@ public class ProtocolParserTest{
1616
public void testParsingBinaryLiteral(){
1717
assertArrayEquals(new ProtocolObject[]{
1818
new ProtocolAtom("~"),
19-
},ProtocolParser.parse("~\r\n"));
19+
},ProtocolParser.parse("~\r\n".getBytes()));
2020

2121
assertArrayEquals(new ProtocolObject[]{
2222
new ProtocolAtom("~/Mail/foo"),
23-
},ProtocolParser.parse("~/Mail/foo\r\n"));
23+
},ProtocolParser.parse("~/Mail/foo\r\n".getBytes()));
2424

2525
assertArrayEquals(new ProtocolObject[]{
2626
new ProtocolBinaryLiteral(""),
2727
new ProtocolAtom("abc"),
28-
},ProtocolParser.parse("~{0}\r\n abc\r\n"));
28+
},ProtocolParser.parse("~{0}\r\n abc\r\n".getBytes()));
2929

3030
assertArrayEquals(new ProtocolObject[]{
3131
new ProtocolBinaryLiteral("def"),
3232
new ProtocolAtom("abc"),
33-
},ProtocolParser.parse("~{3}\r\ndef abc\r\n"));
33+
},ProtocolParser.parse("~{3}\r\ndef abc\r\n".getBytes()));
3434

3535
assertArrayEquals(new ProtocolObject[]{
3636
new ProtocolBinaryLiteral("",true),
3737
new ProtocolAtom("abc"),
38-
},ProtocolParser.parse("~{0+}\r\n abc\r\n"));
38+
},ProtocolParser.parse("~{0+}\r\n abc\r\n".getBytes()));
3939

4040
assertArrayEquals(new ProtocolObject[]{
4141
new ProtocolBinaryLiteral("def",true),
4242
new ProtocolAtom("abc"),
43-
},ProtocolParser.parse("~{3+}\r\ndef abc\r\n"));
43+
},ProtocolParser.parse("~{3+}\r\ndef abc\r\n".getBytes()));
4444
}
4545

4646
@Test
4747
public void testParsingLiteral(){
4848
assertArrayEquals(new ProtocolObject[]{
4949
new ProtocolLiteral(""),
5050
new ProtocolAtom("abc"),
51-
},ProtocolParser.parse("{0}\r\n abc\r\n"));
51+
},ProtocolParser.parse("{0}\r\n abc\r\n".getBytes()));
5252

5353
assertArrayEquals(new ProtocolObject[]{
5454
new ProtocolLiteral("def"),
5555
new ProtocolAtom("abc"),
56-
},ProtocolParser.parse("{3}\r\ndef abc\r\n"));
56+
},ProtocolParser.parse("{3}\r\ndef abc\r\n".getBytes()));
5757

5858
assertArrayEquals(new ProtocolObject[]{
5959
new ProtocolLiteral("",true),
6060
new ProtocolAtom("abc"),
61-
},ProtocolParser.parse("{0+}\r\n abc\r\n"));
61+
},ProtocolParser.parse("{0+}\r\n abc\r\n".getBytes()));
6262

6363
assertArrayEquals(new ProtocolObject[]{
6464
new ProtocolLiteral("def",true),
6565
new ProtocolAtom("abc"),
66-
},ProtocolParser.parse("{3+}\r\ndef abc\r\n"));
66+
},ProtocolParser.parse("{3+}\r\ndef abc\r\n".getBytes()));
6767
}
6868

6969
@Test

0 commit comments

Comments
 (0)