66
77public final class ProtocolParser {
88
9- public static ProtocolObject [] parse (String input ) {
10- if (input == null || input .trim (). isEmpty () ) {
9+ public static ProtocolObject [] parse (byte [] input ) {
10+ if (input == null || input .length == 0 ) {
1111 return new ProtocolObject [0 ];
1212 }
1313
@@ -27,28 +27,29 @@ private static ProtocolObject parseToken(Tokenizer t) {
2727 t .skipWhitespace ();
2828 if (!t .hasMore ()) return null ;
2929
30- char c = t .peekCharNoSkip ();
30+ byte b = t .peekByteNoSkip ();
3131
32- // Binary literal: ~{n} or ~{n+} ← Only if ~ is immediately followed by {
33- if (c == '~' && t .isNextCharAfter ( '~' , '{' )) {
34- t .consumeNoSkip ('~' );
32+ // Binary literal: ~{n} or ~{n+}
33+ if (b == '~' && t .isNextByteAfter (( byte ) '~' , ( byte ) '{' )) {
34+ t .consumeNoSkip (( byte ) '~' );
3535 return parseBinaryLiteral (t );
3636 }
3737
3838 // Regular literal: {n} or {n+}
39- if (c == '{' ) {
39+ if (b == '{' ) {
4040 return parseRegularLiteral (t );
4141 }
4242
43- if (c == '(' ) return parseList (t );
44- if (c == '[' ) return parseSubordinate (t );
45- if (c == '"' ) return parseQuoted (t );
43+ if (b == '(' ) return parseList (t );
44+ if (b == '[' ) return parseSubordinate (t );
45+ if (b == '"' ) return parseQuoted (t );
4646
47- // Atom (includes lone "~", etc.)
47+ // Atom
4848 String atomValue = t .readAtom ();
4949 ProtocolAtom atom = new ProtocolAtom (atomValue );
5050
51- if (t .hasMore () && (t .isNextCharImmediate ('[' ) || t .isNextCharImmediate ('<' ))) {
51+ if (t .hasMore () &&
52+ (t .isNextByteImmediate ((byte ) '[' ) || t .isNextByteImmediate ((byte ) '<' ))) {
5253 return parseSectionPartial (atom , t );
5354 }
5455
@@ -58,41 +59,40 @@ private static ProtocolObject parseToken(Tokenizer t) {
5859 // ====================== Literal Parsers ======================
5960
6061 private static ProtocolLiteral parseRegularLiteral (Tokenizer t ) {
61- LiteralHeader header = parseLiteralHeader (t , false );
62+ LiteralHeader header = parseLiteralHeader (t );
6263 t .skipCRLF ();
63- String data = t .readExactly (header .size );
64- return new ProtocolLiteral (data , header .nonSynchronizing );
64+ byte [] data = t .readExactly (header .size );
65+ return new ProtocolLiteral (new String ( data , StandardCharsets . ISO_8859_1 ) , header .nonSynchronizing );
6566 }
6667
6768 private static ProtocolBinaryLiteral parseBinaryLiteral (Tokenizer t ) {
68- LiteralHeader header = parseLiteralHeader (t , true );
69+ LiteralHeader header = parseLiteralHeader (t );
6970 t .skipCRLF ();
70- String rawData = t .readExactly (header .size );
71- byte [] binaryData = rawData .getBytes (StandardCharsets .ISO_8859_1 );
72- return new ProtocolBinaryLiteral (binaryData , header .nonSynchronizing );
71+ byte [] data = t .readExactly (header .size );
72+ return new ProtocolBinaryLiteral (data , header .nonSynchronizing );
7373 }
7474
75- private static LiteralHeader parseLiteralHeader (Tokenizer t , boolean isBinary ) {
76- t .consumeNoSkip ('{' );
75+ private static LiteralHeader parseLiteralHeader (Tokenizer t ) {
76+ t .consumeNoSkip (( byte ) '{' );
7777
7878 StringBuilder sizeStr = new StringBuilder ();
7979 boolean nonSynchronizing = false ;
8080
8181 while (t .hasMore ()) {
82- char c = t .peekCharNoSkip ();
83- if (Character . isDigit ( c ) ) {
84- sizeStr .append (t . nextCharNoSkip ());
85- } else if (c == '+' ) {
82+ byte b = t .peekByteNoSkip ();
83+ if (b >= '0' && b <= '9' ) {
84+ sizeStr .append (( char ) t . nextByteNoSkip ());
85+ } else if (b == '+' ) {
8686 nonSynchronizing = true ;
87- t .consumeNoSkip ('+' );
88- } else if (c == '}' ) {
87+ t .consumeNoSkip (( byte ) '+' );
88+ } else if (b == '}' ) {
8989 break ;
9090 } else {
9191 break ;
9292 }
9393 }
9494
95- t .consumeNoSkip ('}' );
95+ t .consumeNoSkip (( byte ) '}' );
9696
9797 int size = 0 ;
9898 try {
@@ -107,12 +107,12 @@ private static LiteralHeader parseLiteralHeader(Tokenizer t, boolean isBinary) {
107107 // ====================== Other Parsers ======================
108108
109109 private static ProtocolList parseList (Tokenizer t ) {
110- t .consume ('(' );
110+ t .consume (( byte ) '(' );
111111 List <ProtocolObject > elements = new ArrayList <>();
112112 while (t .hasMore ()) {
113113 t .skipWhitespace ();
114- if (t .peekChar () == ')' ) {
115- t .consume (')' );
114+ if (t .peekByte () == ')' ) {
115+ t .consume (( byte ) ')' );
116116 break ;
117117 }
118118 elements .add (parseToken (t ));
@@ -121,12 +121,12 @@ private static ProtocolList parseList(Tokenizer t) {
121121 }
122122
123123 private static ProtocolSubordinate parseSubordinate (Tokenizer t ) {
124- t .consume ('[' );
124+ t .consume (( byte ) '[' );
125125 List <ProtocolObject > elements = new ArrayList <>();
126126 while (t .hasMore ()) {
127127 t .skipWhitespace ();
128- if (t .peekChar () == ']' ) {
129- t .consume (']' );
128+ if (t .peekByte () == ']' ) {
129+ t .consume (( byte ) ']' );
130130 break ;
131131 }
132132 elements .add (parseToken (t ));
@@ -139,11 +139,11 @@ private static ProtocolSectionPartial parseSectionPartial(ProtocolAtom baseAtom,
139139 Integer offset = null ;
140140 Integer length = null ;
141141
142- if (t .isNextCharImmediate ( '[' )) {
142+ if (t .isNextByteImmediate (( byte ) '[' )) {
143143 section = parseSubordinate (t );
144144 }
145145
146- if (t .hasMore () && t .isNextCharImmediate ( '<' )) {
146+ if (t .hasMore () && t .isNextByteImmediate (( byte ) '<' )) {
147147 PartialData pd = parsePartialData (t );
148148 offset = pd .offset ;
149149 length = pd .length ;
@@ -153,16 +153,16 @@ private static ProtocolSectionPartial parseSectionPartial(ProtocolAtom baseAtom,
153153 }
154154
155155 private static PartialData parsePartialData (Tokenizer t ) {
156- t .consumeNoSkip ('<' );
156+ t .consumeNoSkip (( byte ) '<' );
157157 StringBuilder sb = new StringBuilder ();
158158
159159 while (t .hasMore ()) {
160- char c = t .nextCharNoSkip ();
161- if (c == '>' ) {
162- t .consumeNoSkip ('>' );
160+ byte b = t .nextByteNoSkip ();
161+ if (b == '>' ) {
162+ t .consumeNoSkip (( byte ) '>' );
163163 break ;
164164 }
165- sb .append (c );
165+ sb .append (( char ) b );
166166 }
167167
168168 String content = sb .toString ().trim ();
@@ -188,13 +188,15 @@ private static PartialData parsePartialData(Tokenizer t) {
188188 }
189189
190190 private static ProtocolQuoted parseQuoted (Tokenizer t ) {
191- t .consumeNoSkip ('"' );
191+ t .consumeNoSkip (( byte ) '"' );
192192
193193 StringBuilder sb = new StringBuilder ();
194194 boolean escaped = false ;
195195
196- while (t .pos < t .input .length ()) {
197- char c = t .input .charAt (t .pos ++);
196+ while (t .hasMore ()) {
197+ byte b = t .nextByteNoSkip ();
198+ char c = (char ) b ;
199+
198200 if (escaped ) {
199201 sb .append (c );
200202 escaped = false ;
@@ -212,82 +214,83 @@ private static ProtocolQuoted parseQuoted(Tokenizer t) {
212214 // ====================== Tokenizer ======================
213215
214216 private static class Tokenizer {
215- final String input ;
217+ final byte [] input ;
216218 int pos = 0 ;
217219
218- Tokenizer (String input ) {
220+ Tokenizer (byte [] input ) {
219221 this .input = input ;
220222 }
221223
222224 public boolean hasMore () {
223225 skipWhitespace ();
224- return pos < input .length () ;
226+ return pos < input .length ;
225227 }
226228
227229 public void skipWhitespace () {
228- while (pos < input .length () && Character .isWhitespace (input . charAt ( pos ) )) {
230+ while (pos < input .length && Character .isWhitespace (input [ pos ] & 0xFF )) {
229231 pos ++;
230232 }
231233 }
232234
233235 public void skipCRLF () {
234- if (pos < input .length () && input . charAt ( pos ) == '\r' ) pos ++;
235- if (pos < input .length () && input . charAt ( pos ) == '\n' ) pos ++;
236+ if (pos < input .length && input [ pos ] == '\r' ) pos ++;
237+ if (pos < input .length && input [ pos ] == '\n' ) pos ++;
236238 }
237239
238- public char peekChar () {
240+ public byte peekByte () {
239241 skipWhitespace ();
240- return pos < input .length () ? input . charAt ( pos ) : '\0' ;
242+ return pos < input .length ? input [ pos ] : 0 ;
241243 }
242244
243- public char peekCharNoSkip () {
244- return pos < input .length () ? input . charAt ( pos ) : '\0' ;
245+ public byte peekByteNoSkip () {
246+ return pos < input .length ? input [ pos ] : 0 ;
245247 }
246248
247- public boolean isNextCharImmediate ( char expected ) {
248- return pos < input .length () && input . charAt ( pos ) == expected ;
249+ public boolean isNextByteImmediate ( byte expected ) {
250+ return pos < input .length && input [ pos ] == expected ;
249251 }
250252
251- public boolean isNextCharAfter ( char current , char expected ) {
252- if (pos >= input .length () || input . charAt ( pos ) != current ) {
253+ public boolean isNextByteAfter ( byte current , byte expected ) {
254+ if (pos >= input .length || input [ pos ] != current ) {
253255 return false ;
254256 }
255- return pos + 1 < input .length () && input . charAt ( pos + 1 ) == expected ;
257+ return pos + 1 < input .length && input [ pos + 1 ] == expected ;
256258 }
257259
258260 public String readAtom () {
259261 skipWhitespace ();
260262 int start = pos ;
261- while (pos < input .length () ) {
262- char c = input . charAt ( pos ) ;
263- if (Character .isWhitespace (c ) || c == '(' || c == ')' || c == '[' || c == ']' ||
264- c == '"' || c == '{' || c == '}' || c == '<' || c == '>' ) {
263+ while (pos < input .length ) {
264+ byte b = input [ pos ] ;
265+ if (Character .isWhitespace (b & 0xFF ) || b == '(' || b == ')' || b == '[' || b == ']' ||
266+ b == '"' || b == '{' || b == '}' || b == '<' || b == '>' ) {
265267 break ;
266268 }
267269 pos ++;
268270 }
269- return input . substring ( start , pos );
271+ return new String ( input , start , pos - start , StandardCharsets . US_ASCII );
270272 }
271273
272- public String readExactly (int count ) {
273- if (count <= 0 ) return "" ;
274- int toRead = Math .min (count , input .length () - pos );
275- String data = input .substring (pos , pos + toRead );
274+ public byte [] readExactly (int count ) {
275+ if (count <= 0 ) return new byte [0 ];
276+ int toRead = Math .min (count , input .length - pos );
277+ byte [] data = new byte [toRead ];
278+ System .arraycopy (input , pos , data , 0 , toRead );
276279 pos += toRead ;
277280 return data ;
278281 }
279282
280- public void consume (char expected ) {
283+ public void consume (byte expected ) {
281284 skipWhitespace ();
282- if (pos < input .length () && input . charAt ( pos ) == expected ) pos ++;
285+ if (pos < input .length && input [ pos ] == expected ) pos ++;
283286 }
284287
285- public void consumeNoSkip (char expected ) {
286- if (pos < input .length () && input . charAt ( pos ) == expected ) pos ++;
288+ public void consumeNoSkip (byte expected ) {
289+ if (pos < input .length && input [ pos ] == expected ) pos ++;
287290 }
288291
289- public char nextCharNoSkip () {
290- return pos < input .length () ? input . charAt ( pos ++) : '\0' ;
292+ public byte nextByteNoSkip () {
293+ return pos < input .length ? input [ pos ++] : 0 ;
291294 }
292295 }
293296
0 commit comments