11package com .yocto .yoclib .imap .protocol ;
22
3+ import java .nio .charset .StandardCharsets ;
34import java .util .ArrayList ;
45import java .util .List ;
56
6- public class ProtocolParser {
7+ public class ProtocolParser {
78
89 public static ProtocolObject [] parse (String input ) {
910 if (input == null || input .trim ().isEmpty ()) {
@@ -26,25 +27,127 @@ private static ProtocolObject parseToken(Tokenizer t) {
2627 t .skipWhitespace ();
2728 if (!t .hasMore ()) return null ;
2829
29- char c = t .peekChar ();
30+ char c = t .peekCharNoSkip ();
31+
32+ // Binary literal: ~{n} or ~{n+}
33+ if (c == '~' ) {
34+ t .consumeNoSkip ('~' );
35+ return parseBinaryLiteral (t );
36+ }
37+
38+ // Regular literal: {n} or {n+}
39+ if (c == '{' ) {
40+ return parseRegularLiteral (t );
41+ }
3042
3143 if (c == '(' ) return parseList (t );
3244 if (c == '[' ) return parseSubordinate (t );
33- if (c == '{' ) return parseLiteral (t );
34- if (c == '"' ) return parseQuoted (t ); // quoted string
45+ if (c == '"' ) return parseQuoted (t );
3546
47+ // Atom
3648 String atomValue = t .readAtom ();
3749 ProtocolAtom atom = new ProtocolAtom (atomValue );
3850
39- // Strict SectionPartial: no whitespace between atom and [ or <
4051 if (t .hasMore () && (t .isNextCharImmediate ('[' ) || t .isNextCharImmediate ('<' ))) {
4152 return parseSectionPartial (atom , t );
4253 }
4354
4455 return atom ;
4556 }
4657
47- private static ProtocolObject parseSectionPartial (ProtocolAtom baseAtom , Tokenizer t ) {
58+ // ====================== Split Literal Parsers ======================
59+
60+ /**
61+ * Parses regular literals: {n} or {n+}
62+ */
63+ private static ProtocolLiteral parseRegularLiteral (Tokenizer t ) {
64+ LiteralHeader header = parseLiteralHeader (t , false );
65+ t .skipCRLF ();
66+
67+ String data = t .readExactly (header .size );
68+ return new ProtocolLiteral (data , header .nonSynchronizing );
69+ }
70+
71+ /**
72+ * Parses binary literals: ~{n} or ~{n+}
73+ */
74+ private static ProtocolBinaryLiteral parseBinaryLiteral (Tokenizer t ) {
75+ LiteralHeader header = parseLiteralHeader (t , true );
76+ t .skipCRLF ();
77+
78+ String rawData = t .readExactly (header .size );
79+ byte [] binaryData = rawData .getBytes (StandardCharsets .ISO_8859_1 );
80+
81+ return new ProtocolBinaryLiteral (binaryData , header .nonSynchronizing );
82+ }
83+
84+ /**
85+ * Common helper: parses the header part {size} or {size+}
86+ * (tilde ~ is already consumed for binary case)
87+ */
88+ private static LiteralHeader parseLiteralHeader (Tokenizer t , boolean isBinary ) {
89+ t .consumeNoSkip ('{' );
90+
91+ StringBuilder sizeStr = new StringBuilder ();
92+ boolean nonSynchronizing = false ;
93+
94+ while (t .hasMore ()) {
95+ char c = t .peekCharNoSkip ();
96+ if (Character .isDigit (c )) {
97+ sizeStr .append (t .nextCharNoSkip ());
98+ } else if (c == '+' ) {
99+ nonSynchronizing = true ;
100+ t .consumeNoSkip ('+' );
101+ } else if (c == '}' ) {
102+ break ;
103+ } else {
104+ break ; // malformed
105+ }
106+ }
107+
108+ t .consumeNoSkip ('}' );
109+
110+ int size = 0 ;
111+ try {
112+ if (sizeStr .length () > 0 ) {
113+ size = Integer .parseInt (sizeStr .toString ());
114+ }
115+ } catch (Exception ignored ) {}
116+
117+ return new LiteralHeader (size , nonSynchronizing );
118+ }
119+
120+ // ====================== Other Parsers (unchanged) ======================
121+
122+ private static ProtocolList parseList (Tokenizer t ) {
123+ t .consume ('(' );
124+ List <ProtocolObject > elements = new ArrayList <>();
125+ while (t .hasMore ()) {
126+ t .skipWhitespace ();
127+ if (t .peekChar () == ')' ) {
128+ t .consume (')' );
129+ break ;
130+ }
131+ elements .add (parseToken (t ));
132+ }
133+ return new ProtocolList (elements .toArray (new ProtocolObject [0 ]));
134+ }
135+
136+ private static ProtocolSubordinate parseSubordinate (Tokenizer t ) {
137+ t .consume ('[' );
138+ List <ProtocolObject > elements = new ArrayList <>();
139+ while (t .hasMore ()) {
140+ t .skipWhitespace ();
141+ if (t .peekChar () == ']' ) {
142+ t .consume (']' );
143+ break ;
144+ }
145+ elements .add (parseToken (t ));
146+ }
147+ return new ProtocolSubordinate (elements .toArray (new ProtocolObject [0 ]));
148+ }
149+
150+ private static ProtocolSectionPartial parseSectionPartial (ProtocolAtom baseAtom , Tokenizer t ) {
48151 ProtocolSubordinate section = null ;
49152 Integer offset = null ;
50153 Integer length = null ;
@@ -97,62 +200,13 @@ private static PartialData parsePartialData(Tokenizer t) {
97200 return new PartialData (offset , length );
98201 }
99202
100- // ====================== Parsers ======================
101-
102- private static ProtocolList parseList (Tokenizer t ) {
103- t .consume ('(' );
104- List <ProtocolObject > elements = new ArrayList <>();
105- while (t .hasMore ()) {
106- t .skipWhitespace ();
107- if (t .peekChar () == ')' ) {
108- t .consume (')' );
109- break ;
110- }
111- elements .add (parseToken (t ));
112- }
113- return new ProtocolList (elements .toArray (new ProtocolObject [0 ]));
114- }
115-
116- private static ProtocolSubordinate parseSubordinate (Tokenizer t ) {
117- t .consume ('[' );
118- List <ProtocolObject > elements = new ArrayList <>();
119- while (t .hasMore ()) {
120- t .skipWhitespace ();
121- if (t .peekChar () == ']' ) {
122- t .consume (']' );
123- break ;
124- }
125- elements .add (parseToken (t ));
126- }
127- return new ProtocolSubordinate (elements .toArray (new ProtocolObject [0 ]));
128- }
129-
130- private static ProtocolLiteral parseLiteral (Tokenizer t ) {
131- String sizeToken = t .readUntil ('}' );
132- if (!sizeToken .startsWith ("{" ) || !sizeToken .endsWith ("}" )) {
133- return new ProtocolLiteral (sizeToken );
134- }
135-
136- int size = 0 ;
137- try {
138- size = Integer .parseInt (sizeToken .substring (1 , sizeToken .length () - 1 ));
139- } catch (Exception ignored ) {}
140-
141- t .skipCRLF ();
142- String data = t .readExactly (size );
143- return new ProtocolLiteral (data );
144- }
145-
146- /**
147- * Correct quoted string parser - preserves all whitespace inside quotes
148- */
149203 private static ProtocolQuoted parseQuoted (Tokenizer t ) {
150- t .consumeNoSkip ('"' ); // consume opening quote without whitespace skip
204+ t .consumeNoSkip ('"' );
151205
152206 StringBuilder sb = new StringBuilder ();
153207 boolean escaped = false ;
154208
155- while (t .pos < t .input .length ()) { // direct position check to avoid any skip
209+ while (t .pos < t .input .length ()) {
156210 char c = t .input .charAt (t .pos ++);
157211 if (escaped ) {
158212 sb .append (c );
@@ -162,23 +216,13 @@ private static ProtocolQuoted parseQuoted(Tokenizer t) {
162216 } else if (c == '"' ) {
163217 break ;
164218 } else {
165- sb .append (c ); // spaces and all characters preserved
219+ sb .append (c );
166220 }
167221 }
168222 return new ProtocolQuoted (sb .toString ());
169223 }
170224
171- private static class PartialData {
172- final Integer offset ;
173- final Integer length ;
174-
175- PartialData (Integer offset , Integer length ) {
176- this .offset = offset != null ? offset : 0 ;
177- this .length = length ;
178- }
179- }
180-
181- // ====================== Strict Tokenizer ======================
225+ // ====================== Tokenizer (unchanged) ======================
182226
183227 private static class Tokenizer {
184228 final String input ;
@@ -223,22 +267,14 @@ public String readAtom() {
223267 while (pos < input .length ()) {
224268 char c = input .charAt (pos );
225269 if (Character .isWhitespace (c ) || c == '(' || c == ')' || c == '[' || c == ']' ||
226- c == '"' || c == '{' || c == '<' || c == '>' ) {
270+ c == '"' || c == '{' || c == '}' || c == ' <' || c == '>' ) {
227271 break ;
228272 }
229273 pos ++;
230274 }
231275 return input .substring (start , pos );
232276 }
233277
234- public String readUntil (char endChar ) {
235- skipWhitespace ();
236- int start = pos ;
237- while (pos < input .length () && input .charAt (pos ) != endChar ) pos ++;
238- if (pos < input .length ()) pos ++;
239- return input .substring (start , pos );
240- }
241-
242278 public String readExactly (int count ) {
243279 if (count <= 0 ) return "" ;
244280 int toRead = Math .min (count , input .length () - pos );
@@ -261,4 +297,25 @@ public char nextCharNoSkip() {
261297 }
262298 }
263299
300+ // ====================== Helper Classes ======================
301+
302+ private static class LiteralHeader {
303+ final int size ;
304+ final boolean nonSynchronizing ;
305+
306+ LiteralHeader (int size , boolean nonSynchronizing ) {
307+ this .size = size ;
308+ this .nonSynchronizing = nonSynchronizing ;
309+ }
310+ }
311+
312+ private static class PartialData {
313+ final Integer offset ;
314+ final Integer length ;
315+
316+ PartialData (Integer offset , Integer length ) {
317+ this .offset = offset != null ? offset : 0 ;
318+ this .length = length ;
319+ }
320+ }
264321}
0 commit comments