2121
// Symbols this external scanner can report through lexer->result_symbol.
//
// NOTE(review): tree-sitter identifies external tokens by position, so the
// order here presumably has to match the `externals` list in the grammar --
// confirm against grammar.js before reordering or inserting members.
enum TokenType {
    CODE_IDENTIFIER,        // reported by scan_code_identifier
    RAW_DATA,               // reported by the raw-data branch of the scan entry point
    WHITESPACE_NO_NEWLINE,  // reported by scan_whitespace_no_newline
    ERROR_SENTINEL,         // NOTE(review): presumably only valid during error recovery -- confirm
};
2728
29+ static inline void advance (TSLexer * lexer ) { lexer -> advance (lexer , false); }
30+
31+ static inline void skip (TSLexer * lexer ) { lexer -> advance (lexer , true); }
32+
2833static bool is_hexadecimal_character (char character ) {
2934 switch (character ) {
3035 case '0' :
@@ -67,7 +72,7 @@ static bool scan_code_identifier(TSLexer *lexer) {
6772 unsigned int const size = (sizeof (next_token_text ) / sizeof (char ) - 1 );
6873
6974 while (true) {
70- lexer -> advance (lexer , false );
75+ advance (lexer );
7176
7277 if (lexer -> lookahead == '\n' || lexer -> eof (lexer )) {
7378 lexer -> result_symbol = CODE_IDENTIFIER ;
@@ -150,6 +155,7 @@ static bool scan_whitespace_no_newline(TSLexer *lexer) {
150155 // mark_end was called, we have control over the size of a matched token.
151156 //
152157 lexer -> mark_end (lexer );
158+ lexer -> result_symbol = WHITESPACE_NO_NEWLINE ;
153159
154160 bool has_text = false;
155161
@@ -164,15 +170,14 @@ static bool scan_whitespace_no_newline(TSLexer *lexer) {
164170 case ' ' :
165171 case '\t' :
166172 has_text = true;
167- lexer -> result_symbol = WHITESPACE_NO_NEWLINE ;
168173 lexer -> mark_end (lexer );
169174
170175 break ;
171176 default :
172177 return false;
173178 };
174179
175- lexer -> advance (lexer , false );
180+ advance (lexer );
176181 }
177182}
178183
@@ -187,7 +192,50 @@ bool tree_sitter_objdump_external_scanner_scan(void *payload, TSLexer *lexer, co
187192 return false;
188193 }
189194
190- if (valid_symbols [WHITESPACE_NO_NEWLINE ]) {
195+ bool advanced_once = false;
196+
197+ if (valid_symbols [RAW_DATA ]) {
198+ while (iswspace (lexer -> lookahead ) && lexer -> lookahead != '\n' ) {
199+ skip (lexer );
200+ }
201+
202+ bool found_dot = false;
203+ uint8_t raw_data_count = 0 ;
204+
205+ // consume till newline, require at least one dot and require it to occur within the first 4 characters, and
206+ // don't parse any 0x's.
207+ while (lexer -> lookahead != '\n' ) {
208+ if (lexer -> lookahead == '.' && raw_data_count < 4 ) {
209+ found_dot = true;
210+ }
211+
212+ if (lexer -> lookahead == '0' ) {
213+ advance (lexer );
214+ advanced_once = true;
215+ raw_data_count ++ ;
216+ if (lexer -> lookahead == 'x' ) {
217+ return false;
218+ }
219+ }
220+
221+ advance (lexer );
222+ if (!iswspace (lexer -> lookahead )) {
223+ advanced_once = true;
224+ }
225+ raw_data_count ++ ;
226+ }
227+
228+ if (lexer -> lookahead == '\n' && found_dot && raw_data_count > 1 ) {
229+ lexer -> result_symbol = RAW_DATA ;
230+ return true;
231+ }
232+
233+ if (raw_data_count > 4 && !found_dot ) {
234+ return false;
235+ }
236+ }
237+
238+ if (valid_symbols [WHITESPACE_NO_NEWLINE ] && !advanced_once ) {
191239 return scan_whitespace_no_newline (lexer );
192240 }
193241
0 commit comments