11# frozen_string_literal: true
22
3+ require "set"
34require "strscan"
45require_relative "../../polyfill/append_as_bytes"
56
@@ -9,16 +10,17 @@ class Parser
910 # Accepts a list of prism tokens and converts them into the expected
1011 # format for the parser gem.
1112 class Lexer
13+ # These tokens are always skipped
14+ TYPES_ALWAYS_SKIP = %i[ IGNORED_NEWLINE __END__ EOF ] . to_set
15+ private_constant :TYPES_ALWAYS_SKIP
16+
1217 # The direct translation of types between the two lexers.
1318 TYPES = {
1419 # These tokens should never appear in the output of the lexer.
15- EOF : nil ,
1620 MISSING : nil ,
1721 NOT_PROVIDED : nil ,
18- IGNORED_NEWLINE : nil ,
1922 EMBDOC_END : nil ,
2023 EMBDOC_LINE : nil ,
21- __END__ : nil ,
2224
2325 # These tokens have more or less direct mappings.
2426 AMPERSAND : :tAMPER2 ,
@@ -194,18 +196,18 @@ class Lexer
194196 #
195197 # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, `kDO` is still returned
196198 # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
197- LAMBDA_TOKEN_TYPES = [ :kDO_LAMBDA , :tLAMBDA , :tLAMBEG ]
199+ LAMBDA_TOKEN_TYPES = [ :kDO_LAMBDA , :tLAMBDA , :tLAMBEG ] . to_set
198200
199201 # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
200202 # The following token types are listed as those classified as `tLPAREN`.
201203 LPAREN_CONVERSION_TOKEN_TYPES = [
202204 :kBREAK , :kCASE , :tDIVIDE , :kFOR , :kIF , :kNEXT , :kRETURN , :kUNTIL , :kWHILE , :tAMPER , :tANDOP , :tBANG , :tCOMMA , :tDOT2 , :tDOT3 ,
203205 :tEQL , :tLPAREN , :tLPAREN2 , :tLPAREN_ARG , :tLSHFT , :tNL , :tOP_ASGN , :tOROP , :tPIPE , :tSEMI , :tSTRING_DBEG , :tUMINUS , :tUPLUS
204- ]
206+ ] . to_set
205207
206208 # Types of tokens that are allowed to continue a method call with comments in-between.
207209 # For these, the parser gem doesn't emit a newline token after the last comment.
208- COMMENT_CONTINUATION_TYPES = [ :COMMENT , :AMPERSAND_DOT , :DOT ]
210+ COMMENT_CONTINUATION_TYPES = [ :COMMENT , :AMPERSAND_DOT , :DOT ] . to_set
209211 private_constant :COMMENT_CONTINUATION_TYPES
210212
211213 # Heredocs are complex and require us to keep track of a bit of info to refer to later
@@ -252,7 +254,7 @@ def to_a
252254 while index < length
253255 token , state = lexed [ index ]
254256 index += 1
255- next if %i[ IGNORED_NEWLINE __END__ EOF ] . include? ( token . type )
257+ next if TYPES_ALWAYS_SKIP . include? ( token . type )
256258
257259 type = TYPES . fetch ( token . type )
258260 value = token . value
@@ -344,7 +346,7 @@ def to_a
344346 when :tSTRING_BEG
345347 next_token = lexed [ index ] [ 0 ]
346348 next_next_token = lexed [ index + 1 ] [ 0 ]
347- basic_quotes = [ " \" " , "'" ] . include? ( value )
349+ basic_quotes = value == '"' || value == "'"
348350
349351 if basic_quotes && next_token &.type == :STRING_END
350352 next_location = token . location . join ( next_token . location )
0 commit comments