diff --git a/lib/rdoc/generator/markup.rb b/lib/rdoc/generator/markup.rb index 54158c29ba..5bc0c5849a 100644 --- a/lib/rdoc/generator/markup.rb +++ b/lib/rdoc/generator/markup.rb @@ -125,9 +125,6 @@ def markup_code src = RDoc::TokenStream.to_html @token_stream - # add initial whitespace so that the indent gets calculated correctly - src.prepend(' ' * @token_stream.first[:char_no]) if source_language == 'ruby' && @token_stream.first - # dedent the source common_indent = src.length src.scan(/^ *(?=\S)/) do |whitespace| diff --git a/lib/rdoc/generator/template/aliki/css/rdoc.css b/lib/rdoc/generator/template/aliki/css/rdoc.css index e3e0aec650..50e810c6ec 100644 --- a/lib/rdoc/generator/template/aliki/css/rdoc.css +++ b/lib/rdoc/generator/template/aliki/css/rdoc.css @@ -1021,7 +1021,6 @@ main h6 a:hover { .ruby-ivar { color: var(--code-orange); } .ruby-operator { color: var(--code-green); } .ruby-identifier { color: var(--code-blue); } -.ruby-node { color: var(--code-purple); } .ruby-comment { color: var(--color-neutral-500); @@ -1037,7 +1036,6 @@ main h6 a:hover { [data-theme="dark"] .ruby-ivar { color: var(--code-orange); } [data-theme="dark"] .ruby-operator { color: var(--code-green); } [data-theme="dark"] .ruby-identifier { color: var(--code-blue); } -[data-theme="dark"] .ruby-node { color: var(--code-purple); } [data-theme="dark"] .ruby-comment { color: var(--color-neutral-400); diff --git a/lib/rdoc/generator/template/darkfish/css/rdoc.css b/lib/rdoc/generator/template/darkfish/css/rdoc.css index 87bf24c36e..6f1163affa 100644 --- a/lib/rdoc/generator/template/darkfish/css/rdoc.css +++ b/lib/rdoc/generator/template/darkfish/css/rdoc.css @@ -449,7 +449,6 @@ main h6 { .ruby-ivar { color: #B57614; } /* Brown */ .ruby-operator { color: #427B58; } /* Dark Teal */ .ruby-identifier { color: #076678; } /* Deep Teal */ -.ruby-node { color: #8F3F71; } /* Plum */ .ruby-comment { color: #928374; font-style: italic; } /* Gray */ .ruby-regexp { color: #8F3F71; } /* Plum */ 
.ruby-value { color: #AF3A03; } /* Dark Orange */ diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb index 0da0081e70..9499118120 100644 --- a/lib/rdoc/markup/to_html.rb +++ b/lib/rdoc/markup/to_html.rb @@ -2,6 +2,7 @@ require 'cgi/escape' require 'cgi/util' unless defined?(CGI::EscapeExt) require 'prism' +require 'rdoc/parser/ruby_colorizer' ## # Outputs RDoc markup as HTML. @@ -321,6 +322,15 @@ def accept_paragraph(paragraph) @res << "
\n" end + # Generate syntax highlighted html for ruby-like text. + + def parsable_text_to_html(text) + tokens = RDoc::Parser::RubyColorizer.colorize(text) + result = RDoc::TokenStream.to_html tokens + result = result + "\n" unless result.end_with?("\n") + result + end + ## # Adds +verbatim+ to the output @@ -328,27 +338,17 @@ def accept_verbatim(verbatim) text = verbatim.text.rstrip format = verbatim.format - klass = nil - # Apply Ruby syntax highlighting if # - explicitly marked as Ruby (via ruby? which accepts :ruby or :rb) # - no format specified but the text is parseable as Ruby # Otherwise, add language class when applicable and skip Ruby highlighting - content = if verbatim.ruby? || (format.nil? && parseable?(text)) - begin - tokens = RDoc::Parser::RipperStateLex.parse text - klass = ' class="ruby"' - - result = RDoc::TokenStream.to_html tokens - result = result + "\n" unless "\n" == result[-1] - result - rescue - CGI.escapeHTML text - end - else - klass = " class=\"#{format}\"" if format - CGI.escapeHTML text - end + if verbatim.ruby? || (format.nil? && parseable?(text)) + content = parsable_text_to_html(text) + klass = ' class="ruby"' + else + content = CGI.escapeHTML text + klass = " class=\"#{format}\"" if format + end if @pipe @res << "\n#{CGI.escapeHTML text}\n\n"
diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb
deleted file mode 100644
index 2212906bbd..0000000000
--- a/lib/rdoc/parser/ripper_state_lex.rb
+++ /dev/null
@@ -1,302 +0,0 @@
-# frozen_string_literal: true
-require 'ripper'
-
-##
-# Wrapper for Ripper lex states
-
-class RDoc::Parser::RipperStateLex
- # :stopdoc:
-
- Token = Struct.new(:line_no, :char_no, :kind, :text, :state)
-
- EXPR_END = Ripper::EXPR_END
- EXPR_ENDFN = Ripper::EXPR_ENDFN
- EXPR_ARG = Ripper::EXPR_ARG
- EXPR_FNAME = Ripper::EXPR_FNAME
-
- class InnerStateLex < Ripper::Filter
- def initialize(code)
- super(code)
- end
-
- def on_default(event, tok, data)
- data << Token.new(lineno, column, event, tok, state)
- end
- end
-
- def get_squashed_tk
- if @buf.empty?
- tk = @tokens.shift
- else
- tk = @buf.shift
- end
- return nil if tk.nil?
- case tk[:kind]
- when :on_symbeg then
- tk = get_symbol_tk(tk)
- when :on_tstring_beg then
- tk = get_string_tk(tk)
- when :on_backtick then
- if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0
- tk[:kind] = :on_ident
- tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
- else
- tk = get_string_tk(tk)
- end
- when :on_regexp_beg then
- tk = get_regexp_tk(tk)
- when :on_embdoc_beg then
- tk = get_embdoc_tk(tk)
- when :on_heredoc_beg then
- @heredoc_queue << retrieve_heredoc_info(tk)
- when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end then
- if !@heredoc_queue.empty?
- get_heredoc_tk(*@heredoc_queue.shift)
- elsif tk[:text].nil? # :on_ignored_nl sometimes gives nil
- tk[:text] = ''
- end
- when :on_words_beg then
- tk = get_words_tk(tk)
- when :on_qwords_beg then
- tk = get_words_tk(tk)
- when :on_symbols_beg then
- tk = get_words_tk(tk)
- when :on_qsymbols_beg then
- tk = get_words_tk(tk)
- when :on_op then
- if '&.' == tk[:text]
- tk[:kind] = :on_period
- else
- tk = get_op_tk(tk)
- end
- end
- tk
- end
-
- private def get_symbol_tk(tk)
- is_symbol = true
- symbol_tk = Token.new(tk.line_no, tk.char_no, :on_symbol)
- if ":'" == tk[:text] or ':"' == tk[:text] or tk[:text].start_with?('%s')
- tk1 = get_string_tk(tk)
- symbol_tk[:text] = tk1[:text]
- symbol_tk[:state] = tk1[:state]
- else
- case (tk1 = get_squashed_tk)[:kind]
- when :on_ident
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_tstring_content
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end
- when :on_tstring_end
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_op
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_ivar
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_cvar
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_gvar
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_const
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_kw
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- else
- is_symbol = false
- tk = tk1
- end
- end
- if is_symbol
- tk = symbol_tk
- end
- tk
- end
-
- private def get_string_tk(tk)
- string = tk[:text]
- state = nil
- kind = :on_tstring
- loop do
- inner_str_tk = get_squashed_tk
- if inner_str_tk.nil?
- break
- elsif :on_tstring_end == inner_str_tk[:kind]
- string = string + inner_str_tk[:text]
- state = inner_str_tk[:state]
- break
- elsif :on_label_end == inner_str_tk[:kind]
- string = string + inner_str_tk[:text]
- state = inner_str_tk[:state]
- kind = :on_symbol
- break
- else
- string = string + inner_str_tk[:text]
- if :on_embexpr_beg == inner_str_tk[:kind] then
- kind = :on_dstring if :on_tstring == kind
- end
- end
- end
- Token.new(tk.line_no, tk.char_no, kind, string, state)
- end
-
- private def get_regexp_tk(tk)
- string = tk[:text]
- state = nil
- loop do
- inner_str_tk = get_squashed_tk
- if inner_str_tk.nil?
- break
- elsif :on_regexp_end == inner_str_tk[:kind]
- string = string + inner_str_tk[:text]
- state = inner_str_tk[:state]
- break
- else
- string = string + inner_str_tk[:text]
- end
- end
- Token.new(tk.line_no, tk.char_no, :on_regexp, string, state)
- end
-
- private def get_embdoc_tk(tk)
- string = tk[:text]
- until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do
- string = string + embdoc_tk[:text]
- end
- string = string + embdoc_tk[:text]
- Token.new(tk.line_no, tk.char_no, :on_embdoc, string, embdoc_tk.state)
- end
-
- private def get_heredoc_tk(heredoc_name, indent)
- string = ''
- start_tk = nil
- prev_tk = nil
- until heredoc_end?(heredoc_name, indent, tk = @tokens.shift) do
- start_tk = tk unless start_tk
- if (prev_tk.nil? or "\n" == prev_tk[:text][-1]) and 0 != tk[:char_no]
- string = string + (' ' * tk[:char_no])
- end
- string = string + tk[:text]
- prev_tk = tk
- end
- start_tk = tk unless start_tk
- prev_tk = tk unless prev_tk
- @buf.unshift tk # closing heredoc
- heredoc_tk = Token.new(start_tk.line_no, start_tk.char_no, :on_heredoc, string, prev_tk.state)
- @buf.unshift heredoc_tk
- end
-
- private def retrieve_heredoc_info(tk)
- name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2')
- indent = tk[:text] =~ /\A<<[-~]/
- [name, indent]
- end
-
- private def heredoc_end?(name, indent, tk)
- result = false
- if :on_heredoc_end == tk[:kind] then
- tk_name = tk[:text].chomp
- tk_name.lstrip! if indent
- if name == tk_name
- result = true
- end
- end
- result
- end
-
- private def get_words_tk(tk)
- string = ''
- start_token = tk[:text]
- start_quote = tk[:text].rstrip[-1]
- line_no = tk[:line_no]
- char_no = tk[:char_no]
- state = tk[:state]
- end_quote =
- case start_quote
- when ?( then ?)
- when ?[ then ?]
- when ?{ then ?}
- when ?< then ?>
- else start_quote
- end
- end_token = nil
- loop do
- tk = get_squashed_tk
- if tk.nil?
- end_token = end_quote
- break
- elsif :on_tstring_content == tk[:kind] then
- string += tk[:text]
- elsif :on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind] then
- if end_quote == tk[:text].strip then
- end_token = tk[:text]
- break
- else
- string += tk[:text]
- end
- else
- string += tk[:text]
- end
- end
- text = "#{start_token}#{string}#{end_token}"
- Token.new(line_no, char_no, :on_dstring, text, state)
- end
-
- private def get_op_tk(tk)
- redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~]
- if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then
- tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
- tk[:kind] = :on_ident
- elsif tk[:text] =~ /^[-+]$/ then
- tk_ahead = get_squashed_tk
- case tk_ahead[:kind]
- when :on_int, :on_float, :on_rational, :on_imaginary then
- tk[:text] += tk_ahead[:text]
- tk[:kind] = tk_ahead[:kind]
- tk[:state] = tk_ahead[:state]
- when :on_heredoc_beg, :on_tstring, :on_dstring # frozen/non-frozen string literal
- tk[:text] += tk_ahead[:text]
- tk[:kind] = tk_ahead[:kind]
- tk[:state] = tk_ahead[:state]
- else
- @buf.unshift tk_ahead
- end
- end
- tk
- end
-
- # :startdoc:
-
- # New lexer for +code+.
- def initialize(code)
- @buf = []
- @heredoc_queue = []
- @inner_lex = InnerStateLex.new(code)
- @tokens = @inner_lex.parse([])
- end
-
- # Returns tokens parsed from +code+.
- def self.parse(code)
- lex = self.new(code)
- tokens = []
- begin
- while tk = lex.get_squashed_tk
- tokens.push tk
- end
- rescue StopIteration
- end
- tokens
- end
-
- # Returns +true+ if lex state will be +END+ after +token+.
- def self.end?(token)
- (token[:state] & EXPR_END)
- end
-end
diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb
index 0a0f690bac..0100b04b45 100644
--- a/lib/rdoc/parser/ruby.rb
+++ b/lib/rdoc/parser/ruby.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: true
require 'prism'
-require_relative 'ripper_state_lex'
+require_relative 'ruby_colorizer'
# Parse and collect document from Ruby source code.
@@ -198,10 +198,12 @@ def record_location(container) # :nodoc:
# Scans this Ruby file for Ruby constructs
def scan
- @tokens = RDoc::Parser::RipperStateLex.parse(@content)
@lines = @content.lines
- result = Prism.parse(@content)
- @program_node = result.value
+ result = Prism.parse_lex(@content)
+ @program_node, unordered_tokens = result.value
+ # Heredoc tokens are not in start_offset order, so the tokens
+ # must be sorted before bsearch can be used to find tokens by location.
+ @prism_tokens = unordered_tokens.map(&:first).sort_by { |t| t.location.start_offset }
@line_nodes = {}
prepare_line_nodes(@program_node)
prepare_comments(result.comments)
@@ -314,7 +316,7 @@ def parse_comment_tomdoc(container, comment, line_no, start_line)
meth.start_collecting_tokens(:ruby)
node = @line_nodes[line_no]
- tokens = node ? visible_tokens_from_location(node.location) : []
+ tokens = node ? syntax_highlighted_tokens(node) : []
tokens.each { |token| meth.token_stream << token }
container.add_method meth
@@ -382,7 +384,7 @@ def handle_meta_method_comment(comment, directives, node)
elsif line_no || node
method_name ||= call_node_name_arguments(node).first if is_call_node
if node
- tokens = visible_tokens_from_location(node.location)
+ tokens = syntax_highlighted_tokens(node)
line_no = node.location.start_line
else
tokens = []
@@ -490,21 +492,10 @@ def extract_section_comment(comment_text, prefix_line_count) # :nodoc:
comment_text
end
- def slice_tokens(start_pos, end_pos) # :nodoc:
- start_index = @tokens.bsearch_index { |t| ([t.line_no, t.char_no] <=> start_pos) >= 0 }
- end_index = @tokens.bsearch_index { |t| ([t.line_no, t.char_no] <=> end_pos) >= 0 }
- tokens = @tokens[start_index...end_index]
- tokens.pop if tokens.last&.kind == :on_nl
- tokens
- end
-
- # Returns tokens from the given location
+ # Returns syntax highlighted tokens of the given node
- def visible_tokens_from_location(location)
- slice_tokens(
- [location.start_line, location.start_character_column],
- [location.end_line, location.end_character_column]
- )
+ def syntax_highlighted_tokens(node)
+ RDoc::Parser::RubyColorizer.partial_colorize(@content, node, @prism_tokens)
end
# Handles `public :foo, :bar` `private :foo, :bar` and `protected :foo, :bar`
@@ -1018,7 +1009,7 @@ def visit_def_node(node)
end
name = node.name.to_s
params, block_params, calls_super = MethodSignatureVisitor.scan_signature(node)
- tokens = @scanner.visible_tokens_from_location(node.location)
+ tokens = @scanner.syntax_highlighted_tokens(node)
@scanner.add_method(
name,
diff --git a/lib/rdoc/parser/ruby_colorizer.rb b/lib/rdoc/parser/ruby_colorizer.rb
new file mode 100644
index 0000000000..4765e79c3d
--- /dev/null
+++ b/lib/rdoc/parser/ruby_colorizer.rb
@@ -0,0 +1,249 @@
+# frozen_string_literal: true
+
+require 'prism'
+require 'set'
+
+# Ruby code syntax highlighter.
+# The colorize result is an array of +RDoc::Parser::RubyColorizer::ColoredToken+.
+# The actual color for each token kind is determined elsewhere (e.g., by the HTML generator).
+module RDoc::Parser::RubyColorizer
+
+ ColoredToken = Struct.new(:kind, :text)
+
+ # Prism operator token types except assignment '='
+ OP_TOKENS = %i[
+ AMPERSAND AMPERSAND_AMPERSAND
+ BANG BANG_EQUAL BANG_TILDE CARET COLON COLON_COLON
+ EQUAL_EQUAL EQUAL_GREATER EQUAL_TILDE
+ GREATER GREATER_GREATER
+ LESS LESS_EQUAL LESS_EQUAL_GREATER LESS_LESS
+ MINUS MINUS_GREATER PERCENT PIPE PIPE_PIPE PLUS
+ QUESTION_MARK SLASH STAR STAR_STAR TILDE
+ UAMPERSAND UMINUS UPLUS USTAR USTAR_STAR
+ ].to_set
+
+ # Prism token type to ColoredToken kind map
+ TOKEN_TYPE_MAP = {
+ IDENTIFIER: :identifier,
+ METHOD_NAME: :identifier,
+ INSTANCE_VARIABLE: :ivar,
+ CLASS_VARIABLE: :identifier,
+ GLOBAL_VARIABLE: :identifier,
+ BACK_REFERENCE: :identifier,
+ NUMBERED_REFERENCE: :identifier,
+ CONSTANT: :constant,
+ LABEL: :value,
+ INTEGER: :value,
+ FLOAT: :value,
+ RATIONAL: :value,
+ IMAGINARY: :value,
+ COMMENT: :comment,
+ EMBDOC_BEGIN: :comment,
+ EMBDOC_LINE: :comment,
+ EMBDOC_END: :comment
+ }
+
+ class << self
+
+ # Colorizes the entire +code+ and returns the colored token stream.
+ def colorize(code)
+ result = Prism.parse_lex(code)
+ program_node, unordered_tokens = result.value
+ prism_tokens = unordered_tokens.map(&:first).sort_by! { |token| token.location.start_offset }
+ partial_colorize(code, program_node, prism_tokens, 0, code.bytesize)
+ end
+
+ # Colorizes +node+ within +whole_code+ and returns the colored token stream.
+ def partial_colorize(whole_code, node, prism_tokens, start_offset = nil, end_offset = nil)
+ start_offset ||= node.location.start_offset
+ end_offset ||= node.location.end_offset
+ visitor = NodeColorizeVisitor.new
+ node.accept(visitor)
+ prior_tokens = visitor.tokens.sort_by {|_, start_offset, _| start_offset }
+ normal_tokens = normal_tokens(slice_by_location(prism_tokens, start_offset, end_offset))
+ colored_tokens = unify_tokens(whole_code, prior_tokens, normal_tokens, start_offset, end_offset)
+ colored_tokens.unshift(ColoredToken.new(:plain, ' ' * node.location.start_column)) if node.location.start_column > 0
+ colored_tokens
+ end
+
+ private
+
+ def slice_by_location(items, start_offset, end_offset)
+ start_index = items.bsearch_index { |item| item.location.end_offset > start_offset } || items.size
+ end_index = items.bsearch_index { |item| item.location.start_offset >= end_offset } || items.size
+ items[start_index...end_index]
+ end
+
+ # Unify prior tokens and normal tokens into a single token stream.
+ # Prior tokens have higher priority than normal tokens.
+ # Also adds missing text (spaces, newlines, etc.) as :plain tokens
+ # so that the entire range is covered.
+ def unify_tokens(whole_code, prior_tokens, normal_tokens, start_offset, end_offset)
+ tokens = []
+ offset = start_offset
+
+ # Add missing text such as spaces and newlines as a separate :plain token
+ flush = -> next_offset {
+ return if offset == next_offset
+
+ whole_code.byteslice(offset...next_offset).scan(/\n|\s+|[^\s]+/) do |text|
+ tokens << ColoredToken.new(:plain, text)
+ end
+ }
+
+ until prior_tokens.empty? && normal_tokens.empty?
+ ptok = prior_tokens.first
+ ntok = normal_tokens.first
+ if ntok && (!ptok || ntok[2] <= ptok[1])
+ token = normal_tokens.shift
+ else
+ token = prior_tokens.shift
+ end
+ kind, start_pos, end_pos = token
+ next if start_pos < offset
+
+ flush.call(start_pos)
+ tokens << ColoredToken.new(kind, whole_code.byteslice(start_pos...end_pos))
+ offset = end_pos
+ end
+ flush.call(end_offset)
+ tokens
+ end
+
+ # Convert normal Prism tokens to [kind, start_offset, end_offset]
+ def normal_tokens(tokens)
+ tokens.map do |token,|
+ kind =
+ if token.type.start_with?('KEYWORD_')
+ :keyword
+ elsif OP_TOKENS.include?(token.type.to_sym)
+ :operator
+ else
+ TOKEN_TYPE_MAP[token.type] || :plain
+ end
+ [kind, token.location.start_offset, token.location.end_offset]
+ end
+ end
+ end
+
+ # Visitor that determines colorizing which can't be determined from tokens alone.
+ # STRING_CONTENT/EMBEXPR_BEGIN/EMBEXPR_END tokens are colorized differently in string/regexp/symbol nodes.
+ class NodeColorizeVisitor < Prism::Visitor # :nodoc:
+ attr_reader :tokens
+
+ def initialize
+ @tokens = []
+ end
+
+ def visit_symbol_node(node)
+ # SymbolNode#location may contain heredoc content and closing
+ # e.g., `<; end`
+ push_location(:identifier, node.name_loc)
+ super
+ end
+
+ private
+
+ def push_location(kind, location)
+ # Only push tokens that have a non-zero length
+ if location && location.start_offset < location.end_offset
+ @tokens << [kind, location.start_offset, location.end_offset]
+ end
+ end
+
+ def handle_interpolated_parts(kind, parts)
+ # StringNode, EmbeddedStatementsNode brackets, and EmbeddedVariableNode hash in
+ # interpolated regexp/symbol/string parts should be colored as regexp/symbol/string respectively.
+ parts.each do |part|
+ case part
+ when Prism::StringNode
+ # InterpolatedStringNode#parts may have its own opening/closing. e.g., `'a' "b"`
+ push_location(kind, part.opening_loc)
+ push_location(kind, part.content_loc)
+ push_location(kind, part.closing_loc)
+ when Prism::InterpolatedStringNode
+ # InterpolatedStringNode#parts may contain InterpolatedStringNode. e.g., `'a' "#{}"`
+ part.accept(self)
+ when Prism::EmbeddedStatementsNode
+ push_location(kind, part.opening_loc)
+ push_location(kind, part.closing_loc)
+ part.accept(self)
+ when Prism::EmbeddedVariableNode
+ push_location(kind, part.operator_loc)
+ end
+ end
+ end
+ end
+
+ private_constant :NodeColorizeVisitor
+end
diff --git a/lib/rdoc/token_stream.rb b/lib/rdoc/token_stream.rb
index cc89397c60..71bd4a7078 100644
--- a/lib/rdoc/token_stream.rb
+++ b/lib/rdoc/token_stream.rb
@@ -19,28 +19,17 @@ def self.to_html(token_stream)
next unless t
style = case t[:kind]
- when :on_const then 'ruby-constant'
- when :on_kw then 'ruby-keyword'
- when :on_ivar then 'ruby-ivar'
- when :on_cvar then 'ruby-identifier'
- when :on_gvar then 'ruby-identifier'
- when '=' != t[:text] && :on_op
- then 'ruby-operator'
- when :on_tlambda then 'ruby-operator'
- when :on_ident then 'ruby-identifier'
- when :on_label then 'ruby-value'
- when :on_backref, :on_dstring
- then 'ruby-node'
- when :on_comment then 'ruby-comment'
- when :on_embdoc then 'ruby-comment'
- when :on_regexp then 'ruby-regexp'
- when :on_tstring then 'ruby-string'
- when :on_int, :on_float,
- :on_rational, :on_imaginary,
- :on_heredoc,
- :on_symbol, :on_CHAR then 'ruby-value'
- when :on_heredoc_beg, :on_heredoc_end
- then 'ruby-identifier'
+ when :operator then 'ruby-operator'
+ when :keyword then 'ruby-keyword'
+ when :constant then 'ruby-constant'
+ when :ivar then 'ruby-ivar'
+ when :comment then 'ruby-comment'
+ when :value then 'ruby-value'
+ when :string then 'ruby-string'
+ when :symbol then 'ruby-value'
+ when :x_string then 'ruby-string'
+ when :regexp then 'ruby-regexp'
+ when :identifier then 'ruby-identifier'
end
text = t[:text]
diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb
index 820c8c2597..aa9ef04aa8 100644
--- a/test/rdoc/markup/to_html_test.rb
+++ b/test/rdoc/markup/to_html_test.rb
@@ -545,17 +545,17 @@ def foo
"'",
"\'\"\`",
"\#",
- "\#{}",
+ "\#{1}",
"#",
- "#{}",
+ "#{1}",
/'"/,
/\'\"/,
/\//,
/\\/,
/\#/,
- /\#{}/,
+ /\#{1}/,
/#/,
- /#{}/
+ /#{1}/
]
end
def bar
@@ -570,22 +570,22 @@ def bar
def foo [ - '\\', - '\'', - "'", - "\'\"\`", - "\#", - "\#{}", - "#", - "#{}", + '\\', + '\'', + "'", + "\'\"\`", + "\#", + "\#{1}", + "#", + "#{1}", /'"/, /\'\"/, /\//, /\\/, /\#/, - /\#{}/, + /\#{1}/, /#/, - /#{}/ + /#{1}/ ] end def bar @@ -603,9 +603,9 @@ def foo `\\`, `\'\"\``, `\#`, - `\#{}`, + `\#{1}`, `#`, - `#{}` + `#{1}` ] end def bar @@ -620,12 +620,12 @@ def bardef foo [ - `\\`, - `\'\"\``, - `\#`, - `\#{}`, - `#`, - `#{}` + `\\`, + `\'\"\``, + `\#`, + `\#{1}`, + `#`, + `#{1}` ] end def bar diff --git a/test/rdoc/parser/ruby_colorizer_test.rb b/test/rdoc/parser/ruby_colorizer_test.rb new file mode 100644 index 0000000000..b3fd1b71de --- /dev/null +++ b/test/rdoc/parser/ruby_colorizer_test.rb @@ -0,0 +1,202 @@ +# frozen_string_literal: true +require_relative '../helper' +require 'rdoc/parser/ruby_colorizer' + +class RDocParserRubyColorizerTest < RDoc::TestCase + def token(kind, text) + RDoc::Parser::RubyColorizer::ColoredToken.new(kind, text) + end + + def test_partial_colorize + code = <<~RUBY + class A + def m + # comment + 42 + end + end + RUBY + parse_result = Prism.parse_lex(code) + program_node, unordered_tokens = parse_result.value + prism_tokens = unordered_tokens.map(&:first).sort_by! 
{ |token| token.location.start_offset } + def_node = program_node.statements.body[0].body.body[0] + tokens = RDoc::Parser::RubyColorizer.partial_colorize(code, def_node, prism_tokens) + expected = [' ', 'def', ' ', 'm', "\n", ' ', "# comment\n", ' ', '42', "\n", ' ', 'end'] + assert_equal(expected, tokens.map(&:text)) + end + + def test_comment + code = <<~RUBY + # comment1 + class A + =begin + comment2 + =end + def m + 42 # comment3 + end + end + RUBY + tokens = RDoc::Parser::RubyColorizer.colorize(code) + assert_equal(code, tokens.map(&:text).join) + assert_include(tokens, token(:comment, "# comment1\n")) + assert_include(tokens, token(:comment, "=begin\n")) + assert_include(tokens, token(:comment, "comment2\n")) + assert_include(tokens, token(:comment, "=end\n")) + assert_include(tokens, token(:comment, "# comment3\n")) + end + + def test_interpolated_node + code = <<~'RUBY' + def m + "string#{interpolation1}example#@embvar" + /regexp#{interpolation2}example#$embvar/ + `xstring#{interpolation3}example#@embvar` + :"symbol#{interpolation4}example#$embvar" + end + RUBY + tokens = RDoc::Parser::RubyColorizer.colorize(code) + assert_equal(code, tokens.map(&:text).join) + + assert_include(tokens, token(:string, '"')) + assert_include(tokens, token(:string, 'string')) + assert_include(tokens, token(:string, '#{')) + assert_include(tokens, token(:identifier, 'interpolation1')) + assert_include(tokens, token(:string, '}')) + assert_include(tokens, token(:string, 'example')) + assert_include(tokens, token(:string, '#')) + + assert_include(tokens, token(:regexp, '/')) + assert_include(tokens, token(:regexp, 'regexp')) + assert_include(tokens, token(:regexp, '#{')) + assert_include(tokens, token(:identifier, 'interpolation2')) + assert_include(tokens, token(:regexp, '}')) + assert_include(tokens, token(:regexp, 'example')) + assert_include(tokens, token(:regexp, '#')) + + assert_include(tokens, token(:x_string, '`')) + assert_include(tokens, token(:x_string, 'xstring')) + 
assert_include(tokens, token(:x_string, '#{')) + assert_include(tokens, token(:identifier, 'interpolation3')) + assert_include(tokens, token(:x_string, '}')) + assert_include(tokens, token(:x_string, 'example')) + assert_include(tokens, token(:x_string, '#')) + + assert_include(tokens, token(:symbol, ':"')) + assert_include(tokens, token(:symbol, 'symbol')) + assert_include(tokens, token(:symbol, '#{')) + assert_include(tokens, token(:identifier, 'interpolation4')) + assert_include(tokens, token(:symbol, '}')) + assert_include(tokens, token(:symbol, 'example')) + assert_include(tokens, token(:symbol, '#')) + assert_include(tokens, token(:symbol, '"')) + end + + def test_percent_literal_arrays + code = <<~'RUBY' + def m + %w[1 2 3] + %W[one #{two} three] + %i[4 5 6] + %I[four #{five} six] + end + RUBY + tokens = RDoc::Parser::RubyColorizer.colorize(code) + assert_equal(code, tokens.map(&:text).join) + assert_include(tokens, token(:string, '%w[')) + assert_include(tokens, token(:string, '%W[')) + assert_include(tokens, token(:string, ']')) + assert_include(tokens, token(:string, '1')) + assert_include(tokens, token(:string, 'one')) + assert_include(tokens, token(:string, '#{')) + assert_include(tokens, token(:identifier, 'two')) + assert_include(tokens, token(:string, '}')) + assert_include(tokens, token(:symbol, '%i[')) + assert_include(tokens, token(:symbol, '%I[')) + assert_include(tokens, token(:symbol, ']')) + assert_include(tokens, token(:symbol, '4')) + assert_include(tokens, token(:symbol, 'four')) + assert_include(tokens, token(:symbol, '#{')) + assert_include(tokens, token(:identifier, 'five')) + assert_include(tokens, token(:symbol, '}')) + end + + def test_multibyte + code = <<~RUBY + def f(s = '💎') + # comment 💎 + puts '💎' + s + end + RUBY + tokens = RDoc::Parser::RubyColorizer.colorize(code) + assert_equal(code, tokens.map(&:text).join) + end + + def test_string + code = <<~'RUBY' + # string without closing + ?S + # interpolated string node may not have 
opening/closing + # parts may have opening/closing + %[s3] 's4'\ + "s5#{[?s]}s6" + RUBY + tokens = RDoc::Parser::RubyColorizer.colorize(code) + assert_equal(code, tokens.map(&:text).join) + string_token_texts = tokens.select { |t| t[:kind] == :string }.map(&:text) + expected_string_token_texts = %w[? S %[ s3 ] ' s4 ' " s5 #{ ? s } s6 "] + assert_equal(expected_string_token_texts, string_token_texts) + end + + def test_symbol + code = <<~'RUBY' + # symbol without closing + :sym1 + # symbol with opening/closing + :"sym2" + %s[sym3] + # opening and content has gap + <<~A; :\ + A + sym4 + RUBY + tokens = RDoc::Parser::RubyColorizer.colorize(code) + assert_equal(code, tokens.map(&:text).join) + symbol_token_texts = tokens.select { |t| t[:kind] == :symbol }.map(&:text) + expected_symbol_token_texts = %w[: sym1 :" sym2 " %s[ sym3 ] : sym4] + assert_equal(expected_symbol_token_texts, symbol_token_texts) + end + + def test_heredoc + code = <<~'RUBY' + def f + str1 = <<~AA + single-line-heredoc + AA + str2 = <<~`BB` # comment + x-string-heredoc + BB + str3 = <<~CC.itself + multi-line + #{embed} + heredoc + CC + end + RUBY + tokens = RDoc::Parser::RubyColorizer.colorize(code) + assert_equal(code, tokens.map(&:text).join) + assert_include(tokens, token(:string, '<<~AA')) + assert_include(tokens, token(:x_string, '<<~`BB`')) + assert_include(tokens, token(:string, '<<~CC')) + assert_include(tokens, token(:string, " AA\n")) + assert_include(tokens, token(:x_string, " BB\n")) + assert_include(tokens, token(:string, " CC\n")) + assert_include(tokens, token(:string, " single-line-heredoc\n")) + assert_include(tokens, token(:x_string, " x-string-heredoc\n")) + assert_include(tokens, token(:string, " multi-line\n")) + assert_include(tokens, token(:string, '#{')) + assert_include(tokens, token(:identifier, 'embed')) + assert_include(tokens, token(:string, '}')) + assert_include(tokens, token(:string, " heredoc\n")) + end +end diff --git a/test/rdoc/parser/ruby_test.rb 
b/test/rdoc/parser/ruby_test.rb index b3cef3a341..a5c79071ba 100644 --- a/test/rdoc/parser/ruby_test.rb +++ b/test/rdoc/parser/ruby_test.rb @@ -2406,6 +2406,23 @@ def test_read_directive_linear_performance end end + def test_code_object_token_stream + util_parser <<~RUBY + class Foo + def foo + 42 + end + + private def bar + baz + end + end + RUBY + + foo, bar = @top_level.classes.first.method_list + assert_equal([' ', 'def', ' ', 'foo', "\n", ' ', '42', "\n", ' ', 'end'], foo.token_stream.map(&:text)) + assert_equal([' ', 'def', ' ', 'bar', "\n", ' ', 'baz', "\n", ' ', 'end'], bar.token_stream.map(&:text)) + end def test_markup_first_comment util_parser <<~RUBY diff --git a/test/rdoc/rdoc_token_stream_test.rb b/test/rdoc/rdoc_token_stream_test.rb index ed5e124cc6..254811c72a 100644 --- a/test/rdoc/rdoc_token_stream_test.rb +++ b/test/rdoc/rdoc_token_stream_test.rb @@ -5,17 +5,18 @@ class RDocTokenStreamTest < RDoc::TestCase def test_class_to_html tokens = [ - { :line_no => 0, :char_no => 0, :kind => :on_const, :text => 'CONSTANT' }, - { :line_no => 0, :char_no => 0, :kind => :on_kw, :text => 'KW' }, - { :line_no => 0, :char_no => 0, :kind => :on_ivar, :text => 'IVAR' }, - { :line_no => 0, :char_no => 0, :kind => :on_op, :text => 'Op' }, - { :line_no => 0, :char_no => 0, :kind => :on_ident, :text => 'Id' }, - { :line_no => 0, :char_no => 0, :kind => :on_backref, :text => 'Node' }, - { :line_no => 0, :char_no => 0, :kind => :on_comment, :text => 'COMMENT' }, - { :line_no => 0, :char_no => 0, :kind => :on_regexp, :text => 'REGEXP' }, - { :line_no => 0, :char_no => 0, :kind => :on_tstring, :text => 'STRING' }, - { :line_no => 0, :char_no => 0, :kind => :on_int, :text => 'Val' }, - { :line_no => 0, :char_no => 0, :kind => :on_unknown, :text => '\\' } + { kind: :constant, text: 'CONSTANT' }, + { kind: :keyword, text: 'KW' }, + { kind: :ivar, text: 'IVAR' }, + { kind: :operator, text: 'Op' }, + { kind: :identifier, text: 'Id' }, + { kind: :symbol, text: 'Symbol' }, + { 
kind: :x_string, text: 'XString' }, + { kind: :comment, text: 'COMMENT' }, + { kind: :regexp, text: 'REGEXP' }, + { kind: :string, text: 'STRING' }, + { kind: :value, text: 'Val' }, + { kind: :plain, text: '\\' } ] expected = [ @@ -24,7 +25,8 @@ def test_class_to_html 'IVAR', 'Op', 'Id', - 'Node', + 'Symbol', + 'XString', 'COMMENT', 'REGEXP', 'STRING',