From 78ea670d1bf97aa1a6ed3d3efd736c9f75d6e6cc Mon Sep 17 00:00:00 2001
From: tompng <tomoyapenguin@gmail.com>
Date: Wed, 26 Nov 2025 01:55:35 +0900
Subject: [PATCH] Syntax highlighting using Prism.parse_lex

Completely drops Ripper dependency
---
 lib/rdoc/generator/markup.rb                  |   3 -
 .../generator/template/aliki/css/rdoc.css     |   2 -
 .../generator/template/darkfish/css/rdoc.css  |   1 -
 lib/rdoc/markup/to_html.rb                    |  34 +-
 lib/rdoc/parser/ripper_state_lex.rb           | 302 ------------------
 lib/rdoc/parser/ruby.rb                       |  33 +-
 lib/rdoc/parser/ruby_colorizer.rb             | 249 +++++++++++++++
 lib/rdoc/token_stream.rb                      |  33 +-
 test/rdoc/markup/to_html_test.rb              |  44 +--
 test/rdoc/parser/ruby_colorizer_test.rb       | 202 ++++++++++++
 test/rdoc/parser/ruby_test.rb                 |  17 +
 test/rdoc/rdoc_token_stream_test.rb           |  26 +-
 12 files changed, 544 insertions(+), 402 deletions(-)
 delete mode 100644 lib/rdoc/parser/ripper_state_lex.rb
 create mode 100644 lib/rdoc/parser/ruby_colorizer.rb
 create mode 100644 test/rdoc/parser/ruby_colorizer_test.rb
diff --git a/lib/rdoc/generator/markup.rb b/lib/rdoc/generator/markup.rb
index 54158c29ba..5bc0c5849a 100644
--- a/lib/rdoc/generator/markup.rb
+++ b/lib/rdoc/generator/markup.rb
@@ -125,9 +125,6 @@ def markup_code
 
     src = RDoc::TokenStream.to_html @token_stream
 
-    # add initial whitespace so that the indent gets calculated correctly
-    src.prepend(' ' * @token_stream.first[:char_no]) if source_language == 'ruby' && @token_stream.first
-
     # dedent the source
     common_indent = src.length
     src.scan(/^ *(?=\S)/) do |whitespace|
diff --git a/lib/rdoc/generator/template/aliki/css/rdoc.css b/lib/rdoc/generator/template/aliki/css/rdoc.css
index e3e0aec650..50e810c6ec 100644
--- a/lib/rdoc/generator/template/aliki/css/rdoc.css
+++ b/lib/rdoc/generator/template/aliki/css/rdoc.css
@@ -1021,7 +1021,6 @@ main h6 a:hover {
 .ruby-ivar       { color: var(--code-orange); }
 .ruby-operator   { color: var(--code-green); }
 .ruby-identifier { color: var(--code-blue); }
-.ruby-node       { color: var(--code-purple); }
 
 .ruby-comment {
   color: var(--color-neutral-500);
@@ -1037,7 +1036,6 @@ main h6 a:hover {
 [data-theme="dark"] .ruby-ivar       { color: var(--code-orange); }
 [data-theme="dark"] .ruby-operator   { color: var(--code-green); }
 [data-theme="dark"] .ruby-identifier { color: var(--code-blue); }
-[data-theme="dark"] .ruby-node       { color: var(--code-purple); }
 
 [data-theme="dark"] .ruby-comment {
   color: var(--color-neutral-400);
diff --git a/lib/rdoc/generator/template/darkfish/css/rdoc.css b/lib/rdoc/generator/template/darkfish/css/rdoc.css
index 87bf24c36e..6f1163affa 100644
--- a/lib/rdoc/generator/template/darkfish/css/rdoc.css
+++ b/lib/rdoc/generator/template/darkfish/css/rdoc.css
@@ -449,7 +449,6 @@ main h6 {
 .ruby-ivar       { color: #B57614; }  /* Brown */
 .ruby-operator   { color: #427B58; }  /* Dark Teal */
 .ruby-identifier { color: #076678; }  /* Deep Teal */
-.ruby-node       { color: #8F3F71; }  /* Plum */
 .ruby-comment    { color: #928374; font-style: italic; }  /* Gray */
 .ruby-regexp     { color: #8F3F71; }  /* Plum */
 .ruby-value      { color: #AF3A03; }  /* Dark Orange */
diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb
index 0da0081e70..9499118120 100644
--- a/lib/rdoc/markup/to_html.rb
+++ b/lib/rdoc/markup/to_html.rb
@@ -2,6 +2,7 @@
 require 'cgi/escape'
 require 'cgi/util' unless defined?(CGI::EscapeExt)
 require 'prism'
+require 'rdoc/parser/ruby_colorizer'
 
 ##
 # Outputs RDoc markup as HTML.
@@ -321,6 +322,15 @@ def accept_paragraph(paragraph)
     @res << "</p>\n"
   end
 
+  # Generates syntax-highlighted HTML for +text+, which is tokenized as Ruby code.
+
+  def parsable_text_to_html(text)
+    tokens = RDoc::Parser::RubyColorizer.colorize(text)
+    result = RDoc::TokenStream.to_html tokens
+    result = result + "\n" unless result.end_with?("\n") # the returned HTML always ends with a newline
+    result
+  end
+
   ##
   # Adds +verbatim+ to the output
 
@@ -328,27 +338,17 @@ def accept_verbatim(verbatim)
     text = verbatim.text.rstrip
     format = verbatim.format
 
-    klass = nil
-
     # Apply Ruby syntax highlighting if
     # - explicitly marked as Ruby (via ruby? which accepts :ruby or :rb)
     # - no format specified but the text is parseable as Ruby
     # Otherwise, add language class when applicable and skip Ruby highlighting
-    content = if verbatim.ruby? || (format.nil? && parseable?(text))
-                begin
-                  tokens = RDoc::Parser::RipperStateLex.parse text
-                  klass  = ' class="ruby"'
-
-                  result = RDoc::TokenStream.to_html tokens
-                  result = result + "\n" unless "\n" == result[-1]
-                  result
-                rescue
-                  CGI.escapeHTML text
-                end
-              else
-                klass = " class=\"#{format}\"" if format
-                CGI.escapeHTML text
-              end
+    if verbatim.ruby? || (format.nil? && parseable?(text))
+      content = parsable_text_to_html(text)
+      klass = ' class="ruby"'
+    else
+      content = CGI.escapeHTML text
+      klass = " class=\"#{format}\"" if format
+    end
 
     if @pipe
       @res << "\n<pre><code>#{CGI.escapeHTML text}\n</code></pre>\n"
diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb
deleted file mode 100644
index 2212906bbd..0000000000
--- a/lib/rdoc/parser/ripper_state_lex.rb
+++ /dev/null
@@ -1,302 +0,0 @@
-# frozen_string_literal: true
-require 'ripper'
-
-##
-# Wrapper for Ripper lex states
-
-class RDoc::Parser::RipperStateLex
-  # :stopdoc:
-
-  Token = Struct.new(:line_no, :char_no, :kind, :text, :state)
-
-  EXPR_END   = Ripper::EXPR_END
-  EXPR_ENDFN = Ripper::EXPR_ENDFN
-  EXPR_ARG   = Ripper::EXPR_ARG
-  EXPR_FNAME = Ripper::EXPR_FNAME
-
-  class InnerStateLex < Ripper::Filter
-    def initialize(code)
-      super(code)
-    end
-
-    def on_default(event, tok, data)
-      data << Token.new(lineno, column, event, tok, state)
-    end
-  end
-
-  def get_squashed_tk
-    if @buf.empty?
-      tk = @tokens.shift
-    else
-      tk = @buf.shift
-    end
-    return nil if tk.nil?
-    case tk[:kind]
-    when :on_symbeg then
-      tk = get_symbol_tk(tk)
-    when :on_tstring_beg then
-      tk = get_string_tk(tk)
-    when :on_backtick then
-      if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0
-        tk[:kind] = :on_ident
-        tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
-      else
-        tk = get_string_tk(tk)
-      end
-    when :on_regexp_beg then
-      tk = get_regexp_tk(tk)
-    when :on_embdoc_beg then
-      tk = get_embdoc_tk(tk)
-    when :on_heredoc_beg then
-      @heredoc_queue << retrieve_heredoc_info(tk)
-    when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end then
-      if !@heredoc_queue.empty?
-        get_heredoc_tk(*@heredoc_queue.shift)
-      elsif tk[:text].nil? # :on_ignored_nl sometimes gives nil
-        tk[:text] = ''
-      end
-    when :on_words_beg then
-      tk = get_words_tk(tk)
-    when :on_qwords_beg then
-      tk = get_words_tk(tk)
-    when :on_symbols_beg then
-      tk = get_words_tk(tk)
-    when :on_qsymbols_beg then
-      tk = get_words_tk(tk)
-    when :on_op then
-      if '&.' == tk[:text]
-        tk[:kind] = :on_period
-      else
-        tk = get_op_tk(tk)
-      end
-    end
-    tk
-  end
-
-  private def get_symbol_tk(tk)
-    is_symbol = true
-    symbol_tk = Token.new(tk.line_no, tk.char_no, :on_symbol)
-    if ":'" == tk[:text] or ':"' == tk[:text] or tk[:text].start_with?('%s')
-      tk1 = get_string_tk(tk)
-      symbol_tk[:text] = tk1[:text]
-      symbol_tk[:state] = tk1[:state]
-    else
-      case (tk1 = get_squashed_tk)[:kind]
-      when :on_ident
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_tstring_content
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end
-      when :on_tstring_end
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_op
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_ivar
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_cvar
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_gvar
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_const
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      when :on_kw
-        symbol_tk[:text] = ":#{tk1[:text]}"
-        symbol_tk[:state] = tk1[:state]
-      else
-        is_symbol = false
-        tk = tk1
-      end
-    end
-    if is_symbol
-      tk = symbol_tk
-    end
-    tk
-  end
-
-  private def get_string_tk(tk)
-    string = tk[:text]
-    state = nil
-    kind = :on_tstring
-    loop do
-      inner_str_tk = get_squashed_tk
-      if inner_str_tk.nil?
-        break
-      elsif :on_tstring_end == inner_str_tk[:kind]
-        string = string + inner_str_tk[:text]
-        state = inner_str_tk[:state]
-        break
-      elsif :on_label_end == inner_str_tk[:kind]
-        string = string + inner_str_tk[:text]
-        state = inner_str_tk[:state]
-        kind = :on_symbol
-        break
-      else
-        string = string + inner_str_tk[:text]
-        if :on_embexpr_beg == inner_str_tk[:kind] then
-          kind = :on_dstring if :on_tstring == kind
-        end
-      end
-    end
-    Token.new(tk.line_no, tk.char_no, kind, string, state)
-  end
-
-  private def get_regexp_tk(tk)
-    string = tk[:text]
-    state = nil
-    loop do
-      inner_str_tk = get_squashed_tk
-      if inner_str_tk.nil?
-        break
-      elsif :on_regexp_end == inner_str_tk[:kind]
-        string = string + inner_str_tk[:text]
-        state = inner_str_tk[:state]
-        break
-      else
-        string = string + inner_str_tk[:text]
-      end
-    end
-    Token.new(tk.line_no, tk.char_no, :on_regexp, string, state)
-  end
-
-  private def get_embdoc_tk(tk)
-    string = tk[:text]
-    until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do
-      string = string + embdoc_tk[:text]
-    end
-    string = string + embdoc_tk[:text]
-    Token.new(tk.line_no, tk.char_no, :on_embdoc, string, embdoc_tk.state)
-  end
-
-  private def get_heredoc_tk(heredoc_name, indent)
-    string = ''
-    start_tk = nil
-    prev_tk = nil
-    until heredoc_end?(heredoc_name, indent, tk = @tokens.shift) do
-      start_tk = tk unless start_tk
-      if (prev_tk.nil? or "\n" == prev_tk[:text][-1]) and 0 != tk[:char_no]
-        string = string + (' ' * tk[:char_no])
-      end
-      string = string + tk[:text]
-      prev_tk = tk
-    end
-    start_tk = tk unless start_tk
-    prev_tk = tk unless prev_tk
-    @buf.unshift tk # closing heredoc
-    heredoc_tk = Token.new(start_tk.line_no, start_tk.char_no, :on_heredoc, string, prev_tk.state)
-    @buf.unshift heredoc_tk
-  end
-
-  private def retrieve_heredoc_info(tk)
-    name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2')
-    indent = tk[:text] =~ /\A<<[-~]/
-    [name, indent]
-  end
-
-  private def heredoc_end?(name, indent, tk)
-    result = false
-    if :on_heredoc_end == tk[:kind] then
-      tk_name = tk[:text].chomp
-      tk_name.lstrip! if indent
-      if name == tk_name
-        result = true
-      end
-    end
-    result
-  end
-
-  private def get_words_tk(tk)
-    string = ''
-    start_token = tk[:text]
-    start_quote = tk[:text].rstrip[-1]
-    line_no = tk[:line_no]
-    char_no = tk[:char_no]
-    state = tk[:state]
-    end_quote =
-      case start_quote
-      when ?( then ?)
-      when ?[ then ?]
-      when ?{ then ?}
-      when ?< then ?>
-      else start_quote
-      end
-    end_token = nil
-    loop do
-      tk = get_squashed_tk
-      if tk.nil?
-        end_token = end_quote
-        break
-      elsif :on_tstring_content == tk[:kind] then
-        string += tk[:text]
-      elsif :on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind] then
-        if end_quote == tk[:text].strip then
-          end_token = tk[:text]
-          break
-        else
-          string += tk[:text]
-        end
-      else
-        string += tk[:text]
-      end
-    end
-    text = "#{start_token}#{string}#{end_token}"
-    Token.new(line_no, char_no, :on_dstring, text, state)
-  end
-
-  private def get_op_tk(tk)
-    redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~]
-    if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then
-      tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
-      tk[:kind] = :on_ident
-    elsif tk[:text] =~ /^[-+]$/ then
-      tk_ahead = get_squashed_tk
-      case tk_ahead[:kind]
-      when :on_int, :on_float, :on_rational, :on_imaginary then
-        tk[:text] += tk_ahead[:text]
-        tk[:kind] = tk_ahead[:kind]
-        tk[:state] = tk_ahead[:state]
-      when :on_heredoc_beg, :on_tstring, :on_dstring # frozen/non-frozen string literal
-        tk[:text] += tk_ahead[:text]
-        tk[:kind] = tk_ahead[:kind]
-        tk[:state] = tk_ahead[:state]
-      else
-        @buf.unshift tk_ahead
-      end
-    end
-    tk
-  end
-
-  # :startdoc:
-
-  # New lexer for +code+.
-  def initialize(code)
-    @buf = []
-    @heredoc_queue = []
-    @inner_lex = InnerStateLex.new(code)
-    @tokens = @inner_lex.parse([])
-  end
-
-  # Returns tokens parsed from +code+.
-  def self.parse(code)
-    lex = self.new(code)
-    tokens = []
-    begin
-      while tk = lex.get_squashed_tk
-        tokens.push tk
-      end
-    rescue StopIteration
-    end
-    tokens
-  end
-
-  # Returns +true+ if lex state will be +END+ after +token+.
-  def self.end?(token)
-    (token[:state] & EXPR_END)
-  end
-end
diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb
index 0a0f690bac..0100b04b45 100644
--- a/lib/rdoc/parser/ruby.rb
+++ b/lib/rdoc/parser/ruby.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 require 'prism'
-require_relative 'ripper_state_lex'
+require_relative 'ruby_colorizer'
 
 # Parse and collect document from Ruby source code.
 
@@ -198,10 +198,12 @@ def record_location(container) # :nodoc:
   # Scans this Ruby file for Ruby constructs
 
   def scan
-    @tokens = RDoc::Parser::RipperStateLex.parse(@content)
     @lines = @content.lines
-    result = Prism.parse(@content)
-    @program_node = result.value
+    result = Prism.parse_lex(@content)
+    @program_node, unordered_tokens = result.value
+    # Heredoc tokens are not in start_offset order.
+    # Need to sort them to use bsearch for finding tokens from location.
+    @prism_tokens = unordered_tokens.map(&:first).sort_by { |t| t.location.start_offset }
     @line_nodes = {}
     prepare_line_nodes(@program_node)
     prepare_comments(result.comments)
@@ -314,7 +316,7 @@ def parse_comment_tomdoc(container, comment, line_no, start_line)
 
     meth.start_collecting_tokens(:ruby)
     node = @line_nodes[line_no]
-    tokens = node ? visible_tokens_from_location(node.location) : []
+    tokens = node ? syntax_highlighted_tokens(node) : []
     tokens.each { |token| meth.token_stream << token }
 
     container.add_method meth
@@ -382,7 +384,7 @@ def handle_meta_method_comment(comment, directives, node)
     elsif line_no || node
       method_name ||= call_node_name_arguments(node).first if is_call_node
       if node
-        tokens = visible_tokens_from_location(node.location)
+        tokens = syntax_highlighted_tokens(node)
         line_no = node.location.start_line
       else
         tokens = []
@@ -490,21 +492,10 @@ def extract_section_comment(comment_text, prefix_line_count) # :nodoc:
     comment_text
   end
 
-  def slice_tokens(start_pos, end_pos) # :nodoc:
-    start_index = @tokens.bsearch_index { |t| ([t.line_no, t.char_no] <=> start_pos) >= 0 }
-    end_index = @tokens.bsearch_index { |t| ([t.line_no, t.char_no] <=> end_pos) >= 0 }
-    tokens = @tokens[start_index...end_index]
-    tokens.pop if tokens.last&.kind == :on_nl
-    tokens
-  end
-
-  # Returns tokens from the given location
+  # Returns syntax highlighted tokens of the given node
 
-  def visible_tokens_from_location(location)
-    slice_tokens(
-      [location.start_line, location.start_character_column],
-      [location.end_line, location.end_character_column]
-    )
+  def syntax_highlighted_tokens(node)
+    RDoc::Parser::RubyColorizer.partial_colorize(@content, node, @prism_tokens)
   end
 
   # Handles `public :foo, :bar` `private :foo, :bar` and `protected :foo, :bar`
@@ -1018,7 +1009,7 @@ def visit_def_node(node)
       end
       name = node.name.to_s
       params, block_params, calls_super = MethodSignatureVisitor.scan_signature(node)
-      tokens = @scanner.visible_tokens_from_location(node.location)
+      tokens = @scanner.syntax_highlighted_tokens(node)
 
       @scanner.add_method(
         name,
diff --git a/lib/rdoc/parser/ruby_colorizer.rb b/lib/rdoc/parser/ruby_colorizer.rb
new file mode 100644
index 0000000000..4765e79c3d
--- /dev/null
+++ b/lib/rdoc/parser/ruby_colorizer.rb
@@ -0,0 +1,249 @@
+# frozen_string_literal: true
+
+require 'prism'
+require 'set'
+
+# Ruby code syntax highlighter.
+# Colorize result is an array of +RDoc::Parser::RubyColorizer::ColoredToken+
+# Actual color for each token kind is determined elsewhere (e.g., HTML generator)
+module RDoc::Parser::RubyColorizer
+
+  ColoredToken = Struct.new(:kind, :text)
+
+  # Prism operator token types colored as operators (plain assignment '=' is intentionally excluded)
+  OP_TOKENS = %i[
+    AMPERSAND AMPERSAND_AMPERSAND
+    BANG BANG_EQUAL BANG_TILDE CARET COLON COLON_COLON
+    EQUAL_EQUAL EQUAL_EQUAL_EQUAL EQUAL_GREATER EQUAL_TILDE
+    GREATER GREATER_EQUAL GREATER_GREATER
+    LESS LESS_EQUAL LESS_EQUAL_GREATER LESS_LESS
+    MINUS MINUS_GREATER PERCENT PIPE PIPE_PIPE PLUS
+    QUESTION_MARK SLASH STAR STAR_STAR TILDE
+    UAMPERSAND UMINUS UPLUS USTAR USTAR_STAR
+  ].to_set.freeze
+
+  # Prism token type to ColoredToken kind map (types not listed here fall back to :plain)
+  TOKEN_TYPE_MAP = {
+    IDENTIFIER: :identifier,
+    METHOD_NAME: :identifier,
+    INSTANCE_VARIABLE: :ivar,
+    CLASS_VARIABLE: :identifier,
+    GLOBAL_VARIABLE: :identifier,
+    BACK_REFERENCE: :identifier,
+    NUMBERED_REFERENCE: :identifier,
+    CONSTANT: :constant,
+    LABEL: :value,
+    INTEGER: :value,
+    FLOAT: :value,
+    RATIONAL: :value,
+    IMAGINARY: :value,
+    COMMENT: :comment,
+    EMBDOC_BEGIN: :comment,
+    EMBDOC_LINE: :comment,
+    EMBDOC_END: :comment
+  }.freeze
+
+  class << self
+
+    # Colorizes the entire +code+ (byte range 0...code.bytesize) and returns the colored token stream.
+    def colorize(code)
+      result = Prism.parse_lex(code)
+      program_node, unordered_tokens = result.value
+      prism_tokens = unordered_tokens.map(&:first).sort_by! { |token| token.location.start_offset } # slice_by_location bsearches by offset
+      partial_colorize(code, program_node, prism_tokens, 0, code.bytesize)
+    end
+
+    # Colorizes +node+ in +whole_code+ (byte offsets default to the node's own range) and returns the colored token stream.
+    def partial_colorize(whole_code, node, prism_tokens, start_offset = nil, end_offset = nil)
+      start_offset ||= node.location.start_offset
+      end_offset ||= node.location.end_offset
+      indent = start_offset == node.location.start_offset ? node.location.start_column : 0 # pad only when the slice begins at the node
+      visitor = NodeColorizeVisitor.new
+      node.accept(visitor)
+      prior_tokens = visitor.tokens.sort_by { |_, token_start, _| token_start }
+      normal_tokens = normal_tokens(slice_by_location(prism_tokens, start_offset, end_offset))
+      colored_tokens = unify_tokens(whole_code, prior_tokens, normal_tokens, start_offset, end_offset)
+      indent > 0 ? colored_tokens.unshift(ColoredToken.new(:plain, ' ' * indent)) : colored_tokens
+    end
+
+    private
+
+    def slice_by_location(items, start_offset, end_offset) # +items+ must be sorted by start offset for bsearch
+      start_index = items.bsearch_index { |item| item.location.end_offset > start_offset } || items.size # first item ending after start_offset
+      end_index = items.bsearch_index { |item| item.location.start_offset >= end_offset } || items.size # first item starting at/after end_offset
+      items[start_index...end_index]
+    end
+
+    # Merges +prior_tokens+ and +normal_tokens+ ([kind, start_offset, end_offset] triples) into a
+    # single ColoredToken stream. Any token starting before the end of the previously emitted one is
+    # skipped, so a normal token overlapping a prior token is dropped. Text not covered by any token
+    # (spaces, newlines, etc.) is emitted as :plain tokens. Both input arrays are consumed (emptied).
+    def unify_tokens(whole_code, prior_tokens, normal_tokens, start_offset, end_offset)
+      tokens = []
+      offset = start_offset
+
+      # Emits the uncovered text between +offset+ and +next_offset+ as :plain tokens (does not advance +offset+)
+      flush = -> next_offset {
+        return if offset == next_offset
+
+        whole_code.byteslice(offset...next_offset).scan(/\n|\s+|[^\s]+/) do |text|
+          tokens << ColoredToken.new(:plain, text)
+        end
+      }
+
+      until prior_tokens.empty? && normal_tokens.empty?
+        ptok = prior_tokens.first
+        ntok = normal_tokens.first
+        if ntok && (!ptok || ntok[2] <= ptok[1]) # take the normal token only if it ends at or before the next prior token's start
+          token = normal_tokens.shift
+        else
+          token = prior_tokens.shift
+        end
+        kind, start_pos, end_pos = token
+        next if start_pos < offset # overlaps text that was already emitted
+
+        flush.call(start_pos)
+        tokens << ColoredToken.new(kind, whole_code.byteslice(start_pos...end_pos))
+        offset = end_pos
+      end
+      flush.call(end_offset)
+      tokens
+    end
+
+    # Converts Prism tokens to [kind, start_offset, end_offset] triples; unmapped token types become :plain
+    def normal_tokens(tokens)
+      tokens.map do |token|
+        kind =
+          if token.type.start_with?('KEYWORD_')
+            :keyword
+          elsif OP_TOKENS.include?(token.type)
+            :operator
+          else
+            TOKEN_TYPE_MAP[token.type] || :plain
+          end
+        [kind, token.location.start_offset, token.location.end_offset]
+      end
+    end
+  end
+
+  # Visitor that collects colorings which can't be determined from lexer tokens alone.
+  # e.g., STRING_CONTENT/EMBEXPR_BEGIN/EMBEXPR_END are colored differently in string/regexp/symbol literals.
+  class NodeColorizeVisitor < Prism::Visitor # :nodoc:
+    attr_reader :tokens # [kind, start_offset, end_offset] triples, in visit order
+
+    def initialize
+      @tokens = []
+    end
+
+    def visit_symbol_node(node)
+      # SymbolNode#location may contain heredoc content and closing
+      # e.g., `<<A; :\\\nA\nsymbol`
+      # So we need to colorize opening, content and closing separately.
+      push_location(:symbol, node.opening_loc)
+      push_location(:symbol, node.value_loc)
+      push_location(:symbol, node.closing_loc)
+    end
+
+    def visit_interpolated_symbol_node(node)
+      push_location(:symbol, node.opening_loc)
+      handle_interpolated_parts(:symbol, node.parts)
+      push_location(:symbol, node.closing_loc)
+    end
+
+    def visit_regular_expression_node(node)
+      push_location(:regexp, node.location)
+    end
+
+    def visit_interpolated_regular_expression_node(node)
+      push_location(:regexp, node.opening_loc)
+      handle_interpolated_parts(:regexp, node.parts)
+      push_location(:regexp, node.closing_loc)
+    end
+
+    alias visit_match_last_line_node visit_regular_expression_node
+    alias visit_interpolated_match_last_line_node visit_interpolated_regular_expression_node
+
+    def visit_string_node(node)
+      # Node's location may not cover the entire string literal.
+      # For example, in a heredoc string, the node's location covers only the heredoc opening.
+      # We need to colorize opening, content and closing separately.
+      push_location(:string, node.opening_loc)
+      push_location(:string, node.content_loc)
+      push_location(:string, node.closing_loc)
+    end
+
+    def visit_interpolated_string_node(node)
+      push_location(:string, node.opening_loc)
+      handle_interpolated_parts(:string, node.parts)
+      push_location(:string, node.closing_loc)
+    end
+
+    def visit_x_string_node(node)
+      # Same as visit_string_node, node.location of <<`X` only covers opening,
+      # so we need to colorize opening, content and closing separately.
+      push_location(:x_string, node.opening_loc)
+      push_location(:x_string, node.content_loc)
+      push_location(:x_string, node.closing_loc)
+    end
+
+    def visit_interpolated_x_string_node(node)
+      push_location(:x_string, node.opening_loc)
+      handle_interpolated_parts(:x_string, node.parts)
+      push_location(:x_string, node.closing_loc)
+    end
+
+    def visit_array_node(node)
+      super
+      # Colorize %w[...] array literal like string literals, and %i[...] like symbol literals
+      case node.opening
+      when /\A%[wW].\z/
+        push_location(:string, node.opening_loc)
+        push_location(:string, node.closing_loc)
+      when /\A%[iI].\z/
+        push_location(:symbol, node.opening_loc)
+        push_location(:symbol, node.closing_loc)
+      end
+    end
+
+    def visit_def_node(node)
+      # For special colorizing of method name in def node
+      # e.g., `def <=>; end`
+      push_location(:identifier, node.name_loc)
+      super
+    end
+
+    private
+
+    def push_location(kind, location)
+      # Only push tokens that have a non-zero length (a nil location is ignored)
+      if location && location.start_offset < location.end_offset
+        @tokens << [kind, location.start_offset, location.end_offset]
+      end
+    end
+
+    def handle_interpolated_parts(kind, parts)
+      # StringNode, EmbeddedStatementsNode brackets, and EmbeddedVariableNode hash in
+      # interpolated regexp/symbol/string parts should be colored as regexp/symbol/string respectively.
+      parts.each do |part|
+        case part
+        when Prism::StringNode
+          # A StringNode part may have its own opening/closing. e.g., `'a' "b"`
+          push_location(kind, part.opening_loc)
+          push_location(kind, part.content_loc)
+          push_location(kind, part.closing_loc)
+        when Prism::InterpolatedStringNode
+          # InterpolatedStringNode#parts may contain InterpolatedStringNode. e.g., `'a' "#{}"`
+          part.accept(self)
+        when Prism::EmbeddedStatementsNode
+          push_location(kind, part.opening_loc)
+          push_location(kind, part.closing_loc)
+          part.accept(self)
+        when Prism::EmbeddedVariableNode
+          push_location(kind, part.operator_loc)
+        end
+      end
+    end
+  end
+
+  private_constant :NodeColorizeVisitor
+end
diff --git a/lib/rdoc/token_stream.rb b/lib/rdoc/token_stream.rb
index cc89397c60..71bd4a7078 100644
--- a/lib/rdoc/token_stream.rb
+++ b/lib/rdoc/token_stream.rb
@@ -19,28 +19,17 @@ def self.to_html(token_stream)
       next unless t
 
       style = case t[:kind]
-              when :on_const   then 'ruby-constant'
-              when :on_kw      then 'ruby-keyword'
-              when :on_ivar    then 'ruby-ivar'
-              when :on_cvar    then 'ruby-identifier'
-              when :on_gvar    then 'ruby-identifier'
-              when '=' != t[:text] && :on_op
-                               then 'ruby-operator'
-              when :on_tlambda then 'ruby-operator'
-              when :on_ident   then 'ruby-identifier'
-              when :on_label   then 'ruby-value'
-              when :on_backref, :on_dstring
-                               then 'ruby-node'
-              when :on_comment then 'ruby-comment'
-              when :on_embdoc  then 'ruby-comment'
-              when :on_regexp  then 'ruby-regexp'
-              when :on_tstring then 'ruby-string'
-              when :on_int, :on_float,
-                   :on_rational, :on_imaginary,
-                   :on_heredoc,
-                   :on_symbol, :on_CHAR then 'ruby-value'
-              when :on_heredoc_beg, :on_heredoc_end
-                               then 'ruby-identifier'
+              when :operator   then 'ruby-operator'
+              when :keyword    then 'ruby-keyword'
+              when :constant   then 'ruby-constant'
+              when :ivar       then 'ruby-ivar'
+              when :comment    then 'ruby-comment'
+              when :value      then 'ruby-value'
+              when :string     then 'ruby-string'
+              when :symbol     then 'ruby-value'
+              when :x_string   then 'ruby-string'
+              when :regexp     then 'ruby-regexp'
+              when :identifier then 'ruby-identifier'
               end
 
       text = t[:text]
diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb
index 820c8c2597..aa9ef04aa8 100644
--- a/test/rdoc/markup/to_html_test.rb
+++ b/test/rdoc/markup/to_html_test.rb
@@ -545,17 +545,17 @@ def foo
     "'",
     "\'\"\`",
     "\#",
-    "\#{}",
+    "\#{1}",
     "#",
-    "#{}",
+    "#{1}",
     /'"/,
     /\'\"/,
     /\//,
     /\\/,
     /\#/,
-    /\#{}/,
+    /\#{1}/,
     /#/,
-    /#{}/
+    /#{1}/
   ]
 end
 def bar
@@ -570,22 +570,22 @@ def bar
 
 <pre class="ruby"><span class="ruby-keyword">def</span> <span class="ruby-identifier">foo</span>
   [
-    <span class="ruby-string">&#39;\\&#39;</span>,
-    <span class="ruby-string">&#39;\&#39;&#39;</span>,
-    <span class="ruby-string">&quot;&#39;&quot;</span>,
-    <span class="ruby-string">&quot;\&#39;\&quot;\`&quot;</span>,
-    <span class="ruby-string">&quot;\#&quot;</span>,
-    <span class="ruby-string">&quot;\#{}&quot;</span>,
-    <span class="ruby-string">&quot;#&quot;</span>,
-    <span class="ruby-node">&quot;#{}&quot;</span>,
+    <span class="ruby-string">&#39;</span><span class="ruby-string">\\</span><span class="ruby-string">&#39;</span>,
+    <span class="ruby-string">&#39;</span><span class="ruby-string">\&#39;</span><span class="ruby-string">&#39;</span>,
+    <span class="ruby-string">&quot;</span><span class="ruby-string">&#39;</span><span class="ruby-string">&quot;</span>,
+    <span class="ruby-string">&quot;</span><span class="ruby-string">\&#39;\&quot;\`</span><span class="ruby-string">&quot;</span>,
+    <span class="ruby-string">&quot;</span><span class="ruby-string">\#</span><span class="ruby-string">&quot;</span>,
+    <span class="ruby-string">&quot;</span><span class="ruby-string">\#{1}</span><span class="ruby-string">&quot;</span>,
+    <span class="ruby-string">&quot;</span><span class="ruby-string">#</span><span class="ruby-string">&quot;</span>,
+    <span class="ruby-string">&quot;</span><span class="ruby-string">#{</span><span class="ruby-value">1</span><span class="ruby-string">}</span><span class="ruby-string">&quot;</span>,
     <span class="ruby-regexp">/&#39;&quot;/</span>,
     <span class="ruby-regexp">/\&#39;\&quot;/</span>,
     <span class="ruby-regexp">/\//</span>,
     <span class="ruby-regexp">/\\/</span>,
     <span class="ruby-regexp">/\#/</span>,
-    <span class="ruby-regexp">/\#{}/</span>,
+    <span class="ruby-regexp">/\#{1}/</span>,
     <span class="ruby-regexp">/#/</span>,
-    <span class="ruby-regexp">/#{}/</span>
+    <span class="ruby-regexp">/</span><span class="ruby-regexp">#{</span><span class="ruby-value">1</span><span class="ruby-regexp">}</span><span class="ruby-regexp">/</span>
   ]
 <span class="ruby-keyword">end</span>
 <span class="ruby-keyword">def</span> <span class="ruby-identifier">bar</span>
@@ -603,9 +603,9 @@ def foo
     `\\`,
     `\'\"\``,
     `\#`,
-    `\#{}`,
+    `\#{1}`,
     `#`,
-    `#{}`
+    `#{1}`
   ]
 end
 def bar
@@ -620,12 +620,12 @@ def bar
 
 <pre class="ruby"><span class="ruby-keyword">def</span> <span class="ruby-identifier">foo</span>
   [
-    <span class="ruby-string">`\\`</span>,
-    <span class="ruby-string">`\&#39;\&quot;\``</span>,
-    <span class="ruby-string">`\#`</span>,
-    <span class="ruby-string">`\#{}`</span>,
-    <span class="ruby-string">`#`</span>,
-    <span class="ruby-node">`#{}`</span>
+    <span class="ruby-string">`</span><span class="ruby-string">\\</span><span class="ruby-string">`</span>,
+    <span class="ruby-string">`</span><span class="ruby-string">\&#39;\&quot;\`</span><span class="ruby-string">`</span>,
+    <span class="ruby-string">`</span><span class="ruby-string">\#</span><span class="ruby-string">`</span>,
+    <span class="ruby-string">`</span><span class="ruby-string">\#{1}</span><span class="ruby-string">`</span>,
+    <span class="ruby-string">`</span><span class="ruby-string">#</span><span class="ruby-string">`</span>,
+    <span class="ruby-string">`</span><span class="ruby-string">#{</span><span class="ruby-value">1</span><span class="ruby-string">}</span><span class="ruby-string">`</span>
   ]
 <span class="ruby-keyword">end</span>
 <span class="ruby-keyword">def</span> <span class="ruby-identifier">bar</span>
diff --git a/test/rdoc/parser/ruby_colorizer_test.rb b/test/rdoc/parser/ruby_colorizer_test.rb
new file mode 100644
index 0000000000..b3fd1b71de
--- /dev/null
+++ b/test/rdoc/parser/ruby_colorizer_test.rb
@@ -0,0 +1,202 @@
+# frozen_string_literal: true
+require_relative '../helper'
+require 'rdoc/parser/ruby_colorizer'
+
+# Exercises RDoc::Parser::RubyColorizer: each test colorizes a Ruby snippet and
+# checks the kind and text of the tokens that come back.
+class RDocParserRubyColorizerTest < RDoc::TestCase
+  # Builds the ColoredToken an assertion expects to find.
+  def token(kind, text)
+    RDoc::Parser::RubyColorizer::ColoredToken.new(kind, text)
+  end
+
+  # Asserts, in the order given, that tokens includes a token of the given
+  # kind for each entry of texts.
+  def assert_tokens(tokens, kind, *texts)
+    texts.each { |text| assert_include(tokens, token(kind, text)) }
+  end
+
+  # partial_colorize receives the inner "def" node; the expected texts run from
+  # the indentation in front of "def" to the closing "end", comment included.
+  def test_partial_colorize
+    code = <<~RUBY
+      class A
+        def m
+          # comment
+          42
+        end
+      end
+    RUBY
+    program_node, unsorted = Prism.parse_lex(code).value
+    prism_tokens = unsorted.map(&:first).sort_by { |tok| tok.location.start_offset }
+    def_node = program_node.statements.body.first.body.body.first
+    colored = RDoc::Parser::RubyColorizer.partial_colorize(code, def_node, prism_tokens)
+    texts = ['  ', 'def', ' ', 'm', "\n", '    ', "# comment\n", '    ', '42', "\n", '  ', 'end']
+    assert_equal(texts, colored.map(&:text))
+  end
+
+  # "#" comments (newline included) and every line of an =begin/=end block are
+  # expected as separate :comment tokens.
+  def test_comment
+    code = <<~RUBY
+      # comment1
+      class A
+      =begin
+      comment2
+      =end
+        def m
+          42 # comment3
+        end
+      end
+    RUBY
+    tokens = RDoc::Parser::RubyColorizer.colorize(code)
+    # Joined back together, the token texts must reproduce the source.
+    assert_equal(code, tokens.map(&:text).join)
+    assert_tokens(tokens, :comment, "# comment1\n", "=begin\n", "comment2\n", "=end\n", "# comment3\n")
+  end
+
+  # Interpolating string, regexp, xstring and symbol literals: delimiters, text
+  # parts, "#{", "}" and "#" (presumably the one in front of @embvar/$embvar)
+  # carry the literal's kind, while each interpolated name is an :identifier.
+  def test_interpolated_node
+    code = <<~'RUBY'
+      def m
+        "string#{interpolation1}example#@embvar"
+        /regexp#{interpolation2}example#$embvar/
+        `xstring#{interpolation3}example#@embvar`
+        :"symbol#{interpolation4}example#$embvar"
+      end
+    RUBY
+    tokens = RDoc::Parser::RubyColorizer.colorize(code)
+    assert_equal(code, tokens.map(&:text).join)
+
+    assert_tokens(tokens, :string, '"', 'string', '#{')
+    assert_tokens(tokens, :identifier, 'interpolation1')
+    assert_tokens(tokens, :string, '}', 'example', '#')
+
+    assert_tokens(tokens, :regexp, '/', 'regexp', '#{')
+    assert_tokens(tokens, :identifier, 'interpolation2')
+    assert_tokens(tokens, :regexp, '}', 'example', '#')
+
+    assert_tokens(tokens, :x_string, '`', 'xstring', '#{')
+    assert_tokens(tokens, :identifier, 'interpolation3')
+    assert_tokens(tokens, :x_string, '}', 'example', '#')
+
+    assert_tokens(tokens, :symbol, ':"', 'symbol', '#{')
+    assert_tokens(tokens, :identifier, 'interpolation4')
+    assert_tokens(tokens, :symbol, '}', 'example', '#', '"')
+  end
+
+  # %w/%W arrays are expected as :string tokens and %i/%I arrays as :symbol
+  # tokens; the name inside "#{...}" is an :identifier.
+  def test_percent_literal_arrays
+    code = <<~'RUBY'
+      def m
+        %w[1 2 3]
+        %W[one #{two} three]
+        %i[4 5 6]
+        %I[four #{five} six]
+      end
+    RUBY
+    tokens = RDoc::Parser::RubyColorizer.colorize(code)
+    assert_equal(code, tokens.map(&:text).join)
+    assert_tokens(tokens, :string, '%w[', '%W[', ']', '1', 'one', '#{')
+    assert_tokens(tokens, :identifier, 'two')
+    assert_tokens(tokens, :string, '}')
+
+    assert_tokens(tokens, :symbol, '%i[', '%I[', ']', '4', 'four', '#{')
+    assert_tokens(tokens, :identifier, 'five')
+    assert_tokens(tokens, :symbol, '}')
+  end
+
+  # Multibyte characters in strings and comments must survive the round trip.
+  def test_multibyte
+    code = <<~RUBY
+      def f(s = '💎')
+        # comment 💎
+        puts '💎' + s
+      end
+    RUBY
+    colored_text = RDoc::Parser::RubyColorizer.colorize(code).map(&:text).join
+    assert_equal(code, colored_text)
+  end
+
+  # Collects the :string tokens to pin down how a character literal, a
+  # %-literal, adjacent literals and a nested interpolation are split up.
+  def test_string
+    code = <<~'RUBY'
+      # string without closing
+      ?S
+      # interpolated string node may not have opening/closing
+      # parts may have opening/closing
+      %[s3] 's4'\
+      "s5#{[?s]}s6"
+    RUBY
+    tokens = RDoc::Parser::RubyColorizer.colorize(code)
+    assert_equal(code, tokens.map(&:text).join)
+    string_tokens = tokens.select { |tok| tok[:kind] == :string }
+    # %w does not interpolate, so "%[", "#{" and the quote characters are plain words.
+    expected_texts = %w[? S %[ s3 ] ' s4 ' " s5  #{ ? s } s6 "]
+    assert_equal(expected_texts, string_tokens.map(&:text))
+  end
+
+  # Collects the :symbol tokens: bare, quoted and %s symbols, plus a symbol whose
+  # ":" and name are separated by a line continuation and an (empty) heredoc.
+  def test_symbol
+    code = <<~'RUBY'
+      # symbol without closing
+      :sym1
+      # symbol with opening/closing
+      :"sym2"
+      %s[sym3]
+      # opening and content has gap
+      <<~A; :\
+      A
+      sym4
+    RUBY
+    tokens = RDoc::Parser::RubyColorizer.colorize(code)
+    assert_equal(code, tokens.map(&:text).join)
+    symbol_tokens = tokens.select { |tok| tok[:kind] == :symbol }
+    expected_texts = %w[: sym1 :" sym2 " %s[ sym3 ] : sym4]
+    assert_equal(expected_texts, symbol_tokens.map(&:text))
+  end
+
+  # Heredoc openers, body lines and terminator lines are expected to carry the
+  # heredoc's kind (:x_string for the backtick form, :string otherwise), with
+  # the interpolated name in between as an :identifier.
+  def test_heredoc
+    code = <<~'RUBY'
+      def f
+        str1 = <<~AA
+          single-line-heredoc
+        AA
+        str2 = <<~`BB` # comment
+          x-string-heredoc
+        BB
+        str3 = <<~CC.itself
+          multi-line
+          #{embed}
+          heredoc
+        CC
+      end
+    RUBY
+    tokens = RDoc::Parser::RubyColorizer.colorize(code)
+    assert_equal(code, tokens.map(&:text).join)
+    # One [kind, text] pair per expected token, checked in this order.
+    [
+      [:string, '<<~AA'],
+      [:x_string, '<<~`BB`'],
+      [:string, '<<~CC'],
+      [:string, "  AA\n"],
+      [:x_string, "  BB\n"],
+      [:string, "  CC\n"],
+      [:string, "    single-line-heredoc\n"],
+      [:x_string, "    x-string-heredoc\n"],
+      [:string, "    multi-line\n"],
+      [:string, '#{'],
+      [:identifier, 'embed'],
+      [:string, '}'],
+      [:string, "    heredoc\n"]
+    ].each { |kind, text| assert_include(tokens, token(kind, text)) }
+  end
+end
diff --git a/test/rdoc/parser/ruby_test.rb b/test/rdoc/parser/ruby_test.rb
index b3cef3a341..a5c79071ba 100644
--- a/test/rdoc/parser/ruby_test.rb
+++ b/test/rdoc/parser/ruby_test.rb
@@ -2406,6 +2406,23 @@ def test_read_directive_linear_performance
     end
   end
 
+  # Token streams begin with the indentation; bar's is as wide as "  private ".
+  def test_code_object_token_stream
+    util_parser <<~RUBY
+      class Foo
+        def foo
+          42
+        end
+
+        private def bar
+          baz
+        end
+      end
+    RUBY
+    foo, bar = @top_level.classes.first.method_list.map(&:token_stream)
+    assert_equal(['  ', 'def', ' ', 'foo', "\n", '    ', '42', "\n", '  ', 'end'], foo.map(&:text))
+    assert_equal(['          ', 'def', ' ', 'bar', "\n", '    ', 'baz', "\n", '  ', 'end'], bar.map(&:text))
+  end
 
   def test_markup_first_comment
     util_parser <<~RUBY
diff --git a/test/rdoc/rdoc_token_stream_test.rb b/test/rdoc/rdoc_token_stream_test.rb
index ed5e124cc6..254811c72a 100644
--- a/test/rdoc/rdoc_token_stream_test.rb
+++ b/test/rdoc/rdoc_token_stream_test.rb
@@ -5,17 +5,18 @@ class RDocTokenStreamTest < RDoc::TestCase
 
   def test_class_to_html
     tokens = [
-      { :line_no => 0, :char_no => 0, :kind => :on_const, :text => 'CONSTANT' },
-      { :line_no => 0, :char_no => 0, :kind => :on_kw, :text => 'KW' },
-      { :line_no => 0, :char_no => 0, :kind => :on_ivar, :text => 'IVAR' },
-      { :line_no => 0, :char_no => 0, :kind => :on_op, :text => 'Op' },
-      { :line_no => 0, :char_no => 0, :kind => :on_ident, :text => 'Id' },
-      { :line_no => 0, :char_no => 0, :kind => :on_backref, :text => 'Node' },
-      { :line_no => 0, :char_no => 0, :kind => :on_comment, :text => 'COMMENT' },
-      { :line_no => 0, :char_no => 0, :kind => :on_regexp, :text => 'REGEXP' },
-      { :line_no => 0, :char_no => 0, :kind => :on_tstring, :text => 'STRING' },
-      { :line_no => 0, :char_no => 0, :kind => :on_int, :text => 'Val' },
-      { :line_no => 0, :char_no => 0, :kind => :on_unknown, :text => '\\' }
+      { kind: :constant, text: 'CONSTANT' },
+      { kind: :keyword, text: 'KW' },
+      { kind: :ivar, text: 'IVAR' },
+      { kind: :operator, text: 'Op' },
+      { kind: :identifier, text: 'Id' },
+      { kind: :symbol, text: 'Symbol' },
+      { kind: :x_string, text: 'XString' },
+      { kind: :comment, text: 'COMMENT' },
+      { kind: :regexp, text: 'REGEXP' },
+      { kind: :string, text: 'STRING' },
+      { kind: :value, text: 'Val' },
+      { kind: :plain, text: '\\' }
     ]
 
     expected = [
@@ -24,7 +25,8 @@ def test_class_to_html
       '<span class="ruby-ivar">IVAR</span>',
       '<span class="ruby-operator">Op</span>',
       '<span class="ruby-identifier">Id</span>',
-      '<span class="ruby-node">Node</span>',
+      '<span class="ruby-value">Symbol</span>',
+      '<span class="ruby-string">XString</span>',
       '<span class="ruby-comment">COMMENT</span>',
       '<span class="ruby-regexp">REGEXP</span>',
       '<span class="ruby-string">STRING</span>',