@@ -42,6 +42,86 @@ class RDoc::Markup::ToHtml < RDoc::Markup::Formatter
4242
4343 # :section:
4444
##
# Per-encoding cache of the typographic characters this formatter emits.
#
# Looking up an encoding that has not been requested before builds a Hash of
# symbolic name => character (each transcoded through encode_fallback) and
# stores it under that encoding, so the transcoding happens once per
# encoding.
#
# Each row of the table is: name, preferred character, and the ASCII text
# handed to encode_fallback as the replacement. Note that +:trademark+
# holds the registered sign.
#
# See also encode_fallback.

TO_HTML_CHARACTERS = Hash.new do |cache, encoding|
  table = [
    [:close_dquote, '”',  '"'],
    [:close_squote, '’',  "'"],
    [:copyright,    '©',  '(c)'],
    [:ellipsis,     '…',  '...'],
    [:dot_ellipsis, '.…', '....'],
    [:em_dash,      '—',  '---'],
    [:en_dash,      '–',  '--'],
    [:open_dquote,  '“',  '"'],
    [:open_squote,  '‘',  "'"],
    [:trademark,    '®',  '(r)'],
  ]

  cache[encoding] = table.each_with_object({}) do |(name, character, ascii), characters|
    characters[name] = encode_fallback(character, encoding, ascii)
  end
end
64+
##
# Maps the ASCII spellings recognized in markup to the symbolic character
# names used as keys of the per-encoding tables in TO_HTML_CHARACTERS.
#
# The keys are fed to Regexp.union in init_regexp_handlings, which tries
# alternatives in insertion order. A longer alias must therefore stay ahead
# of any alias that is its prefix (<tt>---</tt> before <tt>--</tt>,
# <tt>....</tt> before <tt>...</tt>).
#
# Frozen so the shared lookup table cannot be modified at runtime.

HTML_CHARACTER_ALIASES = {
  '(c)'  => :copyright,
  '(C)'  => :copyright,
  '(r)'  => :trademark,
  '(R)'  => :trademark,
  '---'  => :em_dash,
  '--'   => :en_dash,
  '....' => :dot_ellipsis,
  '...'  => :ellipsis,
  '``'   => :open_dquote,
  "''"   => :close_dquote,
}.freeze
77+
##
# Transcodes +character+ to +encoding+. When +character+ contains anything
# that +encoding+ cannot represent, returns +fallback+ transcoded to
# +encoding+ instead.
#
# The fallback stands in for the whole string, not for each unconvertible
# code point. Per-code-point replacement would turn <tt>'.…'</tt> with
# fallback <tt>'....'</tt> into five dots, because the leading dot converts
# cleanly and only the ellipsis is replaced.
#
# Errors other than an undefined conversion (for example an unknown
# +encoding+) are not rescued.

def self.encode_fallback(character, encoding, fallback)
  character.encode(encoding)
rescue Encoding::UndefinedConversionError
  fallback.encode(encoding)
end
88+
##
# Turns ASCII quote characters into typographic quote characters.
#
# An instance remembers, between calls to #convert, whether a double quote
# and whether a single quote is currently open, so that successive quotes
# alternate between opening and closing forms.

class QuoteConverter

  def initialize
    @in_dquote = false
    @in_squote = false
  end

  ##
  # Returns the replacement for +quote+ (one of <tt>"</tt>, <tt>'</tt> or
  # a backtick), looked up in TO_HTML_CHARACTERS for the encoding of
  # +quote+. Returns +nil+ when +quote+ should not be converted.
  #
  # +after_word+ tells whether the quote directly follows a word character.

  def convert(quote, after_word:)
    name =
      case quote
      when '"' then double_quote_name
      when "'" then single_quote_name(after_word)
      when '`' then backtick_name(after_word)
      end
    return unless name

    TO_HTML_CHARACTERS[quote.encoding][name]
  end

  private

  # Toggles the double-quote state and names the quote for the state that
  # was in effect before the toggle.
  def double_quote_name
    closing = @in_dquote
    @in_dquote = !closing
    closing ? :close_dquote : :open_dquote
  end

  # Names a single quote: closes an open pair, acts as an apostrophe after a
  # word, or opens a new pair.
  def single_quote_name(after_word)
    if @in_squote
      @in_squote = false
      :close_squote
    elsif after_word
      # Mary's dog, my parents' house: do not start paired quotes
      :close_squote
    else
      @in_squote = true
      :open_squote
    end
  end

  # Opening quote of <tt>`quoted sentence'</tt>.
  # This will conflict with code blocks <tt>`puts('hello')`</tt> in the future.
  def backtick_name(after_word)
    return if @in_squote || after_word

    @in_squote = true
    :open_squote
  end
end
124+
45125 ##
46126 # Creates a new formatter that will output HTML
47127
@@ -55,6 +135,7 @@ def initialize(pipe: false, output_decoration: true)
55135 @in_list_entry = nil
56136 @list = nil
57137 @th = nil
138+ @quote_converter = nil
58139 @in_tidylink_label = false
59140 @hard_break = "<br>\n "
60141
@@ -79,6 +160,11 @@ def init_regexp_handlings
79160 # suppress crossref: \#method \::method \ClassName \method_with_underscores
80161 @markup . add_regexp_handling ( /\\ (?:[#:A-Z]|[a-z]+_[a-z0-9])/ , :SUPPRESSED_CROSSREF )
81162
163+ @markup . add_regexp_handling ( Regexp . union ( HTML_CHARACTER_ALIASES . keys ) , :HTML_CHARACTERS )
164+
165+ @markup . add_regexp_handling ( /\b ['"`]/ , :QUOTE_AFTER_WORD )
166+ @markup . add_regexp_handling ( /\B ['"`]/ , :QUOTE_NOT_AFTER_WORD )
167+
82168 init_link_notation_regexp_handlings
83169 end
84170
@@ -231,12 +317,28 @@ def handle_TIDYLINK(label_part, url)
231317
def handle_inline(text) # :nodoc:
  # Fresh per-call state: an output buffer and a quote tracker, so quote
  # pairing starts over for each inline text.
  @quote_converter = QuoteConverter.new
  @inline_output = +''

  super

  # Hand back the buffer and clear the per-call state again.
  @inline_output.tap do
    @inline_output = nil
    @quote_converter = nil
  end
end
239327
##
# Converts <tt>(c), (r), --, ---, ..., ...., ``, ''</tt> to HTML characters.
#
# +text+ is looked up in HTML_CHARACTER_ALIASES and the resulting name is
# resolved in TO_HTML_CHARACTERS for the encoding of +text+. Returns +nil+
# when +text+ is not a known alias.

def handle_regexp_HTML_CHARACTERS(text)
  character_name = HTML_CHARACTER_ALIASES[text]
  return unless character_name

  TO_HTML_CHARACTERS[text.encoding][character_name]
end
333+
##
# Handles a quote character that does not directly follow a word character.
#
# Returns the quote converter's replacement for +text+, or the result of
# convert_string when the converter declines to convert it.

def handle_regexp_QUOTE_NOT_AFTER_WORD(text)
  converted = @quote_converter.convert(text, after_word: false)
  converted || convert_string(text)
end
337+
##
# Handles a quote character that directly follows a word character.
#
# Returns the quote converter's replacement for +text+, or the result of
# convert_string when the converter declines to convert it.

def handle_regexp_QUOTE_AFTER_WORD(text)
  converted = @quote_converter.convert(text, after_word: true)
  converted || convert_string(text)
end
341+
240342 # Converts suppressed cross-reference +text+ to HTML by removing the leading backslash.
241343
242344 def handle_regexp_SUPPRESSED_CROSSREF ( text )
@@ -576,9 +678,6 @@ def parseable?(text)
##
# Converts +item+ to HTML by passing it through handle_inline and returning
# its result.
#
# HTML character and quote conversion is no longer applied here as a
# post-processing step on the finished output.

def to_html(item)
  handle_inline(item)
end
584683end
0 commit comments