@@ -40,6 +40,82 @@ class RDoc::Markup::ToHtml < RDoc::Markup::Formatter
4040
4141 # :section:
4242
# Lazily built lookup table: for each encoding requested, holds a Hash that
# maps a symbolic character name (e.g. +:copyright+) to that character
# transcoded for the encoding, or to an ASCII stand-in when the encoding
# cannot represent it.
#
# The per-encoding Hash is computed on first access and then memoized.
#
# See also encode_fallback.

TO_HTML_CHARACTERS = Hash.new do |cache, encoding|
  # name => [preferred character, ASCII stand-in]
  cache[encoding] = {
    close_dquote: ['”', '"'],
    close_squote: ['’', "'"],
    copyright:    ['©', '(c)'],
    ellipsis:     ['…', '...'],
    dot_ellipsis: ['.…', '....'],
    em_dash:      ['—', '---'],
    en_dash:      ['–', '--'],
    open_dquote:  ['“', '"'],
    open_squote:  ['‘', "'"],
    trademark:    ['®', '(r)'],
  }.transform_values do |character, stand_in|
    encode_fallback(character, encoding, stand_in)
  end
end
62+
# Maps the plain-text spellings recognised in markup to the symbolic
# character names used as keys of the per-encoding TO_HTML_CHARACTERS tables.
#
# Key order is significant: the keys are fed to Regexp.union, so a longer
# spelling must come before any shorter spelling it starts with
# (<tt>---</tt> before <tt>--</tt>, <tt>....</tt> before <tt>...</tt>).
HTML_CHARACTER_ALIASES = {
  # symbols
  '(c)'  => :copyright,
  '(C)'  => :copyright,
  '(r)'  => :trademark,
  '(R)'  => :trademark,
  # dashes
  '---'  => :em_dash,
  '--'   => :en_dash,
  # ellipses
  '....' => :dot_ellipsis,
  '...'  => :ellipsis,
  # TeX-style double quotes
  '``'   => :open_dquote,
  "''"   => :close_dquote,
}
75+
# Transcodes +character+ to +encoding+, substituting +fallback+ when the
# target encoding cannot represent it.
#
# +character+ may be more than one character long (e.g. <tt>'.…'</tt>). The
# string is treated as a unit: if any part of it is not representable, the
# whole result is +fallback+ transcoded to +encoding+. Replacing only the
# unrepresentable part would splice +fallback+ in next to the characters that
# did convert, producing a string that is neither +character+ nor +fallback+.
#
# Returns a String in +encoding+. Errors other than an undefined conversion
# (for example an unknown converter) propagate to the caller.

def self.encode_fallback(character, encoding, fallback)
  character.encode(encoding)
rescue Encoding::UndefinedConversionError
  fallback.encode(encoding)
end
82+
# Converts ASCII quote characters to typographic (multibyte) quote
# characters. An instance remembers whether a double quote and whether a
# single quote is currently open, so successive calls alternate between the
# opening and closing forms.
class QuoteConverter

  def initialize
    @in_dquote = false
    @in_squote = false
  end

  # Returns the replacement for +quote+, looked up in TO_HTML_CHARACTERS for
  # the encoding of +quote+, or +nil+ when +quote+ should be left as is.
  #
  # +quote+ is expected to be <tt>"</tt>, <tt>'</tt> or a backtick; any other
  # string yields +nil+. +after_word+ tells whether the quote directly
  # follows a word character.
  def convert(quote, after_word:)
    case quote
    when '"'
      type = @in_dquote ? :close_dquote : :open_dquote
      @in_dquote = !@in_dquote
    when "'"
      if @in_squote
        type = :close_squote
        @in_squote = false
      elsif after_word
        # Mary's dog, my parents' house: do not start paired quotes
        type = :close_squote
      else
        type = :open_squote
        @in_squote = true
      end
    when '`'
      # Opening quote of <tt>`quoted sentence'</tt>.
      # This will conflict with code blocks <tt>`puts('hello')`</tt> in the future.
      if !@in_squote && !after_word
        type = :open_squote
        @in_squote = true
      end
    end
    TO_HTML_CHARACTERS[quote.encoding][type] if type
  end
end
118+
43119 ##
44120 # Creates a new formatter that will output HTML
45121
@@ -51,6 +127,7 @@ def initialize(options, markup = nil)
51127 @in_list_entry = nil
52128 @list = nil
53129 @th = nil
130+ @quote_converter = nil
54131 @in_tidylink_label = false
55132 @hard_break = "<br>\n "
56133
@@ -75,6 +152,11 @@ def init_regexp_handlings
75152 # suppress crossref: \#method \::method \ClassName \method_with_underscores
76153 @markup . add_regexp_handling ( /\\ (?:[#:A-Z]|[a-z]+_[a-z0-9])/ , :SUPPRESSED_CROSSREF )
77154
155+ @markup . add_regexp_handling ( Regexp . union ( HTML_CHARACTER_ALIASES . keys ) , :HTML_CHARACTERS )
156+
157+ @markup . add_regexp_handling ( /\b ['"`]/ , :QUOTE_AFTER_WORD )
158+ @markup . add_regexp_handling ( /\B ['"`]/ , :QUOTE_NOT_AFTER_WORD )
159+
78160 init_link_notation_regexp_handlings
79161 end
80162
@@ -227,12 +309,28 @@ def handle_TIDYLINK(label_part, url)
227309
# Renders inline +text+ by delegating to the superclass implementation with
# a fresh output buffer and a fresh QuoteConverter in place, then clears both
# and returns whatever the buffer holds once the superclass is done.
def handle_inline(text) # :nodoc:
  @quote_converter = QuoteConverter.new
  @inline_output = +''
  super
  @inline_output.tap do
    # Per-call state must not outlive this call.
    @quote_converter = nil
    @inline_output = nil
  end
end
235319
# Replaces one of the spellings listed in HTML_CHARACTER_ALIASES
# (<tt>(c), (r), --, ---, ..., ...., ``, ''</tt>) with the matching entry of
# TO_HTML_CHARACTERS for the encoding of +text+.
#
# Returns +nil+ when +text+ is not a known spelling.
def handle_regexp_HTML_CHARACTERS(text)
  character_name = HTML_CHARACTER_ALIASES[text]
  return unless character_name

  TO_HTML_CHARACTERS[text.encoding][character_name]
end
325+
# Handles a quote character that does not directly follow a word character:
# asks the current quote converter for a replacement and, when it has none,
# falls back to convert_string on the original +text+.
def handle_regexp_QUOTE_NOT_AFTER_WORD(text)
  replacement = @quote_converter.convert(text, after_word: false)
  replacement || convert_string(text)
end
329+
# Handles a quote character that directly follows a word character: asks the
# current quote converter for a replacement and, when it has none, falls
# back to convert_string on the original +text+.
def handle_regexp_QUOTE_AFTER_WORD(text)
  replacement = @quote_converter.convert(text, after_word: true)
  replacement || convert_string(text)
end
333+
236334 # Converts suppressed cross-reference +text+ to HTML by removing the leading backslash.
237335
238336 def handle_regexp_SUPPRESSED_CROSSREF ( text )
@@ -565,10 +663,7 @@ def parseable?(text)
# Converts +item+ to HTML by passing it through handle_inline and returning
# its result unchanged.
#
# No separate character post-processing pass runs on the rendered output
# here; the method returns exactly what handle_inline produces.

def to_html(item)
  handle_inline(item)
end
573668end
574669
0 commit comments