@@ -53,23 +53,6 @@ defmodule Readability do
5353 page_url: nil
5454 ]
5555
# Diff-removed block (every line below carries a `-` marker): a keyword list of
# regexes bound to the @regexes module attribute and keyed by purpose. The old
# one-line regexes/1 accessor read it as `@regexes[key]`; the same keys and
# patterns reappear as individual regexes/1 function clauses later in this diff.
56- @ regexes [
57- unlikely_candidate:
58- ~r/ combx|comment|community|disqus|extra|foot|header|hidden|lightbox|modal|menu|meta|nav|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/ i ,
59- ok_maybe_its_a_candidate: ~r/ and|article|body|column|main|shadow/ i ,
60- positive: ~r/ article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/ i ,
61- negative:
62- ~r/ hidden|^hid|combx|comment|com-|contact|foot|footer|footnote|link|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|utility|widget/ i ,
63- div_to_p_elements: ~r/ <(a|blockquote|dl|div|img|ol|p|pre|table|ul)/ i ,
64- replace_brs: ~r/ (<br[^>]*>[ \n \r \t ]*){2,}/ i ,
65- replace_fonts: ~r/ <(\/ ?)font[^>]*>/ i ,
66- replace_xml_version: ~r/ <\? xml.*\? >/ i ,
67- normalize: ~r/ \s {2,}/ ,
68- video: ~r/ \/ \/ (www\. )?(dailymotion|youtube|youtube-nocookie|player\. vimeo)\. com/ i ,
69- protect_attrs: ~r/ ^(?!id|rel|for|summary|title|href|src|alt|srcdoc)/ i ,
70- img_tag_src: ~r/ (<img.*src=['"])([^'"]+)(['"][^>]*>)/ Ui
71- ]
72-
# Case-insensitive regex kept in the @markup_mimes module attribute (unchanged
# context line in this diff). The pattern appears to accept `application/*` and
# `text/*` MIME types whose subtype ends in "ml", optionally followed by a
# `;charset=...` parameter — TODO confirm against the call sites, which are not
# visible here.
7356 @ markup_mimes ~r/ ^(application|text)\/ [a-z\- _\. \+ ]+ml(;\s *charset=.*)?$/ i
7457
# Type alias: an html_tree is either a tuple or a list.
7558 @ type html_tree :: tuple | list
@@ -249,7 +232,37 @@ defmodule Readability do
249232 end
250233 end
251234
@doc """
Returns the regex registered under `key`, or `nil` when `key` is not one of
the known names.

Known keys:

  * `:unlikely_candidate` - class/id fragments that mark an element as unlikely content
  * `:ok_maybe_its_a_candidate` - fragments that rescue an otherwise unlikely element
  * `:positive` - fragments that count in favour of an element
  * `:negative` - fragments that count against an element
  * `:div_to_p_elements` - opening tags of block-level children
  * `:replace_brs` - runs of two or more `<br>` tags
  * `:replace_fonts` - opening and closing `<font>` tags
  * `:replace_xml_version` - an `<?xml ... ?>` declaration
  * `:normalize` - runs of two or more whitespace characters
  * `:video` - protocol-relative hosts of supported video sites
  * `:protect_attrs` - negative lookahead over attribute names to keep
  * `:img_tag_src` - an `<img>` tag split into prefix, `src` value and suffix

Each regex is written as a literal inside its own function clause instead of
being looked up in a module attribute.
NOTE(review): presumably done so the regexes are no longer stored in a module
attribute (Erlang/OTP 28 compatibility) - confirm with the commit author.
"""
@spec regexes(atom()) :: Regex.t() | nil
def regexes(:unlikely_candidate),
  do:
    ~r/combx|comment|community|disqus|extra|foot|header|hidden|lightbox|modal|menu|meta|nav|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i

def regexes(:ok_maybe_its_a_candidate), do: ~r/and|article|body|column|main|shadow/i

def regexes(:positive),
  do: ~r/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i

def regexes(:negative),
  do:
    ~r/hidden|^hid|combx|comment|com-|contact|foot|footer|footnote|link|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|utility|widget/i

def regexes(:div_to_p_elements), do: ~r/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i

def regexes(:replace_brs), do: ~r/(<br[^>]*>[ \n\r\t]*){2,}/i

def regexes(:replace_fonts), do: ~r/<(\/?)font[^>]*>/i

def regexes(:replace_xml_version), do: ~r/<\?xml.*\?>/i

def regexes(:normalize), do: ~r/\s{2,}/

def regexes(:video),
  do: ~r/\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i

def regexes(:protect_attrs), do: ~r/^(?!id|rel|for|summary|title|href|src|alt|srcdoc)/i

# `U` makes the quantifiers ungreedy so `.*` stops at the first `src=`.
def regexes(:img_tag_src), do: ~r/(<img.*src=['"])([^'"]+)(['"][^>]*>)/Ui

# Unknown keys yield nil, matching what the former `@regexes[key]` lookup
# returned for a missing key.
def regexes(_key), do: nil
253266
@doc """
Returns the value of the `@default_options` module attribute.
"""
def default_options do
  @default_options
end
255268end
0 commit comments