Skip to content

Commit 4edafba

Browse files
authored
Inline regex definitions to avoid escape errors on Elixir 1.18 (#69)
1 parent 06db3f1 commit 4edafba

1 file changed

Lines changed: 31 additions & 18 deletions

File tree

lib/readability.ex

Lines changed: 31 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -53,23 +53,6 @@ defmodule Readability do
53 53
page_url: nil
54 54
]
55 55

56-
@regexes [
57-
unlikely_candidate:
58-
~r/combx|comment|community|disqus|extra|foot|header|hidden|lightbox|modal|menu|meta|nav|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i,
59-
ok_maybe_its_a_candidate: ~r/and|article|body|column|main|shadow/i,
60-
positive: ~r/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
61-
negative:
62-
~r/hidden|^hid|combx|comment|com-|contact|foot|footer|footnote|link|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|utility|widget/i,
63-
div_to_p_elements: ~r/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,
64-
replace_brs: ~r/(<br[^>]*>[ \n\r\t]*){2,}/i,
65-
replace_fonts: ~r/<(\/?)font[^>]*>/i,
66-
replace_xml_version: ~r/<\?xml.*\?>/i,
67-
normalize: ~r/\s{2,}/,
68-
video: ~r/\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
69-
protect_attrs: ~r/^(?!id|rel|for|summary|title|href|src|alt|srcdoc)/i,
70-
img_tag_src: ~r/(<img.*src=['"])([^'"]+)(['"][^>]*>)/Ui
71-
]
72-
73 56
@markup_mimes ~r/^(application|text)\/[a-z\-_\.\+]+ml(;\s*charset=.*)?$/i
74 57

75 58
@type html_tree :: tuple | list
@@ -249,7 +232,37 @@ defmodule Readability do
249 232
end
250 233
end
251 234

252-
def regexes(key), do: @regexes[key]
235+
def regexes(:unlikely_candidate),
236+
do:
237+
~r/combx|comment|community|disqus|extra|foot|header|hidden|lightbox|modal|menu|meta|nav|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i
238+
239+
def regexes(:ok_maybe_its_a_candidate), do: ~r/and|article|body|column|main|shadow/i
240+
241+
def regexes(:positive),
242+
do: ~r/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i
243+
244+
def regexes(:negative),
245+
do:
246+
~r/hidden|^hid|combx|comment|com-|contact|foot|footer|footnote|link|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|utility|widget/i
247+
248+
def regexes(:div_to_p_elements), do: ~r/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i
249+
250+
def regexes(:replace_brs), do: ~r/(<br[^>]*>[ \n\r\t]*){2,}/i
251+
252+
def regexes(:replace_fonts), do: ~r/<(\/?)font[^>]*>/i
253+
254+
def regexes(:replace_xml_version), do: ~r/<\?xml.*\?>/i
255+
256+
def regexes(:normalize), do: ~r/\s{2,}/
257+
258+
def regexes(:video),
259+
do: ~r/\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i
260+
261+
def regexes(:protect_attrs), do: ~r/^(?!id|rel|for|summary|title|href|src|alt|srcdoc)/i
262+
263+
def regexes(:img_tag_src), do: ~r/(<img.*src=['"])([^'"]+)(['"][^>]*>)/Ui
264+
265+
def regexes(_key), do: nil
253 266

254 267
def default_options, do: @default_options
255 268
end

0 commit comments

Comments
 (0)