Skip to content

Commit e3c152b

Browse files
authored
Fix: Readability.published_at and Readability.title work as expected (#60)
1 parent e9a80fc commit e3c152b

4 files changed

Lines changed: 50 additions & 23 deletions

File tree

lib/readability.ex

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ defmodule Readability do
152152
@spec title(binary | html_tree) :: binary
153153
def title(raw_html) when is_binary(raw_html) do
154154
raw_html
155-
|> Floki.parse_document()
155+
|> Floki.parse_document!()
156156
|> title
157157
end
158158

@@ -183,7 +183,7 @@ defmodule Readability do
183183
@spec published_at(binary | html_tree) :: %DateTime{} | %Date{} | nil
184184
def published_at(raw_html) when is_binary(raw_html) do
185185
raw_html
186-
|> Floki.parse_document()
186+
|> Floki.parse_document!()
187187
|> published_at()
188188
end
189189

test/readability/author_finder_test.exs

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,23 +3,35 @@ defmodule Readability.AuthoFinderTest do
33

44
alias Readability.AuthorFinder
55

6+
defp test_fixture(file_name, expected_authors) do
7+
html = TestHelper.read_fixture(file_name)
8+
assert Readability.authors(html) == expected_authors
9+
10+
parsed_html = TestHelper.read_parse_fixture(file_name)
11+
assert AuthorFinder.find(parsed_html) == expected_authors
12+
end
13+
614
test "extracting bbc format author" do
7-
html = TestHelper.read_parse_fixture("bbc.html")
8-
assert AuthorFinder.find(html) == ["BBC News"]
15+
test_fixture("bbc.html", ["BBC News"])
916
end
1017

1118
test "extracting buzzfeed format author" do
12-
html = TestHelper.read_parse_fixture("buzzfeed.html")
13-
assert AuthorFinder.find(html) == ["Salvador Hernandez", "Hamza Shaban"]
19+
test_fixture("buzzfeed.html", ["Salvador Hernandez", "Hamza Shaban"])
1420
end
1521

1622
test "extracting medium format author" do
17-
html = TestHelper.read_parse_fixture("medium.html")
18-
assert AuthorFinder.find(html) == ["Ken Mazaika"]
23+
test_fixture("medium.html", ["Ken Mazaika"])
1924
end
2025

2126
test "extracting nytimes format author" do
22-
html = TestHelper.read_parse_fixture("nytimes.html")
23-
assert AuthorFinder.find(html) == ["Judith H. Dobrzynski"]
27+
test_fixture("nytimes.html", ["Judith H. Dobrzynski"])
2428
end
29+
30+
test "extracting pubmed format author" do
31+
test_fixture("pubmed.html", ["Meno H ", "et al."])
32+
end
33+
34+
# test "extracting elixir format author" do
35+
# test_fixture("elixir.html", ["José Valim"])
36+
# end
2537
end

test/readability/published_at_finder_test.exs

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3,31 +3,30 @@ defmodule Readability.PublishedAtFinderTest do
33

44
alias Readability.PublishedAtFinder
55

6-
test "extracting bbc format published at" do
7-
html = TestHelper.read_parse_fixture("bbc.html")
6+
defp test_fixture(file_name, expected_published_at) do
7+
html = TestHelper.read_fixture(file_name)
8+
assert Readability.published_at(html) == expected_published_at
9+
parsed_html = TestHelper.read_parse_fixture(file_name)
10+
assert PublishedAtFinder.find(parsed_html) == expected_published_at
11+
end
812

9-
assert PublishedAtFinder.find(html) == nil
13+
test "extracting bbc format published at" do
14+
test_fixture("bbc.html", nil)
1015
end
1116

1217
test "extracting buzzfeed format published at" do
13-
html = TestHelper.read_parse_fixture("buzzfeed.html")
14-
15-
assert PublishedAtFinder.find(html) == nil
18+
test_fixture("buzzfeed.html", nil)
1619
end
1720

1821
test "extracting elixir format published at" do
19-
html = TestHelper.read_parse_fixture("elixir.html")
20-
21-
assert PublishedAtFinder.find(html) == nil
22+
test_fixture("elixir.html", nil)
2223
end
2324

2425
test "extracting medium format published at" do
25-
html = TestHelper.read_parse_fixture("medium.html")
26-
assert PublishedAtFinder.find(html) == ~U[2015-01-31 22:58:05.645Z]
26+
test_fixture("medium.html", ~U[2015-01-31 22:58:05.645Z])
2727
end
2828

2929
test "extracting nytimes format published at" do
30-
html = TestHelper.read_parse_fixture("nytimes.html")
31-
assert PublishedAtFinder.find(html) == ~D[2016-03-16]
30+
test_fixture("nytimes.html", ~D[2016-03-16])
3231
end
3332
end

test/readability/title_finder_test.exs

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,22 @@ defmodule Readability.TitleFinderTest do
2525
assert title == "og title"
2626
end
2727

28+
test "extracts most proper title from an html string" do
29+
assert Readability.title(@html) == "og title"
30+
end
31+
32+
test "extracts regular title from an html string" do
33+
html = """
34+
<html>
35+
<head>
36+
<title>Tag title - test</title>
37+
</head>
38+
</html>
39+
"""
40+
41+
assert Readability.title(html) == "Tag title"
42+
end
43+
2844
test "extract og title" do
2945
title = Readability.TitleFinder.og_title(@html_tree)
3046
assert title == "og title"

0 commit comments

Comments
 (0)