|
7 | 7 | end |
8 | 8 | let(:result) { Extractor.call(File.join(FIXTURES_DIR, "van-gogh-paintings.html")) } |
9 | 9 |
|
10 | | - it "returns the same number of items as the SerpApi reference output" do |
11 | | - expect(result.size).to eq(expected.size) |
| 10 | + it "matches SerpApi reference output" do |
| 11 | + expect(result).to eq(expected) |
12 | 12 | end |
| 13 | + end |
13 | 14 |
|
14 | | - it "matches name, extensions and link byte-for-byte across all items" do |
15 | | - mismatches = result.each_with_index.reject do |item, i| |
16 | | - %w[name extensions link].all? { |f| item[f] == expected[i][f] } |
17 | | - end |
18 | | - expect(mismatches).to be_empty, -> { |
19 | | - mismatches.first(3).map { |it, i| |
20 | | - "row #{i}: got=#{it.reject { |k,_| k == 'image' }.inspect} " \ |
21 | | - "exp=#{expected[i].reject { |k,_| k == 'image' }.inspect}" |
22 | | - }.join("\n") |
23 | | - } |
| 15 | + describe ".call with U.S. Presidents fixture" do |
| 16 | + let(:expected) do |
| 17 | + JSON.parse(File.read((File.expand_path("fixtures/u.s._presidents.json", __dir__))))["artworks"] |
24 | 18 | end |
| 19 | + let(:result) { Extractor.call(File.expand_path("fixtures/u.s._presidents.html", __dir__)) } |
25 | 20 |
|
26 | | - it "extracts every inline thumbnail present in the HTML exactly" do |
27 | | - # The page only ships the first N base64 thumbnails inline. The rest |
28 | | - # are URL thumbnails in in-file attributes (e.g. data-src). Those are |
29 | | - # still part of the page snapshot and should be surfaced as-is. |
30 | | - inline_expected = expected.each_with_index.select { |e, _| e["image"].to_s.start_with?("data:") } |
| 21 | + it "matches U.S. Presidents reference output" do |
| 22 | + expect(result).to eq(expected) |
| 23 | + end |
| 24 | + end |
31 | 25 |
|
32 | | - inline_expected.each do |e, i| |
33 | | - expect(result[i]["image"]).to eq(e["image"]), |
34 | | - "mismatch on row #{i} (#{e['name']})" |
35 | | - end |
| 26 | + describe ".call with Tom Cruise movies fixture" do |
| 27 | + let(:expected) do |
| 28 | + JSON.parse(File.read((File.expand_path("fixtures/tom_cruise_movies.json", __dir__))))["artworks"] |
36 | 29 | end |
| 30 | + let(:result) { Extractor.call(File.expand_path("fixtures/tom_cruise_movies.html", __dir__)) } |
37 | 31 |
|
38 | | - it "matches image output byte-for-byte against expected array" do |
39 | | - mismatches = result.each_with_index.reject { |item, i| item["image"] == expected[i]["image"] } |
40 | | - expect(mismatches).to be_empty, -> { |
41 | | - mismatches.first(3).map { |(item, i)| |
42 | | - "row #{i}: got=#{item['image'].inspect} exp=#{expected[i]['image'].inspect}" |
43 | | - }.join("\n") |
44 | | - } |
| 32 | + it "matches Tom Cruise movies reference output" do |
| 33 | + expect(result).to eq(expected) |
45 | 34 | end |
46 | 35 | end |
47 | 36 | end |
0 commit comments