@@ -5,45 +5,138 @@ def doc_for(html)
55 Nokolexbor ::HTML ( html )
66 end
77
# Tie-breaking between candidate groups that hold the same number of children.
# Each fixture contains two sibling groups of three children, so group size
# alone cannot decide which group `tiles` returns.
context "when sizes tie" do
  # "title" children carry a <div title> plus an <img> without alt text;
  # "alt" children carry only an <img alt>. The alt group is expected to win.
  it "prioritizes image elements with alt attributes over elements with title attributes" do
    doc = doc_for(<<~HTML)
      <html><body>
        <div id="title">
          <div><div title="one"></div><img></div>
          <div><div title="two"></div><img></div>
          <div><div title="three"></div><img></div>
        </div>
        <div id="alt">
          <div><img alt="S1"></div>
          <div><img alt="S2"></div>
          <div><img alt="S3"></div>
        </div>
      </body></html>
    HTML

    tiles = described_class.tiles(doc)
    expect(tiles.size).to eq(3)
    expect(tiles.first.parent["id"]).to eq("alt")
  end

  # Same layout as above, but the competing signal is aria-label instead of
  # alt. The title group comes second in the document, so winning cannot be
  # explained by DOM order.
  it "prioritizes elements with title attributes over aria-labels" do
    doc = doc_for(<<~HTML)
      <html><body>
        <div id="aria">
          <div><div aria-label="one"></div><img></div>
          <div><div aria-label="two"></div><img></div>
          <div><div aria-label="three"></div><img></div>
        </div>
        <div id="title">
          <div><div title="one"></div><img></div>
          <div><div title="two"></div><img></div>
          <div><div title="three"></div><img></div>
        </div>
      </body></html>
    HTML

    tiles = described_class.tiles(doc)
    expect(tiles.size).to eq(3)
    expect(tiles.first.parent["id"]).to eq("title")
  end

  # Both groups have identical structure (anchor wrapping an <img alt>), so
  # the only remaining difference is their position in the document.
  it "is deterministic on exact ties by picking the first group in DOM order" do
    doc = doc_for(<<~HTML)
      <html><body>
        <div id="first">
          <div><a href="/search?stick=f1"><img alt="F1"></a></div>
          <div><a href="/search?stick=f2"><img alt="F2"></a></div>
          <div><a href="/search?stick=f3"><img alt="F3"></a></div>
        </div>
        <div id="second">
          <div><a href="/search?stick=s1"><img alt="S1"></a></div>
          <div><a href="/search?stick=s2"><img alt="S2"></a></div>
          <div><a href="/search?stick=s3"><img alt="S3"></a></div>
        </div>
      </body></html>
    HTML

    tiles = described_class.tiles(doc)
    expect(tiles.size).to eq(3)
    expect(tiles.first.parent["id"]).to eq("first")
  end
end
2872
# Groups of different sizes: a larger "quantity" group of bare anchors competes
# with a smaller "quality" group whose anchors wrap <img alt> elements.
describe "group score & quality weights" do
  # Six bare anchors versus five anchors that each wrap an <img alt>; the
  # smaller group is expected to be returned.
  it "prefers the candidate group with stronger tile signals when sizes are close" do
    doc = doc_for(<<~HTML)
      <html><body>
        <div id="quantity">
          <div><a href="/search?stick=f1"></a></div>
          <div><a href="/search?stick=f2"></a></div>
          <div><a href="/search?stick=f3"></a></div>
          <div><a href="/search?stick=f4"></a></div>
          <div><a href="/search?stick=f5"></a></div>
          <div><a href="/search?stick=f6"></a></div>
        </div>
        <div id="quality">
          <div><a href="/search?stick=s1"><img alt="S1"></a></div>
          <div><a href="/search?stick=s2"><img alt="S2"></a></div>
          <div><a href="/search?stick=s3"><img alt="S3"></a></div>
          <div><a href="/search?stick=s4"><img alt="S4"></a></div>
          <div><a href="/search?stick=s5"><img alt="S5"></a></div>
        </div>
      </body></html>
    HTML

    tiles = described_class.tiles(doc)
    expect(tiles.size).to eq(5)
    expect(tiles.first.parent["id"]).to eq("quality")
  end

  # Here only three of the five "quality" children wrap an <img alt>; the other
  # two are bare anchors. ENV.fetch is stubbed to return "2" for the tolerance
  # key, and the mixed group is still expected to be returned.
  it "the ACCEPTABLE_NUMBER_OF_MISFORMED_TILES environment variable can soften the uniformity requirement" do
    allow(ENV).to receive(:fetch).and_call_original # Preserves unmocked keys
    # NOTE(review): this stub only matches a fetch call whose default argument is
    # exactly 0, and only takes effect if the value is read while `tiles` runs
    # rather than cached at load time - confirm against the implementation.
    allow(ENV).to receive(:fetch).with("ACCEPTABLE_NUMBER_OF_MISFORMED_TILES", 0).and_return("2")
    doc = doc_for(<<~HTML)
      <html><body>
        <div id="quantity">
          <div><a href="/search?stick=f1"></a></div>
          <div><a href="/search?stick=f2"></a></div>
          <div><a href="/search?stick=f3"></a></div>
          <div><a href="/search?stick=f4"></a></div>
          <div><a href="/search?stick=f5"></a></div>
          <div><a href="/search?stick=f6"></a></div>
        </div>
        <div id="quality">
          <div><a href="/search?stick=s1"><img alt="S1"></a></div>
          <div><a href="/search?stick=s2"><img alt="S2"></a></div>
          <div><a href="/search?stick=s3"><img alt="S3"></a></div>
          <div><a href="/search?stick=s4"></a></div>
          <div><a href="/search?stick=s5"></a></div>
        </div>
      </body></html>
    HTML

    tiles = described_class.tiles(doc)
    expect(tiles.size).to eq(5)
    expect(tiles.first.parent["id"]).to eq("quality")
  end
end
126+
# A single group whose children offer no alt, title or aria-label text: each
# child is just an anchor wrapping a bare <img>. The three children are still
# expected to be returned as tiles.
it "selects anchor elements if easily detectable name candidates can't be found" do
  doc = doc_for(<<~HTML)
    <html><body>
      <div id="stick">
        <div><a href="/search?stick=f1"><img></a></div>
        <div><a href="/search?stick=f2"><img></a></div>
        <div><a href="/search?stick=f3"><img></a></div>
      </div>
    </body></html>
  HTML

  tiles = described_class.tiles(doc)
  expect(tiles.size).to eq(3)
  expect(tiles.first.parent["id"]).to eq("stick")
end
49142end
0 commit comments