@@ -19,45 +19,48 @@ class TestLDC:
1919 importlib .resources .path (fixtures , "ldc-treebank-3.json" ) as test_json ,
2020 open (test_json , encoding = "utf-8" ) as fh
2121 ):
22- duplicate_invoice_data = json .loads (fh .read ())
23- single_item = [duplicate_invoice_data [0 ]]
22+ duplicate_items = json .loads (fh .read ())
23+ single_item = [duplicate_items [0 ]]
2424
2525 @pytest .mark .parametrize (
26- "markup,param_name,expected" , [
27- pytest .param (
28- '<form><input name="authenticity_token" value="foo"/></form>' ,
29- "authenticity_token" ,
30- {"authenticity_token" : "foo" },
31- id = "with_default_param_name"
32- ),
33- pytest .param (
34- '<form><input name="something_else" value="bar"/><input name="authenticity_token" value="baz"/></form>' ,
35- "something_else" ,
36- {"something_else" : "bar" },
37- id = "with_param_name"
38- ),
39- pytest .param (
40- '<form><input name="csrf-token" value="quux"/></form' ,
41- "doesnt_exist" ,
42- {},
43- id = "without_matching_tag"
44- )
45- ])
26+ "markup,param_name,expected" ,
27+ [
28+ pytest .param (
29+ '<form><input name="authenticity_token" value="foo"/></form>' ,
30+ "authenticity_token" ,
31+ {"authenticity_token" : "foo" },
32+ id = "with_default_param_name"
33+ ),
34+ pytest .param (
35+ '<form><input name="something_else" value="bar"/><input name="authenticity_token" value="baz"/></form>' ,
36+ "something_else" ,
37+ {"something_else" : "bar" },
38+ id = "with_param_name"
39+ ),
40+ pytest .param (
41+ '<form><input name="csrf-token" value="quux"/></form' ,
42+ "doesnt_exist" ,
43+ {},
44+ id = "without_matching_tag"
45+ )
46+ ]
47+ )
4648 def test_get_csrf_token (self , markup , param_name , expected ) -> None :
4749 """Ensure we can gather the CSRF token from the LDC login form."""
4850 assert get_csrf_token (markup = markup , param_name = param_name ) == expected
4951
5052
5153 @pytest .mark .parametrize (
52- "corpora,corpus_id,expected" , [
53- pytest .param (
54- [{ "catalog_id" : "LDC99T42" , "corpus_name" : "Treebank-3" , "download_link" : "/download/4c0512a1451377eb2790d557fc76a690fa11693ad846df02f3ee59d12788" , "invoice_date" : "2025-01-01" , "file" : "treebank_3_LDC99T42" , "filesize" : "51.6 MB" , "checksum" : "98c74f99f6ca17dc88efb4077fcd9539" }],
55- "LDC99T42" ,
56- "2025-01-01" ,
57- id = "with_single_item_list"
58- ),
59- pytest .param (duplicate_invoice_data , "LDC99T42" , "2020-08-22" , id = "with_dupes" ),
60- pytest .param ([], "bogus" , None , id = "with_empty_corpora_list" )
54+ "corpora,corpus_id,expected" ,
55+ [
56+ pytest .param (
57+ [{ "catalog_id" : "LDC99T42" , "corpus_name" : "Treebank-3" , "download_link" : "/download/4c0512a1451377eb2790d557fc76a690fa11693ad846df02f3ee59d12788" , "invoice_date" : "2025-01-01" , "file" : "treebank_3_LDC99T42" , "filesize" : "51.6 MB" , "checksum" : "98c74f99f6ca17dc88efb4077fcd9539" }],
58+ "LDC99T42" ,
59+ "2025-01-01" ,
60+ id = "with_single_item_list"
61+ ),
62+ pytest .param (duplicate_items , "LDC99T42" , "2020-08-22" , id = "with_dupes" ),
63+ pytest .param ([], "bogus" , None , id = "with_empty_corpora_list" )
6164 ]
6265 )
6366 def test_get_latest_invoice_date (
@@ -68,7 +71,8 @@ def test_get_latest_invoice_date(
6871
6972
7073 @pytest .mark .parametrize (
71- "tag,expected" , [
74+ "tag,expected" ,
75+ [
7276 pytest .param (BeautifulSoup ("""
7377 <tr class="odd">
7478 <td class="">LDC2026S04</td>
@@ -125,35 +129,18 @@ def test_scrape_corpus_metadata(self, tag, expected) -> None:
125129
126130
127131 @pytest .mark .parametrize (
128- "corpora,corpus_id,filename_regex,expected" , [
129- pytest .param (
130- single_item ,
131- "LDC99T42" ,
132- None ,
133- single_item ,
134- id = "with_single_matching_item"
135- ),
136- pytest .param (
137- duplicate_invoice_data ,
138- "LDC99T42" ,
139- ".*treebank.*" ,
140- single_item ,
141- id = "with_regex_filter"
142- ),
143- pytest .param (
144- duplicate_invoice_data ,
145- "LDC99T42" ,
146- ".*nonexistent.*" ,
147- [],
148- id = "with_no_matches"
149- ),
150- pytest .param (
151- [],
152- "bogus" ,
153- None ,
154- [],
155- id = "with_empty_corpora_list"
156- )
132+ "corpora,corpus_id,filename_regex,expected" ,
133+ [
134+ pytest .param (
135+ single_item , "LDC99T42" , None , single_item , id = "with_single_item"
136+ ),
137+ pytest .param (
138+ duplicate_items , "LDC99T42" , ".*treebank.*" , single_item , id = "with_regex_filter"
139+ ),
140+ pytest .param (
141+ duplicate_items , "LDC99T42" , ".*nonexistent.*" , [], id = "with_no_matches"
142+ ),
143+ pytest .param ([], "bogus" , None , [], id = "with_empty_corpora_list" )
157144 ]
158145 )
159146 def test_filter_corpora (
0 commit comments