|
| 1 | +import textwrap |
| 2 | + |
| 3 | +from tests.tree.base import TreeTestBase |
| 4 | +from usp.objects.sitemap import ( |
| 5 | + InvalidSitemap, |
| 6 | +) |
| 7 | +from usp.tree import sitemap_tree_for_homepage |
| 8 | + |
| 9 | + |
class TestTreeSecurity(TreeTestBase):
    """Tests for how the sitemap tree handles hostile XML input."""

    def test_billion_laughs_attack(self, requests_mock, caplog):
        """Check that a sitemap carrying a "billion laughs" DOCTYPE is rejected.

        A mocked ``robots.txt`` points at ``/sitemap.xml``, whose body declares
        a DOCTYPE with nested entity definitions (``lol`` .. ``lol9``) ahead of
        an otherwise ordinary ``<urlset>``. The test expects the last sitemap
        yielded by ``all_sitemaps()`` to be an ``InvalidSitemap`` and the
        DOCTYPE warning to appear in the captured log.
        """
        # Any URL that is not explicitly mocked below falls through to this matcher.
        requests_mock.add_matcher(TreeTestBase.fallback_to_404_not_found_matcher)

        requests_mock.get(
            self.TEST_BASE_URL + "/robots.txt",
            headers={"Content-Type": "text/plain"},
            text=textwrap.dedent(
                f"""
                Sitemap: {self.TEST_BASE_URL}/sitemap.xml
                """
            ).strip(),
        )

        # Every <!ENTITY ...> declaration stays on a single line: breaking one
        # in the middle of an entity reference ("&lol" / ";") makes the payload
        # malformed XML, and a continuation line at column 0 stops
        # textwrap.dedent from removing the common indentation.
        requests_mock.get(
            self.TEST_BASE_URL + "/sitemap.xml",
            headers={"Content-Type": "application/xml"},
            text=textwrap.dedent(
                f"""
                <?xml version="1.0" encoding="UTF-8"?>
                <!DOCTYPE lolz [
                    <!ENTITY lol "lol">
                    <!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
                    <!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
                    <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
                    <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
                    <!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
                    <!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
                    <!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
                    <!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
                    <!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
                ]>
                <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                    <url>
                        <loc>{self.TEST_BASE_URL}/page.html</loc>
                    </url>
                </urlset>
                """
            ).strip(),
        )

        tree = sitemap_tree_for_homepage(self.TEST_BASE_URL)
        sitemaps = list(tree.all_sitemaps())

        # Fail with a readable assertion instead of an IndexError on sitemaps[-1].
        assert sitemaps, "expected at least one sitemap in the tree"
        assert type(sitemaps[-1]) is InvalidSitemap

        assert (
            "Sitemap contained unexpected non-standard XML DOCTYPE. Parsing not supported for security reasons."
            in caplog.text
        )
0 commit comments