4848 from html .parser import HTMLParser
4949
5050
51- class TitleExtractingHtmlParser (HTMLParser ):
52- """An HTML parser using builtin machinery which will extract the title ."""
class DocumentBasicsHtmlParser(HTMLParser):
    """An HTML parser using builtin machinery to check basic html structure.

    Feed a document with ``feed()`` and then call ``assert_basics()`` to
    verify that it declared an html DOCTYPE and that the first start tag
    encountered was ``<html>``.
    """

    def __init__(self):
        super().__init__()
        # Name given in the DOCTYPE declaration, lower-cased; "none" until a
        # declaration is seen and "unknown" if only non-DOCTYPE ones were.
        self._doctype = "none"
        self._saw_doctype = False
        # Becomes True once any start tag has been handled.
        self._saw_tags = False
        # One of "none", "was_first" or "not_first".
        self._tag_html = "none"

    def handle_decl(self, decl):
        """Record the document type named by a ``<!DOCTYPE ...>`` declaration.

        ``decl`` is the declaration text without the surrounding ``<!`` and
        ``>``. Only the keyword and the name following it are inspected, so
        legacy forms carrying public/system identifiers are still recognised
        by their name.
        """
        # Split on arbitrary whitespace runs: a plain split(' ') combined with
        # two-target unpacking misreads "DOCTYPE  html" and any declaration
        # that carries identifiers after the name.
        parts = decl.split()
        decltag = parts[0] if parts else ""

        if decltag.upper() != 'DOCTYPE':
            # Do not let a stray declaration clobber a doctype already seen.
            if not self._saw_doctype:
                self._doctype = "unknown"
            return

        self._saw_doctype = True
        # The doctype name is matched case-insensitively, so normalise it
        # before assert_basics() compares it against 'html'.
        self._doctype = parts[1].lower() if len(parts) > 1 else ""

    def handle_starttag(self, tag, attrs):
        """Note whether ``<html>`` was the first start tag in the document."""
        if tag == 'html':
            # _saw_tags is only set after this check, so it is still False
            # here when <html> is the very first start tag.
            self._tag_html = 'not_first' if self._saw_tags else 'was_first'
        self._saw_tags = True

    def assert_basics(self):
        """Raise AssertionError describing the first structural problem found.

        Checks, in order: a DOCTYPE was present, it named html, an ``<html>``
        tag was present, and that tag was the first start tag seen.
        """
        if not self._saw_doctype:
            raise AssertionError("missing DOCTYPE")

        if self._doctype != 'html':
            raise AssertionError("non-html DOCTYPE")

        if self._tag_html == 'none':
            raise AssertionError("missing <html>")

        if self._tag_html != 'was_first':
            raise AssertionError("first tag seen was not <html>")
96+
97+
98+ class TitleExtractingHtmlParser (DocumentBasicsHtmlParser ):
99+ """An HTML parser using builtin machinery which will extract the title."""
100+
def __init__(self):
    """Set up the base parser state and start with no title captured."""
    super().__init__()
    # Both stay None until the corresponding parser callbacks assign them.
    self._within_title = None
    self._title = None
58105
@@ -61,14 +108,20 @@ def handle_data(self, *args, **kwargs):
61108 self ._title = args [0 ]
62109
def handle_starttag(self, tag, attrs):
    """Run the inherited start-tag handling, then flag an opening title."""
    super().handle_starttag(tag, attrs)

    if tag != 'title':
        return
    self._within_title = True
66115
def handle_endtag(self, tag):
    """Run the inherited end-tag handling, then flag a closing title."""
    super().handle_endtag(tag)

    if tag != 'title':
        return
    self._within_title = False
70121
71122 def title (self , trim_newlines = False ):
123+ self .assert_basics ()
124+
72125 if self ._title and not self ._within_title :
73126 if trim_newlines :
74127 return self ._title .strip ()
@@ -173,5 +226,46 @@ def test_return_value_ok_returns_expected_title(self):
173226 self .assertHtmlTitle (output , title_text = 'TEST' , trim_newlines = True )
174227
175228
class MigWsgibin_output_objects(MigTestCase, WsgiAssertMixin):
    """Checks on the document migwsgi produces from backend output objects."""

    def _provide_configuration(self):
        """Return the name of the configuration to use: 'testconfig'."""
        return 'testconfig'

    def before_each(self):
        """Create the fake backend and WSGI request and stash call arguments."""
        backend = FakeBackend()
        self.fake_backend = backend

        wsgi = prepare_wsgi(self.configuration, 'http://localhost/')
        self.fake_wsgi = wsgi

        # Positional and keyword arguments later splatted into
        # migwsgi.application() by the tests.
        self.application_args = (wsgi.environ, wsgi.start_response)
        self.application_kwargs = {
            'configuration': self.configuration,
            '_import_module': backend.to_import_module(),
            '_set_os_environ': False,
        }

    def assertIsValidHtmlDocument(self, value):
        """Parse ``value`` and fail unless the basic html structure holds."""
        checker = DocumentBasicsHtmlParser()
        checker.feed(value)
        checker.assert_basics()

    def test_unknown_object_type_generates_valid_error_page(self):
        # An object type that does not exist is meant to trigger the error
        # handling path.
        bogus_object = {'object_type': 'nonexistent'}
        self.fake_backend.set_response([bogus_object], returnvalues.OK)

        wsgi_result = migwsgi.application(
            *self.application_args,
            **self.application_kwargs
        )

        response = self.assertWsgiResponse(wsgi_result, self.fake_wsgi, 200)
        output, _ = response
        self.assertIsValidHtmlDocument(output)
268+
269+
# Hand over to testmain() only when this file is executed as a script.
if __name__ == "__main__":
    testmain()
0 commit comments