11#!/usr/bin/env python3
2- """
3- Runs a benchmarking suite to compare speed
4- and output of different implementations.
5- """
2+ """Run a benchmarking suite to compare speed and output of different implementations."""
63
74import argparse
85import operator
5350
5451
5552class AbstractHtmlConverter :
56- """
57- An abstract HTML convert class.
58- """
53+ """An abstract HTML converter class."""
5954
6055 def get_text (self , html ):
61- """
62- Returns:
63- a text representation of the given HTML snippet.
64- """
56+ """Return a text representation of the given HTML snippet."""
6557 raise NotImplementedError
6658
6759 def benchmark (self , html ):
68- """
69- Benchmarks the classes HTML to text converter.
60+ """Benchmark the class's HTML to text converter.
7061
71- Returns:
72- A tuple of the required time and the obtained text representation.
62+ Return a tuple of the required time and the obtained text representation.
7363 """
7464 start_time = time ()
7565 for _ in range (TRIES ):
@@ -78,9 +68,7 @@ def benchmark(self, html):
7868
7969
8070class BeautifulSoupHtmlConverter (AbstractHtmlConverter ):
81- """
82- Converts HTML to text using BeautifulSoup.
83- """
71+ """Converts HTML to text using BeautifulSoup."""
8472
8573 name = "BeautifulSoup"
8674
@@ -100,9 +88,7 @@ def get_text(self, html):
10088
10189
10290class JustextConverter (AbstractHtmlConverter ):
103- """
104- Converts HTML to text using Justtext.
105- """
91+ """Converts HTML to text using jusText."""
10692
10793 name = "Justtext"
10894
@@ -116,9 +102,7 @@ def get_text(self, html):
116102
117103
118104class Html2TextConverter (AbstractHtmlConverter ):
119- """
120- Converts HTML to text using Html2Text.
121- """
105+ """Converts HTML to text using Html2Text."""
122106
123107 name = "Html2Text"
124108
@@ -133,9 +117,7 @@ def get_text(self, html):
133117
134118
135119class LynxConverter (AbstractHtmlConverter ):
136- """
137- Converts HTML to text using lynx.
138- """
120+ """Converts HTML to text using lynx."""
139121
140122 name = "Lynx"
141123
@@ -166,9 +148,7 @@ def kill_lynx(pid):
166148
167149
168150class LinksConverter (AbstractHtmlConverter ):
169- """
170- Converts HTML to text using links.
171- """
151+ """Converts HTML to text using links."""
172152
173153 name = "Links"
174154
@@ -199,9 +179,7 @@ def kill_links(pid):
199179
200180
201181class InscriptisHtmlConverter (AbstractHtmlConverter ):
202- """
203- Converts HTML to text using Inscriptis.
204- """
182+ """Converts HTML to text using Inscriptis."""
205183
206184 name = "Inscriptis"
207185
@@ -217,18 +195,14 @@ def __init__(self):
217195
218196
219197def save_to_file (algorithm , url , data , benchmarking_results_dir ):
220- """
221- Saves a benchmarking result to the given file.
222- """
198+ """Save the benchmarking result to the given file."""
223199 result_file = os .path .join (benchmarking_results_dir , f"{ algorithm } _{ url } .txt" )
224200 with open (result_file , "w" ) as output_file :
225201 output_file .write (data )
226202
227203
228204def get_speed_table (times ):
229- """
230- Provides the table which compares the conversion speed.
231- """
205+ """Provide the table which compares the conversion speed."""
232206 fastest = min ((value for _ , value in times .items ()))
233207 longest_key = max (len (key ) for key , _ in times .items ())
234208 longest_value = max (len (str (value )) for _ , value in times .items ())
@@ -251,9 +225,7 @@ def get_speed_table(times):
251225
252226
253227def get_fname (url ) -> str :
254- """
255- Transforms a URL to a file name.
256- """
228+ """Transform a URL to a file name."""
257229 trash = (("http://" , "" ), ("https://" , "" ), ("/" , "-" ), (":" , "-" ), ("%" , "" ))
258230
259231 for key , value in trash :
@@ -272,9 +244,7 @@ def get_fname(url) -> str:
272244
273245
274246def parse_args ():
275- """
276- Parse optional benchmarking arguments.
277- """
247+ """Parse optional benchmarking arguments."""
278248 parser = argparse .ArgumentParser (description = "Inscriptis benchmarking suite" )
279249 parser .add_argument (
280250 "converter" ,
@@ -306,11 +276,11 @@ def parse_args():
306276
307277
308278def _setup_benchmarking_directories (args ):
309- """
310- Setup the benchmarking result and caching directories.
279+ """Set up the benchmarking result and caching directories.
311280
312281 Args:
313282 args: command line arguments that provide the directory names.
283+
314284 """
315285 if not os .path .exists (args .benchmarking_results ):
316286 os .makedirs (args .benchmarking_results )
@@ -319,16 +289,17 @@ def _setup_benchmarking_directories(args):
319289
320290
321291def _fetch_url (url , cache_dir ):
322- """
323- Fetch the given URL either from the cache or from the Web.
292+ """Fetch the given URL either from the cache or from the Web.
324293
325294 URLs that are not yet cached are added to the cache.
326295
327296 Args:
328297 url: the URL to fetch.
298+ cache_dir: the cache directory.
329299
330300 Returns:
331301 A tuple of the cache file name and the URL's content.
302+
332303 """
333304 source_name = get_fname (url )
334305 source_cache_path = os .path .join (cache_dir , source_name )
@@ -349,14 +320,13 @@ def _fetch_url(url, cache_dir):
349320
350321
351322def benchmark (args , source_list ):
352- """
353- Run the benchmark.
323+ """Run the benchmark.
354324
355325 Args:
356326 args: command line arguments
357327 source_list: a list of URLs to benchmark.
358- """
359328
329+ """
360330 _setup_benchmarking_directories (args )
361331
362332 output = []
0 commit comments