3434
3535
3636class HTML2Text (html .parser .HTMLParser ):
37+ init_params = [
38+ "bypass_tables" ,
39+ "close_quote" ,
40+ "default_image_alt" ,
41+ "escape_snob" ,
42+ "google_list_indent" ,
43+ "ignore_emphasis" ,
44+ "ignore_images" ,
45+ "ignore_links" ,
46+ "ignore_tables" ,
47+ "images_as_html" ,
48+ "images_to_alt" ,
49+ "images_with_size" ,
50+ "inline_links" ,
51+ "links_each_paragraph" ,
52+ "mark_code" ,
53+ "open_quote" ,
54+ "pad_tables" ,
55+ "protect_links" ,
56+ "single_line_break" ,
57+ "skip_internal_links" ,
58+ "unicode_snob" ,
59+ "use_automatic_links" ,
60+ "wrap_links" ,
61+ "wrap_list_items" ,
62+ ]
63+
3764 def __init__ (
3865 self ,
3966 out : Optional [OutCallback ] = None ,
4067 baseurl : str = "" ,
4168 bodywidth : int = config .BODY_WIDTH ,
69+ ** kwargs
4270 ) -> None :
4371 """
4472 Input parameters:
@@ -52,37 +80,16 @@ def __init__(
5280 self .split_next_td = False
5381 self .td_count = 0
5482 self .table_start = False
55- self .unicode_snob = config .UNICODE_SNOB # covered in cli
56- self .escape_snob = config .ESCAPE_SNOB # covered in cli
57- self .links_each_paragraph = config .LINKS_EACH_PARAGRAPH
58- self .body_width = bodywidth # covered in cli
59- self .skip_internal_links = config .SKIP_INTERNAL_LINKS # covered in cli
60- self .inline_links = config .INLINE_LINKS # covered in cli
61- self .protect_links = config .PROTECT_LINKS # covered in cli
62- self .google_list_indent = config .GOOGLE_LIST_INDENT # covered in cli
63- self .ignore_links = config .IGNORE_ANCHORS # covered in cli
64- self .ignore_images = config .IGNORE_IMAGES # covered in cli
65- self .images_as_html = config .IMAGES_AS_HTML # covered in cli
66- self .images_to_alt = config .IMAGES_TO_ALT # covered in cli
67- self .images_with_size = config .IMAGES_WITH_SIZE # covered in cli
68- self .ignore_emphasis = config .IGNORE_EMPHASIS # covered in cli
69- self .bypass_tables = config .BYPASS_TABLES # covered in cli
70- self .ignore_tables = config .IGNORE_TABLES # covered in cli
71- self .google_doc = False # covered in cli
72- self .ul_item_mark = "*" # covered in cli
73- self .emphasis_mark = "_" # covered in cli
83+ self .google_doc = False
84+ self .ul_item_mark = "*"
85+ self .emphasis_mark = "_"
7486 self .strong_mark = "**"
75- self .single_line_break = config .SINGLE_LINE_BREAK # covered in cli
76- self .use_automatic_links = config .USE_AUTOMATIC_LINKS # covered in cli
77- self .hide_strikethrough = False # covered in cli
78- self .mark_code = config .MARK_CODE
79- self .wrap_list_items = config .WRAP_LIST_ITEMS # covered in cli
80- self .wrap_links = config .WRAP_LINKS # covered in cli
81- self .pad_tables = config .PAD_TABLES # covered in cli
82- self .default_image_alt = config .DEFAULT_IMAGE_ALT # covered in cli
87+ self .hide_strikethrough = False
8388 self .tag_callback = None
84- self .open_quote = config .OPEN_QUOTE # covered in cli
85- self .close_quote = config .CLOSE_QUOTE # covered in cli
89+ self .body_width = bodywidth
90+
91+ for param in self .init_params :
92+ setattr (self , param , kwargs .get (param , getattr (config , param .upper ())))
8693
8794 if out is None :
8895 self .out = self .outtextf
@@ -939,9 +946,14 @@ def optwrap(self, text: str) -> str:
939946 return result
940947
941948
def html2text(
    html: str,
    baseurl: str = "",
    bodywidth: Optional[int] = None,
    **kwargs
) -> str:
    """
    Convert an HTML string to text using a one-off HTML2Text instance.

    :param html: HTML source passed to ``HTML2Text.handle``.
    :param baseurl: Forwarded to ``HTML2Text`` as ``baseurl``.
    :param bodywidth: Forwarded to ``HTML2Text`` as ``bodywidth``; when
        ``None``, ``config.BODY_WIDTH`` is used instead.
    :param kwargs: Extra keyword arguments forwarded unchanged to the
        ``HTML2Text`` constructor (for example the option names listed in
        ``HTML2Text.init_params``). The values are heterogeneous option
        values rather than output callbacks, so they are deliberately left
        unannotated here, consistent with ``HTML2Text.__init__``.
    :returns: The string returned by ``HTML2Text.handle(html)``.
    """
    # Resolve the default lazily so the current config value is read at
    # call time rather than frozen into the signature at import time.
    if bodywidth is None:
        bodywidth = config.BODY_WIDTH
    h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth, **kwargs)

    return h.handle(html)
0 commit comments