77
88from opendoors .mysql import SqlDb
99from opendoors .big_insert import BigInsert
10- from opendoors .utils import get_full_path , normalize , print_progress , make_banner , key_find , get_prefixed_path
10+ from opendoors .utils import (
11+ get_full_path ,
12+ normalize ,
13+ print_progress ,
14+ make_banner ,
15+ key_find ,
16+ get_prefixed_path ,
17+ )
1118
1219
1320class EFictionChapters :
1421 """
1522 Process chapter contents and move them into the Open Doors working database.
1623 """
24+
def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb = None):
    """
    Store the collaborators and cache the ``chapters`` table of the original database.

    :param config: parsed configuration; its ``Processing`` section must provide
        ``simplified_original_db`` and ``open_doors_working_db``
    :param logger: logger used for progress and warning messages
    :param sql: database wrapper; the signature defaults it to None, but it is needed
        straight away to read the chapters table
    :raises ValueError: if ``sql`` is None
    """
    if sql is None:
        # Fail with an explicit message instead of an AttributeError on None below.
        raise ValueError("EFictionChapters requires a SqlDb instance, got None")
    self.sql = sql
    self.config = config
    self.logger = logger
    self.working_original = self.config["Processing"]["simplified_original_db"]
    # Reuse the name looked up above rather than reading the config a second time.
    self.chapters_table = self.sql.read_table_to_dict(
        self.working_original, "chapters"
    )
    self.working_open_doors = self.config["Processing"]["open_doors_working_db"]
2434
2535 def _are_chapters_in_table (self ) -> bool :
26- return len ([c for c in self .chapters_table if c [' storytext' ]]) > 0
36+ return len ([c for c in self .chapters_table if c [" storytext" ]]) > 0
2737
2838 @staticmethod
2939 def __file_with_path (dirpath , subdir , filename ):
@@ -35,9 +45,9 @@ def __file_with_path(dirpath, subdir, filename):
3545 :return: A dict containing metadata about the chapter based on its file path
3646 """
3747 return {
38- ' path' : os .path .join (dirpath , filename ),
39- ' chap_id' : Path (filename ).stem ,
40- ' author_id' : subdir
48+ " path" : os .path .join (dirpath , filename ),
49+ " chap_id" : Path (filename ).stem ,
50+ " author_id" : subdir ,
4151 }
4252
4353 def load_chapter_text_into_db (self , chapter_paths : List [dict ]):
@@ -49,33 +59,39 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]):
4959 warnings = 0
5060 forced_continue = False
5161 self .logger .info ("...loading data from chapters table..." )
52- old_chapters , current , total = self .sql .read_table_with_total (self .working_original , "chapters" )
62+ old_chapters , current , total = self .sql .read_table_with_total (
63+ self .working_original , "chapters"
64+ )
5365
5466 self .logger .info ("...removing rows from existing chapters table..." )
5567 self .sql .execute (self .working_open_doors , "TRUNCATE TABLE chapters;" )
5668
5769 self .logger .info ("...loading text from chapter files..." )
5870 insert_op = BigInsert (
59- self .working_open_doors ,
60- "chapters" ,
61- ["id" , "position" , "title" , "text" , "story_id" , "notes" ],
62- self .sql
63- )
71+ self .working_open_doors ,
72+ "chapters" ,
73+ ["id" , "position" , "title" , "text" , "story_id" , "notes" ],
74+ self .sql ,
75+ )
6476 try :
65- encoding = self .config [' Archive' ][ ' encoding' ]
77+ encoding = self .config [" Archive" ][ " encoding" ]
6678 except KeyError :
6779 encoding = None
6880 if encoding is None :
69- message_string = """
81+ message_string = (
82+ """
7083You have not specified any character encoding in the config file!
7184
7285If you are unsure which encoding is used in the backup
73- """ .strip () + (
74- f""", please run the mojibake tool:
86+ """ .strip ()
87+ + (
88+ f""", please run the mojibake tool:
7589
7690 mojibake { self .config ['Archive' ]['chapter_path' ]}
7791
78- """ if shutil .which ('mojibake' ) is not None else f"""
92+ """
93+ if shutil .which ("mojibake" ) is not None
94+ else f"""
7995, you can install the
8096mojibake tool from its repository:
8197
@@ -86,49 +102,64 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]):
86102 mojibake { self .config ['Archive' ]['chapter_path' ]}
87103
88104 """ .strip ()
89- )
105+ )
106+ )
90107 print (message_string )
91108 while encoding is None :
92109 encoding_text = input ("Enter a valid encoding (press enter for utf8): " )
93110 if encoding_text == "" :
94- encoding_text = ' utf8'
111+ encoding_text = " utf8"
95112 try :
96113 # check if encoding is valid
97- '' .encode (encoding_text )
114+ "" .encode (encoding_text )
98115 encoding = encoding_text
99116 except LookupError :
100117 print (f"{ encoding_text } is not a valid encoding, try again" )
101118 for old_chapter in old_chapters :
102- chapid = old_chapter ['chapid' ]
103- chapter = [chapter_path for chapter_path in chapter_paths if chapter_path ['chap_id' ] == str (chapid )]
119+ chapid = old_chapter ["chapid" ]
120+ chapter = [
121+ chapter_path
122+ for chapter_path in chapter_paths
123+ if chapter_path ["chap_id" ] == str (chapid )
124+ ]
104125 if chapter :
105- file = chapter [0 ][' path' ]
106- with open (file , 'rb' ) as raw_chapter :
126+ file = chapter [0 ][" path" ]
127+ with open (file , "rb" ) as raw_chapter :
107128 raw = raw_chapter .read ()
108129 while isinstance (raw , bytes ):
109130 try :
110131 raw = raw .decode (encoding = encoding )
111132 except UnicodeDecodeError as e :
112133 error = f"Failed to decode { file } \n "
113- line_num = raw [:e .start ].decode (encoding ).count ("\n " )
134+ line_num = raw [: e .start ].decode (encoding ).count ("\n " )
114135 error += f"At line { line_num } :\t { str (e )} \n "
115- error += "--\t " + str (raw [max (e .start - 40 , 0 ):e .end + 30 ]) + "\n "
136+ error += (
137+ "--\t "
138+ + str (raw [max (e .start - 40 , 0 ) : e .end + 30 ])
139+ + "\n "
140+ )
116141 # print `^` under the offending byte
117- error += "\t " + \
118- " " * (len (str (raw [max (e .start - 40 , 0 ):e .start ])) - 1 ) + \
119- "^" * (len (str (raw [e .start :e .end ])) - 3 ) + "\n "
142+ error += (
143+ "\t "
144+ + " "
145+ * (len (str (raw [max (e .start - 40 , 0 ) : e .start ])) - 1 )
146+ + "^" * (len (str (raw [e .start : e .end ])) - 3 )
147+ + "\n "
148+ )
120149 error += "Will be converted to:\n "
121150 # remove the offending bytes (usually one)
122- raw = raw [:e .start ] + raw [e .end :]
123- error += "++\t " + raw [
124- max (e .start - 40 , 0 ):
125- e .end + 30
126- ].decode (encoding , errors = 'ignore' ) \
127- .replace ("\n " , "\\ n" ) \
128- .replace ("\r " , "\\ r" ) + "\n "
151+ raw = raw [: e .start ] + raw [e .end :]
152+ error += (
153+ "++\t "
154+ + raw [max (e .start - 40 , 0 ) : e .end + 30 ]
155+ .decode (encoding , errors = "ignore" )
156+ .replace ("\n " , "\\ n" )
157+ .replace ("\r " , "\\ r" )
158+ + "\n "
159+ )
129160 self .logger .warning (error )
130161 warnings += 1
131- if warnings > len (old_chapters ) * .3 and not forced_continue :
162+ if warnings > len (old_chapters ) * 0 .3 and not forced_continue :
132163 msg = f"""
133164A total of { warnings } automatic modifications have been performed so far!
134165
@@ -147,28 +178,35 @@ def load_chapter_text_into_db(self, chapter_paths: List[dict]):
147178 raise Exception ("Process aborted, too many errors!" )
148179
149180 text = normalize (raw )
150- if key_find (' endnotes' , old_chapter ):
181+ if key_find (" endnotes" , old_chapter ):
151182 text = text + f"\n \n \n <hr>\n { old_chapter ['endnotes' ]} "
152183
153184 insert_op .addRow (
154185 chapid ,
155- old_chapter [' inorder' ],
156- old_chapter [' title' ],
186+ old_chapter [" inorder" ],
187+ old_chapter [" title" ],
157188 text ,
158- old_chapter [' sid' ],
159- old_chapter [' notes' ]
189+ old_chapter [" sid" ],
190+ old_chapter [" notes" ],
160191 )
161192 current = print_progress (current , total , "chapters converted" )
162193 # If there were any errors, display a warning for the user to check the affected chapters
163194 if warnings >= 1 :
164- self .logger .warning ("If the character deletion is unacceptable please quit this processor and use the mojibake tool,"
165- " then restart the processor from step 04" )
195+ self .logger .warning (
196+ "If the character deletion is unacceptable please quit this processor and use the mojibake tool,"
197+ " then restart the processor from step 04"
198+ )
166199 self .logger .error (
167- make_banner ('-' ,
168- f"There were { warnings } warnings; check the affected chapters listed above to make sure curly quotes "
169- "and accented characters are correctly displayed." ))
200+ make_banner (
201+ "-" ,
202+ f"There were { warnings } warnings; check the affected chapters listed above to make sure curly quotes "
203+ "and accented characters are correctly displayed." ,
204+ )
205+ )
170206 insert_op .send ()
171- return self .sql .execute_and_fetchall (self .working_open_doors , "SELECT * FROM chapters;" )
207+ return self .sql .execute_and_fetchall (
208+ self .working_open_doors , "SELECT * FROM chapters;"
209+ )
172210
def list_chapter_files(self):
    """
    Walk the configured chapter directory and describe every file found below it.

    Files located directly in the chapter directory itself are skipped; every other
    file is reported with the name of its containing directory as its author id.

    :return: A list of dicts, one per file, as built by ``__file_with_path``
    """
    self.logger.info("Loading chapters from the filesystem...")
    # os.walk() yields the top directory first, as os.fspath(top); normalising the
    # root the same way lets it be recognised by path instead of by folder name.
    root = os.fspath(get_full_path(self.config["Archive"]["chapter_path"]))
    chapter_paths = []
    for dirpath, _dirnames, filenames in os.walk(root):
        if dirpath == root:
            # Comparing names (as before) also skipped any author folder that
            # happened to be called like the root folder.
            continue
        subdir = dirpath.split(os.path.sep)[-1]
        if not subdir:
            continue
        chapter_paths.extend(
            self.__file_with_path(dirpath, subdir, filename)
            for filename in filenames
        )
    return chapter_paths
185234
def load_og_chapters_into_db(self):
    """
    Rebuild the working ``chapters`` table from text stored in the original table.

    The working table is truncated first. Each original row is then normalized, gets
    its end notes appended after an ``<hr>`` when present, and is queued for insertion.

    :return: the result of ``SELECT * FROM chapters;`` on the working database
    """
    target_db = self.working_open_doors

    self.logger.info("...loading data from chapters table...")
    source_rows, done, total = self.sql.read_table_with_total(
        self.working_original, "chapters"
    )

    self.logger.info("...removing rows from existing chapters table...")
    self.sql.execute(target_db, "TRUNCATE TABLE chapters;")

    self.logger.info("...loading chapters from original chapters table...")
    columns = ["id", "position", "title", "text", "story_id", "notes"]
    bulk_insert = BigInsert(target_db, "chapters", columns, self.sql)

    for row in source_rows:
        body = normalize(row["storytext"])
        if key_find("endnotes", row):
            # End notes are appended to the chapter text, set off by a rule.
            body = body + f"\n\n\n<hr>\n{row['endnotes']}"

        bulk_insert.addRow(
            row["chapid"],
            row["inorder"],
            row["title"],
            body,
            row["sid"],
            row["notes"],
        )
        done = print_progress(done, total, "chapters converted")

    bulk_insert.send()
    return self.sql.execute_and_fetchall(target_db, "SELECT * FROM chapters;")
223276
def load_chapters(self, step_path: str):
    """
    Convert chapters from the original table if it holds text, otherwise from files,
    then dump the working database.

    :param step_path: location handed to ``get_prefixed_path`` for the "04" dump file
    :return: True once the dump has been written
    """
    if self._are_chapters_in_table():
        self.logger.info(
            "Chapters are already present in the original database, converting now"
        )
        self.load_og_chapters_into_db()
    else:
        if not self.config.has_option("Archive", "chapter_path"):
            chapter_path = input("Full path to chapter files\n>> ")
            # has_option() is also False when the whole section is absent; create it
            # so the assignment below cannot fail with KeyError after the prompt.
            if not self.config.has_section("Archive"):
                self.config.add_section("Archive")
            self.config["Archive"]["chapter_path"] = os.path.normpath(chapter_path)

        chapter_paths = self.list_chapter_files()
        self.load_chapter_text_into_db(chapter_paths)

    database_dump = get_prefixed_path(
        "04", step_path, f"{self.working_open_doors}.sql"
    )
    self.logger.info(f"Exporting converted tables to {database_dump}...")
    self.sql.dump_database(self.working_open_doors, database_dump)
    return True
0 commit comments