@@ -141,7 +141,7 @@ def prepare_data(
141141 print ("Processing training data..." , flush = True )
142142 count = 0
143143 pos = 0
144- for db_name , db_config in training_config .items ():
144+ for db_name , db_config in training_config [ "databases" ] .items ():
145145 count += 1
146146 preproc = PreProcessor (
147147 text_object_type = db_config ["text_object_level" ],
@@ -170,7 +170,7 @@ def prepare_data(
170170 os .system (f"mkdir -p { os .path .join (training_texts_path , db_name , 'texts' )} " )
171171 for text in preproc .process_texts (
172172 file_list ,
173- progress_prefix = f"Processing { file_count } files from collection { count } of { len (training_config )} ..." ,
173+ progress_prefix = f"Processing { file_count } files from collection { count } of { len (training_config [ 'databases' ] )} ..." ,
174174 ):
175175 if metadata_filters and text .metadata ["philo_id" ] not in philo_ids :
176176 continue
@@ -190,7 +190,7 @@ def prepare_data(
190190
191191 pos = 0
192192 print ("Processing inference data..." , flush = True )
193- for db_name , db_config in inference_config .items ():
193+ for db_name , db_config in inference_config [ "databases" ] .items ():
194194 count += 1
195195 if db_name in training_config :
196196 if db_config ["text_object_level" ] == training_config [db_name ]["text_object_level" ]:
@@ -222,7 +222,7 @@ def prepare_data(
222222 os .system (f"mkdir -p { os .path .join (inference_texts_path , db_name , 'texts' )} " )
223223 for text in preproc .process_texts (
224224 file_list ,
225- progress_prefix = f"Processing { file_count } files from collection { count } of { len (inference_config )} ..." ,
225+ progress_prefix = f"Processing { file_count } files from collection { count } of { len (inference_config [ 'databases' ] )} ..." ,
226226 ):
227227 if metadata_filters and text .metadata ["philo_id" ] not in philo_ids :
228228 continue
@@ -267,14 +267,14 @@ def build_model(
267267 print ("vocabulary size:" , len (training_corpus .vectorizer .vocabulary_ ))
268268
269269 identical_corpus = True
270- if len (training_config ) != len (inference_config ):
270+ if len (training_config [ "databases" ] ) != len (inference_config [ "databases" ] ):
271271 identical_corpus = False
272272 if identical_corpus is True :
273- for db , db_config in training_config .items ():
274- if db not in inference_config :
273+ for db , db_config in training_config [ "databases" ] .items ():
274+ if db not in inference_config [ "databases" ] :
275275 identical_corpus = False
276276 break
277- if db_config ["text_object_level" ] != inference_config [db ]["text_object_level" ]:
277+ if db_config ["text_object_level" ] != inference_config ["databases" ][ db ]["text_object_level" ]:
278278 identical_corpus = False
279279 break
280280
@@ -383,7 +383,7 @@ def build_web_app(
383383 db_path ,
384384 database_name ,
385385 GLOBAL_CONFIG ["WEB_APP" ]["server_name" ],
386- {db_name : db_config ["db_url" ] for db_name , db_config in inference_config .items ()},
386+ {db_name : db_config ["db_url" ] for db_name , db_config in inference_config [ "databases" ] .items ()},
387387 min_year ,
388388 max_year ,
389389 topics_over_time ["topics_over_time_interval" ],
0 commit comments