Skip to content

Commit 1ed0f11

Browse files
committed
bug fix
1 parent 75d97ef commit 1ed0f11

2 files changed

Lines changed: 13 additions & 11 deletions

File tree

lib/scripts/topologic

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ def prepare_data(
141141
print("Processing training data...", flush=True)
142142
count = 0
143143
pos = 0
144-
for db_name, db_config in training_config.items():
144+
for db_name, db_config in training_config["databases"].items():
145145
count += 1
146146
preproc = PreProcessor(
147147
text_object_type=db_config["text_object_level"],
@@ -170,7 +170,7 @@ def prepare_data(
170170
os.system(f"mkdir -p {os.path.join(training_texts_path, db_name, 'texts')}")
171171
for text in preproc.process_texts(
172172
file_list,
173-
progress_prefix=f"Processing {file_count} files from collection {count} of {len(training_config)}...",
173+
progress_prefix=f"Processing {file_count} files from collection {count} of {len(training_config['databases'])}...",
174174
):
175175
if metadata_filters and text.metadata["philo_id"] not in philo_ids:
176176
continue
@@ -190,7 +190,7 @@ def prepare_data(
190190

191191
pos = 0
192192
print("Processing inference data...", flush=True)
193-
for db_name, db_config in inference_config.items():
193+
for db_name, db_config in inference_config["databases"].items():
194194
count += 1
195195
if db_name in training_config:
196196
if db_config["text_object_level"] == training_config[db_name]["text_object_level"]:
@@ -222,7 +222,7 @@ def prepare_data(
222222
os.system(f"mkdir -p {os.path.join(inference_texts_path, db_name, 'texts')}")
223223
for text in preproc.process_texts(
224224
file_list,
225-
progress_prefix=f"Processing {file_count} files from collection {count} of {len(inference_config)}...",
225+
progress_prefix=f"Processing {file_count} files from collection {count} of {len(inference_config['databases'])}...",
226226
):
227227
if metadata_filters and text.metadata["philo_id"] not in philo_ids:
228228
continue
@@ -267,14 +267,14 @@ def build_model(
267267
print("vocabulary size:", len(training_corpus.vectorizer.vocabulary_))
268268

269269
identical_corpus = True
270-
if len(training_config) != len(inference_config):
270+
if len(training_config["databases"]) != len(inference_config["databases"]):
271271
identical_corpus = False
272272
if identical_corpus is True:
273-
for db, db_config in training_config.items():
274-
if db not in inference_config:
273+
for db, db_config in training_config["databases"].items():
274+
if db not in inference_config["databases"]:
275275
identical_corpus = False
276276
break
277-
if db_config["text_object_level"] != inference_config[db]["text_object_level"]:
277+
if db_config["text_object_level"] != inference_config["databases"][db]["text_object_level"]:
278278
identical_corpus = False
279279
break
280280

@@ -383,7 +383,7 @@ def build_web_app(
383383
db_path,
384384
database_name,
385385
GLOBAL_CONFIG["WEB_APP"]["server_name"],
386-
{db_name: db_config["db_url"] for db_name, db_config in inference_config.items()},
386+
{db_name: db_config["db_url"] for db_name, db_config in inference_config["databases"].items()},
387387
min_year,
388388
max_year,
389389
topics_over_time["topics_over_time_interval"],

lib/topologic/config.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@ def read_config(config_path):
1414
training_dbs = [i.strip() for i in config["TRAINING_DATA"]["philologic_database_paths"].split(",")]
1515
training_db_urls = [i.strip() for i in config["TRAINING_DATA"]["philologic_database_urls"].split(",")]
1616
training_text_object_levels = [i.strip() for i in config["TRAINING_DATA"]["text_object_level"].split(",")]
17-
training_data: Dict[str, Union[int, Dict[str, str]]] = {
17+
training_data: Dict[str, Union[int, Dict[str, Dict[str, str]]]] = {}
18+
training_data["databases"] = {
1819
os.path.basename(os.path.normpath(db_path)): {
1920
"db_path": db_path,
2021
"db_url": db_url,
@@ -27,7 +28,8 @@ def read_config(config_path):
2728
inference_dbs = [i.strip() for i in config["INFERENCE_DATA"]["philologic_database_paths"].split(",")]
2829
inference_db_urls = [i.strip() for i in config["INFERENCE_DATA"]["philologic_database_urls"].split(",")]
2930
inference_text_object_levels = [i.strip() for i in config["INFERENCE_DATA"]["text_object_level"].split(",")]
30-
inference_data: Dict[str, Union[int, Dict[str, str]]] = {
31+
inference_data: Dict[str, Union[int, Dict[str, Dict[str, str]]]] = {}
32+
inference_data["databases"] = {
3133
os.path.basename(os.path.normpath(db_path)): {
3234
"db_path": db_path,
3335
"db_url": db_url,

0 commit comments

Comments
 (0)