Skip to content

Commit f144dfd

Browse files
committed
OD-2139: Fix skipping tags on damaged imports
Previously, the system would skip importing an entire class of tags when a malformed tag was found in any story. This patch changes that behaviour: it now raises an error on malformed tags, while still allowing the non-standard (mostly) all-string tag format. In addition, we now warn when importing stories that have tags not present in the tag table.
1 parent 30fa0ac commit f144dfd

3 files changed

Lines changed: 32 additions & 20 deletions

File tree

efiction/metadata.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,11 +153,13 @@ def _convert_story_tag_table(self, table_name, old_tags):
153153
if self.tag_table_is_nonstandard[table_name]:
154154
# Tag table identified by name rather than id.
155155
original_tagid = "original_tag"
156-
return [
157-
c["id"]
158-
for c in self.tag_tables[table_name]
159-
if str(c[original_tagid]) in old_tags[table_name]
160-
]
156+
157+
valid_tag_ids = {str(c[original_tagid]): c["id"] for c in self.tag_tables[table_name]}
158+
159+
dropped_tags = [tag for tag in old_tags[table_name] if tag and tag not in valid_tag_ids]
160+
if dropped_tags:
161+
self.logger.warning(f"Found tags in stories but not in tag table: {dropped_tags}")
162+
return [valid_tag_ids[tag] for tag in old_tags[table_name] if tag in valid_tag_ids]
161163

162164
def _convert_story_tags(self, old_story):
163165
old_tags = {

efiction/tag_converter.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33

44
from typing import Dict, List
55

6-
from opendoors.mysql import SqlDb
6+
from opendoors.mysql import OperationalError, ProgrammingError, SqlDb
77
from opendoors.utils import print_progress
88

9-
109
class TagConverter:
1110
def __init__(self, config: ConfigParser, logger: Logger, sql: SqlDb):
1211
self.logger = logger
@@ -56,20 +55,31 @@ def check_for_nonstandard_tag_tables(self) -> bool:
5655

5756
try:
5857
query = f"SELECT {id_name} FROM stories;"
59-
tags = self.sql.execute_and_fetchall(self.working_original, query)
60-
try:
61-
tags = list(
62-
map(
63-
lambda story_tags: story_tags[id_name].replace(",", ""),
64-
tags,
65-
)
66-
)
67-
int("".join(tags))
58+
tag_rows = self.sql.execute_and_fetchall(self.working_original, query)
59+
story_tags = [tag_row[id_name] for tag_row in tag_rows]
60+
# Flag every individual tag (across all stories) as numeric: True when
61+
# the tag is empty or consists only of digits, False otherwise
62+
is_numeric = [not tag or tag.isdigit() for tags in story_tags for tag in tags.split(",")]
63+
64+
# There are three possibilities for the way tags have been
65+
# put into stories. The most common, by far, is that they're
66+
# all comma-separated lists of integers. If this is not the
67+
# case, then they should be almost all -- though not
68+
# necessarily all -- comma-separated strings. (This is
69+
# because a tag could potentially be all-numeric, and have
70+
# at least one fic only tagged with that all-numeric tag).
71+
if sum(is_numeric) == len(is_numeric):
72+
self.logger.debug(f"Standard story tag syntax in {tag_table_name}")
6873
tag_tables[tag_table_name] = False
69-
except Exception:
70-
# Non-integer in identifier
74+
75+
elif (sum(is_numeric) / len(is_numeric)) < .1:
76+
self.logger.info(f"Non-standard story tag syntax in {tag_table_name}")
7177
tag_tables[tag_table_name] = True
72-
except Exception as e:
78+
79+
# Finally, the fields could be completely corrupt -- in which case we want to break.
80+
else:
81+
raise Exception(f"Broken story tag syntax in {tag_table_name}")
82+
except (OperationalError, ProgrammingError) as e:
7383
self.logger.info(e)
7484
self.logger.info("No such table?")
7585
tag_tables[tag_table_name] = None

opendoors/mysql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
import pymysql
77
import sqlparse
88
from pymysql.cursors import DictCursor
9+
from pymysql.err import ProgrammingError, OperationalError # noqa: F401
910

1011
from opendoors.utils import get_full_path
1112

12-
1313
class SqlDb:
1414
"""
1515
Wrapper and helper methods for MySQL commands

0 commit comments

Comments
 (0)