Skip to content

Commit a52427b

Browse files
authored
OD-2139: Fix skipping tags on damaged imports (#41)
Previously, the system would skip importing an entire class of tags whenever a malformed tag was found in any story. This patch changes it to raise an error on malformed tags, while still allowing the non-standard (mostly) all-string tag format. In addition, we now log a warning when importing stories that have tags which are not present in the tag table.
1 parent 30fa0ac commit a52427b

4 files changed

Lines changed: 50 additions & 16 deletions

File tree

efiction/metadata.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,20 @@ def _convert_story_tag_table(self, table_name, old_tags):
153153
if self.tag_table_is_nonstandard[table_name]:
154154
# Tag table identified by name rather than id.
155155
original_tagid = "original_tag"
156+
157+
valid_tag_ids = {
158+
str(c[original_tagid]): c["id"] for c in self.tag_tables[table_name]
159+
}
160+
161+
dropped_tags = [
162+
tag for tag in old_tags[table_name] if tag and tag not in valid_tag_ids
163+
]
164+
if dropped_tags:
165+
self.logger.warning(
166+
f"Found tags in stories but not in tag table: {dropped_tags}"
167+
)
156168
return [
157-
c["id"]
158-
for c in self.tag_tables[table_name]
159-
if str(c[original_tagid]) in old_tags[table_name]
169+
valid_tag_ids[tag] for tag in old_tags[table_name] if tag in valid_tag_ids
160170
]
161171

162172
def _convert_story_tags(self, old_story):

efiction/tag_converter.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from typing import Dict, List
55

6-
from opendoors.mysql import SqlDb
6+
from opendoors.mysql import OperationalError, ProgrammingError, SqlDb
77
from opendoors.utils import print_progress
88

99

@@ -56,20 +56,43 @@ def check_for_nonstandard_tag_tables(self) -> bool:
5656

5757
try:
5858
query = f"SELECT {id_name} FROM stories;"
59-
tags = self.sql.execute_and_fetchall(self.working_original, query)
60-
try:
61-
tags = list(
62-
map(
63-
lambda story_tags: story_tags[id_name].replace(",", ""),
64-
tags,
65-
)
59+
tag_rows = self.sql.execute_and_fetchall(
60+
self.working_original, query
61+
)
62+
story_tags = [tag_row[id_name] for tag_row in tag_rows]
63+
# Flag every individual tag (after splitting each story's field on
64+
# commas) as numeric: True when it is empty or consists only of digits
65+
is_numeric = [
66+
not tag or tag.isdigit()
67+
for tags in story_tags
68+
for tag in tags.split(",")
69+
]
70+
71+
# There are three possibilities for the way tags have been
72+
# put into stories. The most common, by far, is that they're
73+
# all comma-separated lists of integers. If this is not the
74+
# case, then they should be almost all -- though not
75+
# necessarily all -- comma-separated strings. (This is
76+
# because a tag could potentially be all-numeric, and have
77+
# at least one fic only tagged with that all-numeric tag).
78+
if sum(is_numeric) == len(is_numeric):
79+
self.logger.debug(
80+
f"Standard story tag syntax in {tag_table_name}"
6681
)
67-
int("".join(tags))
6882
tag_tables[tag_table_name] = False
69-
except Exception:
70-
# Non-integer in identifier
83+
84+
elif (sum(is_numeric) / len(is_numeric)) < 0.1:
85+
self.logger.info(
86+
f"Non-standard story tag syntax in {tag_table_name}"
87+
)
7188
tag_tables[tag_table_name] = True
72-
except Exception as e:
89+
90+
# Finally, the fields could be completely corrupt -- in which case we abort by raising an exception.
91+
else:
92+
raise Exception(
93+
f"Broken story tag syntax in {tag_table_name}; mix of standard (numeric) and non-standard (string) tags."
94+
)
95+
except (OperationalError, ProgrammingError) as e:
7396
self.logger.info(e)
7497
self.logger.info("No such table?")
7598
tag_tables[tag_table_name] = None

efiction/tests/test_metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def test_convert_story_tags_normal_ratings(self):
215215
}
216216
]
217217
result = self.efiction_converter._convert_story_tags(old_stories[0])
218-
self.assertEqual(
218+
self.assertCountEqual(
219219
{
220220
"categories": [6],
221221
"characters": [106, 107],

opendoors/mysql.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pymysql
77
import sqlparse
88
from pymysql.cursors import DictCursor
9+
from pymysql.err import OperationalError, ProgrammingError # noqa: F401
910

1011
from opendoors.utils import get_full_path
1112

0 commit comments

Comments
 (0)