Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions efiction/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,20 @@ def _convert_story_tag_table(self, table_name, old_tags):
if self.tag_table_is_nonstandard[table_name]:
# Tag table identified by name rather than id.
original_tagid = "original_tag"

valid_tag_ids = {
str(c[original_tagid]): c["id"] for c in self.tag_tables[table_name]
}

dropped_tags = [
tag for tag in old_tags[table_name] if tag and tag not in valid_tag_ids
]
if dropped_tags:
self.logger.warning(
f"Found tags in stories but not in tag table: {dropped_tags}"
)
return [
c["id"]
for c in self.tag_tables[table_name]
if str(c[original_tagid]) in old_tags[table_name]
valid_tag_ids[tag] for tag in old_tags[table_name] if tag in valid_tag_ids
]

def _convert_story_tags(self, old_story):
Expand Down
47 changes: 35 additions & 12 deletions efiction/tag_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from typing import Dict, List

from opendoors.mysql import SqlDb
from opendoors.mysql import OperationalError, ProgrammingError, SqlDb
from opendoors.utils import print_progress


Expand Down Expand Up @@ -56,20 +56,43 @@ def check_for_nonstandard_tag_tables(self) -> bool:

try:
query = f"SELECT {id_name} FROM stories;"
tags = self.sql.execute_and_fetchall(self.working_original, query)
try:
tags = list(
map(
lambda story_tags: story_tags[id_name].replace(",", ""),
tags,
)
tag_rows = self.sql.execute_and_fetchall(
self.working_original, query
)
story_tags = [tag_row[id_name] for tag_row in tag_rows]
# Flag every individual tag (story fields split on commas): True
# when the fragment is empty or consists only of digits
is_numeric = [
not tag or tag.isdigit()
for tags in story_tags
for tag in tags.split(",")
]

# There are three possibilities for the way tags have been
# put into stories. The most common, by far, is that they're
# all comma-separated lists of integers. If this is not the
# case, then they should be almost all -- though not
# necessarily all -- comma-separated lists of tag names. (This
# is because a tag name could potentially be all-numeric, and
# have at least one fic only tagged with that all-numeric tag,
# so fewer than 10% numeric-looking tags are tolerated.)
if sum(is_numeric) == len(is_numeric):
self.logger.debug(
f"Standard story tag syntax in {tag_table_name}"
)
int("".join(tags))
tag_tables[tag_table_name] = False
except Exception:
# Non-integer in identifier

elif (sum(is_numeric) / len(is_numeric)) < 0.1:
self.logger.info(
f"Non-standard story tag syntax in {tag_table_name}"
)
tag_tables[tag_table_name] = True
except Exception as e:

# Finally, the fields could be completely corrupt (a mix of numeric and string tags) -- in which case we raise rather than guess.
else:
raise Exception(
f"Broken story tag syntax in {tag_table_name}; mix of standard (numeric) and non-standard (string) tags."
)
except (OperationalError, ProgrammingError) as e:
self.logger.info(e)
self.logger.info("No such table?")
tag_tables[tag_table_name] = None
Expand Down
2 changes: 1 addition & 1 deletion efiction/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def test_convert_story_tags_normal_ratings(self):
}
]
result = self.efiction_converter._convert_story_tags(old_stories[0])
self.assertEqual(
self.assertCountEqual(
{
"categories": [6],
"characters": [106, 107],
Expand Down
1 change: 1 addition & 0 deletions opendoors/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pymysql
import sqlparse
from pymysql.cursors import DictCursor
from pymysql.err import OperationalError, ProgrammingError # noqa: F401

from opendoors.utils import get_full_path

Expand Down
Loading