Skip to content

Commit 694b70a

Browse files
authored
Fix: Resolve infinite scan loop caused by problematic files hanging the worker during thumbnail generation. (#21)
1 parent 7bea993 commit 694b70a

3 files changed

Lines changed: 357 additions & 53 deletions

File tree

backend/indexer.py

Lines changed: 99 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
logger = logging.getLogger("grimoire.indexer")
1919

20-
_FITZ_TIMEOUT = 300 # seconds
20+
_FITZ_TIMEOUT = 30 # seconds — files that can't be opened in 30s are unreadable
2121
_DB_TIMEOUT = 30 # seconds — max time to wait for a DB operation before treating it as hung
2222

2323

@@ -119,7 +119,7 @@ def guess_category(filepath: str) -> str:
119119
return "core"
120120

121121

122-
_THUMBNAIL_TIMEOUT = 60 # seconds
122+
_THUMBNAIL_TIMEOUT = 30 # seconds
123123

124124

125125
def _generate_thumbnail_task(filepath: str, output_path: str, size: tuple, result: list, exc: list):
@@ -176,10 +176,10 @@ def generate_thumbnail(filepath: str, output_path: str, size: tuple = (300, 400)
176176
logger.warning(f"Thumbnail generation aborted by stop request for {filepath}")
177177
return False
178178
if t.is_alive():
179-
logger.warning(f"Thumbnail generation timed out after {_THUMBNAIL_TIMEOUT}s for {filepath}")
179+
logger.error(f"Thumbnail generation timed out after {_THUMBNAIL_TIMEOUT}s for {filepath}")
180180
return False
181181
if exc[0] is not None:
182-
logger.warning(f"Thumbnail generation failed for {filepath}: {exc[0]}")
182+
logger.error(f"Thumbnail generation failed for {filepath}: {exc[0]}")
183183
return False
184184
return bool(result[0])
185185

@@ -320,63 +320,115 @@ def scan_library(library_path: str, data_path: str, session: Session, on_progres
320320
stats["errors"] += 1
321321
continue
322322
if existing:
323-
logger.debug(f"Already registered, skipping: {filename}")
324-
continue
325-
326-
category = guess_category(relative_path)
327-
title = Path(filename).stem.replace("_", " ").replace("-", " ").strip()
323+
if existing.scan_failed:
324+
logger.debug(f"Already registered, skipping: {filename}")
325+
continue
326+
needs_thumbnail = not existing.has_thumbnail
327+
needs_page_count = ext == ".pdf" and existing.page_count == 0 and not existing.index_error
328+
if not needs_thumbnail and not needs_page_count:
329+
logger.debug(f"Already registered, skipping: {filename}")
330+
continue
331+
logger.debug(f"Resuming incomplete scan for: {filename}")
332+
book = existing
333+
else:
334+
category = guess_category(relative_path)
335+
title = Path(filename).stem.replace("_", " ").replace("-", " ").strip()
328336

329-
try:
330-
file_size = os.path.getsize(filepath)
331-
except OSError:
332-
logger.warning(f"Cannot stat file, skipping: {filepath}")
333-
continue
337+
try:
338+
file_size = os.path.getsize(filepath)
339+
except OSError:
340+
logger.warning(f"Cannot stat file, skipping: {filepath}")
341+
continue
342+
343+
book = Book(
344+
game_system_id=system.id,
345+
title=title,
346+
filename=filename,
347+
filepath=filepath,
348+
relative_path=relative_path,
349+
category=category,
350+
file_size=file_size,
351+
mime_type="application/pdf" if ext == ".pdf" else f"image/{ext[1:]}",
352+
)
334353

335-
book = Book(
336-
game_system_id=system.id,
337-
title=title,
338-
filename=filename,
339-
filepath=filepath,
340-
relative_path=relative_path,
341-
category=category,
342-
file_size=file_size,
343-
mime_type="application/pdf" if ext == ".pdf" else f"image/{ext[1:]}",
344-
)
354+
# Commit the book record first so that if a subsequent
355+
# hang kills the worker, the file is already in the DB and
356+
# won't be re-processed on the next startup scan.
357+
session.add(book)
358+
logger.debug(f"DB: committing new book '{filename}'")
359+
try:
360+
_run_with_timeout(session.commit, _DB_TIMEOUT, f"commit book '{filepath}'")
361+
stats["new_books"] += 1
362+
logger.info(f"New book saved: {title} ({category}) in {system_name}")
363+
except TimeoutError as e:
364+
logger.error(f"DB hang: {e} — rolling back '{filename}'")
365+
session.rollback()
366+
stats["errors"] += 1
367+
continue
368+
except IntegrityError:
369+
session.rollback()
370+
logger.debug(f"Book already exists, skipping: {filepath}")
371+
continue
372+
needs_thumbnail = True
373+
needs_page_count = ext == ".pdf"
345374

346375
thumb_path = os.path.join(
347376
thumb_dir,
348377
"books",
349-
f"{slugify(title)}_{hashlib.md5(filepath.encode()).hexdigest()[:8]}.webp",
378+
f"{slugify(book.title)}_{hashlib.md5(filepath.encode()).hexdigest()[:8]}.webp",
350379
)
351-
logger.info(f"Generating thumbnail: {filepath}")
352-
if generate_thumbnail(filepath, thumb_path, should_stop=should_stop):
353-
book.has_thumbnail = True
354-
355-
if ext == ".pdf":
380+
if needs_thumbnail:
381+
# Set scan_failed before the potentially-hanging operation.
382+
# If the worker is killed mid-hang this flag persists, preventing
383+
# the file from being retried on the next scan. A clean cancel
384+
# clears it below so the file is resumed normally next time.
385+
book.scan_failed = True
386+
try:
387+
_run_with_timeout(session.commit, _DB_TIMEOUT, f"commit scan_failed '{filepath}'")
388+
except (TimeoutError, IntegrityError) as e:
389+
logger.error(f"DB hang writing scan_failed for '{filename}': {e}")
390+
session.rollback()
391+
logger.info(f"Generating thumbnail: {filepath}")
392+
if generate_thumbnail(filepath, thumb_path, should_stop=should_stop):
393+
book.has_thumbnail = True
394+
if should_stop and should_stop():
395+
# Cancelled — clear the flag so the file is resumed next scan.
396+
book.scan_failed = False
397+
try:
398+
_run_with_timeout(session.commit, _DB_TIMEOUT, f"commit thumbnail '{filepath}'")
399+
except (TimeoutError, IntegrityError) as e:
400+
logger.error(f"DB hang saving thumbnail for '{filename}': {e}")
401+
session.rollback()
402+
403+
if needs_page_count:
404+
if not book.scan_failed:
405+
book.scan_failed = True
406+
try:
407+
_run_with_timeout(session.commit, _DB_TIMEOUT, f"commit scan_failed '{filepath}'")
408+
except (TimeoutError, IntegrityError) as e:
409+
logger.error(f"DB hang writing scan_failed for '{filename}': {e}")
410+
session.rollback()
356411
logger.info(f"Opening PDF for page count: {filepath}")
357412
try:
358413
doc = _fitz_open_with_timeout(filepath, should_stop=should_stop)
359414
book.page_count = len(doc)
360415
doc.close()
361416
logger.debug(f"Page count: {book.page_count} pages in '{filename}'")
417+
book.scan_failed = False
418+
_run_with_timeout(session.commit, _DB_TIMEOUT, f"commit page_count '{filepath}'")
362419
except Exception as e:
363-
logger.warning(f"Could not read page count for '{filename}': {e}")
364-
book.index_error = str(e)[:500]
365-
stats["errors"] += 1
366-
367-
session.add(book)
368-
logger.debug(f"DB: committing new book '{filename}'")
369-
try:
370-
_run_with_timeout(session.commit, _DB_TIMEOUT, f"commit book '{filepath}'")
371-
stats["new_books"] += 1
372-
logger.info(f"New book saved: {title} ({category}) in {system_name}")
373-
except TimeoutError as e:
374-
logger.error(f"DB hang: {e} — rolling back '{filename}'")
375-
session.rollback()
376-
stats["errors"] += 1
377-
except IntegrityError:
378-
session.rollback()
379-
logger.debug(f"Book already exists, skipping: {filepath}")
420+
if should_stop and should_stop():
421+
# Cancelled — clear the flag so the file is resumed next scan.
422+
book.scan_failed = False
423+
else:
424+
logger.error(f"Could not read page count for '{filename}': {e}")
425+
book.index_error = str(e)[:500]
426+
stats["errors"] += 1
427+
try:
428+
_run_with_timeout(session.commit, _DB_TIMEOUT, f"commit scan_failed '{filepath}'")
429+
except (TimeoutError, IntegrityError) as e2:
430+
logger.error(f"DB hang saving index_error for '{filename}': {e2}")
431+
session.rollback()
380432

381433
if maps_dir.exists():
382434
for root, dirs, files in os.walk(maps_dir):

backend/models.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ class Book(Base):
7777
indexed = Column(Boolean, default=False)
7878
index_failed = Column(Boolean, default=False)
7979
index_error = Column(String(500), default="")
80+
scan_failed = Column(Boolean, default=False)
8081

8182
created_at = Column(DateTime, default=_utcnow)
8283
updated_at = Column(DateTime, default=_utcnow, onupdate=_utcnow)
@@ -372,11 +373,15 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
372373

373374
with engine.connect() as conn:
374375
# Runtime migrations for columns added after initial release
375-
try:
376-
conn.execute(text("ALTER TABLE books ADD COLUMN index_failed BOOLEAN DEFAULT 0"))
377-
conn.commit()
378-
except Exception:
379-
pass # Column already exists
376+
for migration in [
377+
"ALTER TABLE books ADD COLUMN index_failed BOOLEAN DEFAULT 0",
378+
"ALTER TABLE books ADD COLUMN scan_failed BOOLEAN DEFAULT 0",
379+
]:
380+
try:
381+
conn.execute(text(migration))
382+
conn.commit()
383+
except Exception:
384+
pass # Column already exists
380385

381386
conn.execute(
382387
text(

0 commit comments

Comments
 (0)