|
17 | 17 |
|
18 | 18 | logger = logging.getLogger("grimoire.indexer") |
19 | 19 |
|
20 | | -_FITZ_TIMEOUT = 300 # seconds |
| 20 | +_FITZ_TIMEOUT = 30 # seconds — a file that cannot be opened within 30s is treated as unreadable |
21 | 21 | _DB_TIMEOUT = 30 # seconds — max time to wait for a DB operation before treating it as hung |
22 | 22 |
|
23 | 23 |
|
@@ -119,7 +119,7 @@ def guess_category(filepath: str) -> str: |
119 | 119 | return "core" |
120 | 120 |
|
121 | 121 |
|
122 | | -_THUMBNAIL_TIMEOUT = 60 # seconds |
| 122 | +_THUMBNAIL_TIMEOUT = 30 # seconds |
123 | 123 |
|
124 | 124 |
|
125 | 125 | def _generate_thumbnail_task(filepath: str, output_path: str, size: tuple, result: list, exc: list): |
@@ -176,10 +176,10 @@ def generate_thumbnail(filepath: str, output_path: str, size: tuple = (300, 400) |
176 | 176 | logger.warning(f"Thumbnail generation aborted by stop request for {filepath}") |
177 | 177 | return False |
178 | 178 | if t.is_alive(): |
179 | | - logger.warning(f"Thumbnail generation timed out after {_THUMBNAIL_TIMEOUT}s for {filepath}") |
| 179 | + logger.error(f"Thumbnail generation timed out after {_THUMBNAIL_TIMEOUT}s for {filepath}") |
180 | 180 | return False |
181 | 181 | if exc[0] is not None: |
182 | | - logger.warning(f"Thumbnail generation failed for {filepath}: {exc[0]}") |
| 182 | + logger.error(f"Thumbnail generation failed for {filepath}: {exc[0]}") |
183 | 183 | return False |
184 | 184 | return bool(result[0]) |
185 | 185 |
|
@@ -320,63 +320,115 @@ def scan_library(library_path: str, data_path: str, session: Session, on_progres |
320 | 320 | stats["errors"] += 1 |
321 | 321 | continue |
322 | 322 | if existing: |
323 | | - logger.debug(f"Already registered, skipping: {filename}") |
324 | | - continue |
325 | | - |
326 | | - category = guess_category(relative_path) |
327 | | - title = Path(filename).stem.replace("_", " ").replace("-", " ").strip() |
| 323 | + if existing.scan_failed: |
| 324 | + logger.debug(f"Already registered, skipping: {filename}") |
| 325 | + continue |
| 326 | + needs_thumbnail = not existing.has_thumbnail |
| 327 | + needs_page_count = ext == ".pdf" and existing.page_count == 0 and not existing.index_error |
| 328 | + if not needs_thumbnail and not needs_page_count: |
| 329 | + logger.debug(f"Already registered, skipping: {filename}") |
| 330 | + continue |
| 331 | + logger.debug(f"Resuming incomplete scan for: {filename}") |
| 332 | + book = existing |
| 333 | + else: |
| 334 | + category = guess_category(relative_path) |
| 335 | + title = Path(filename).stem.replace("_", " ").replace("-", " ").strip() |
328 | 336 |
|
329 | | - try: |
330 | | - file_size = os.path.getsize(filepath) |
331 | | - except OSError: |
332 | | - logger.warning(f"Cannot stat file, skipping: {filepath}") |
333 | | - continue |
| 337 | + try: |
| 338 | + file_size = os.path.getsize(filepath) |
| 339 | + except OSError: |
| 340 | + logger.warning(f"Cannot stat file, skipping: {filepath}") |
| 341 | + continue |
| 342 | + |
| 343 | + book = Book( |
| 344 | + game_system_id=system.id, |
| 345 | + title=title, |
| 346 | + filename=filename, |
| 347 | + filepath=filepath, |
| 348 | + relative_path=relative_path, |
| 349 | + category=category, |
| 350 | + file_size=file_size, |
| 351 | + mime_type="application/pdf" if ext == ".pdf" else f"image/{ext[1:]}", |
| 352 | + ) |
334 | 353 |
|
335 | | - book = Book( |
336 | | - game_system_id=system.id, |
337 | | - title=title, |
338 | | - filename=filename, |
339 | | - filepath=filepath, |
340 | | - relative_path=relative_path, |
341 | | - category=category, |
342 | | - file_size=file_size, |
343 | | - mime_type="application/pdf" if ext == ".pdf" else f"image/{ext[1:]}", |
344 | | - ) |
| 354 | + # Commit the book record first so that if a subsequent |
| 355 | + # hang kills the worker, the row already exists and the next |
| 356 | + # scan can resume it (or skip it once scan_failed is persisted). |
| 357 | + session.add(book) |
| 358 | + logger.debug(f"DB: committing new book '{filename}'") |
| 359 | + try: |
| 360 | + _run_with_timeout(session.commit, _DB_TIMEOUT, f"commit book '{filepath}'") |
| 361 | + stats["new_books"] += 1 |
| 362 | + logger.info(f"New book saved: {title} ({category}) in {system_name}") |
| 363 | + except TimeoutError as e: |
| 364 | + logger.error(f"DB hang: {e} — rolling back '{filename}'") |
| 365 | + session.rollback() |
| 366 | + stats["errors"] += 1 |
| 367 | + continue |
| 368 | + except IntegrityError: |
| 369 | + session.rollback() |
| 370 | + logger.debug(f"Book already exists, skipping: {filepath}") |
| 371 | + continue |
| 372 | + needs_thumbnail = True |
| 373 | + needs_page_count = ext == ".pdf" |
345 | 374 |
|
346 | 375 | thumb_path = os.path.join( |
347 | 376 | thumb_dir, |
348 | 377 | "books", |
349 | | - f"{slugify(title)}_{hashlib.md5(filepath.encode()).hexdigest()[:8]}.webp", |
| 378 | + f"{slugify(book.title)}_{hashlib.md5(filepath.encode()).hexdigest()[:8]}.webp", |
350 | 379 | ) |
351 | | - logger.info(f"Generating thumbnail: {filepath}") |
352 | | - if generate_thumbnail(filepath, thumb_path, should_stop=should_stop): |
353 | | - book.has_thumbnail = True |
354 | | - |
355 | | - if ext == ".pdf": |
| 380 | + if needs_thumbnail: |
| 381 | + # Set scan_failed before the potentially-hanging operation so that it |
| 382 | + # persists if the worker is killed mid-hang and blocks a retry on the |
| 383 | + # next scan; a clean cancel clears it below so the file is resumed. |
| 384 | + # NOTE(review): otherwise only a successful page count clears it — confirm for non-PDFs. |
| 385 | + book.scan_failed = True |
| 386 | + try: |
| 387 | + _run_with_timeout(session.commit, _DB_TIMEOUT, f"commit scan_failed '{filepath}'") |
| 388 | + except (TimeoutError, IntegrityError) as e: |
| 389 | + logger.error(f"DB hang writing scan_failed for '{filename}': {e}") |
| 390 | + session.rollback() |
| 391 | + logger.info(f"Generating thumbnail: {filepath}") |
| 392 | + if generate_thumbnail(filepath, thumb_path, should_stop=should_stop): |
| 393 | + book.has_thumbnail = True |
| 394 | + if should_stop and should_stop(): |
| 395 | + # Cancelled — clear the flag so the file is resumed next scan. |
| 396 | + book.scan_failed = False |
| 397 | + try: |
| 398 | + _run_with_timeout(session.commit, _DB_TIMEOUT, f"commit thumbnail '{filepath}'") |
| 399 | + except (TimeoutError, IntegrityError) as e: |
| 400 | + logger.error(f"DB hang saving thumbnail for '{filename}': {e}") |
| 401 | + session.rollback() |
| 402 | + |
| 403 | + if needs_page_count: |
| 404 | + if not book.scan_failed: |
| 405 | + book.scan_failed = True |
| 406 | + try: |
| 407 | + _run_with_timeout(session.commit, _DB_TIMEOUT, f"commit scan_failed '{filepath}'") |
| 408 | + except (TimeoutError, IntegrityError) as e: |
| 409 | + logger.error(f"DB hang writing scan_failed for '{filename}': {e}") |
| 410 | + session.rollback() |
356 | 411 | logger.info(f"Opening PDF for page count: {filepath}") |
357 | 412 | try: |
358 | 413 | doc = _fitz_open_with_timeout(filepath, should_stop=should_stop) |
359 | 414 | book.page_count = len(doc) |
360 | 415 | doc.close() |
361 | 416 | logger.debug(f"Page count: {book.page_count} pages in '{filename}'") |
| 417 | + book.scan_failed = False |
| 418 | + _run_with_timeout(session.commit, _DB_TIMEOUT, f"commit page_count '{filepath}'") |
362 | 419 | except Exception as e: |
363 | | - logger.warning(f"Could not read page count for '{filename}': {e}") |
364 | | - book.index_error = str(e)[:500] |
365 | | - stats["errors"] += 1 |
366 | | - |
367 | | - session.add(book) |
368 | | - logger.debug(f"DB: committing new book '{filename}'") |
369 | | - try: |
370 | | - _run_with_timeout(session.commit, _DB_TIMEOUT, f"commit book '{filepath}'") |
371 | | - stats["new_books"] += 1 |
372 | | - logger.info(f"New book saved: {title} ({category}) in {system_name}") |
373 | | - except TimeoutError as e: |
374 | | - logger.error(f"DB hang: {e} — rolling back '{filename}'") |
375 | | - session.rollback() |
376 | | - stats["errors"] += 1 |
377 | | - except IntegrityError: |
378 | | - session.rollback() |
379 | | - logger.debug(f"Book already exists, skipping: {filepath}") |
| 420 | + if should_stop and should_stop(): |
| 421 | + # Cancelled — clear the flag so the file is resumed next scan. |
| 422 | + book.scan_failed = False |
| 423 | + else: |
| 424 | + logger.error(f"Could not read page count for '{filename}': {e}") |
| 425 | + book.index_error = str(e)[:500] |
| 426 | + stats["errors"] += 1 |
| 427 | + try: |
| 428 | + _run_with_timeout(session.commit, _DB_TIMEOUT, f"commit scan_failed '{filepath}'") |
| 429 | + except (TimeoutError, IntegrityError) as e2: |
| 430 | + logger.error(f"DB hang saving index_error for '{filename}': {e2}") |
| 431 | + session.rollback() |
380 | 432 |
|
381 | 433 | if maps_dir.exists(): |
382 | 434 | for root, dirs, files in os.walk(maps_dir): |
|
0 commit comments