hunter-read
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backend/config.py‎
Lines changed: 117 additions & 2 deletions b/‎backend/config.py‎
Lines changed: 117 additions & 2 deletions
diff --git a/‎backend/indexer.py‎
Lines changed: 43 additions & 15 deletions b/‎backend/indexer.py‎
Lines changed: 43 additions & 15 deletions
diff --git a/‎backend/main.py‎
Lines changed: 3 additions & 0 deletions b/‎backend/main.py‎
Lines changed: 3 additions & 0 deletions
@@ -264,6 +264,7 @@ Tags are applied (or updated) every time the library is rescanned. Tags set via
 | `DATA_PATH` | `/data` | Path for the database, thumbnails, and search cache |
 | `WORKERS` | `2` | Number of uvicorn worker processes |
 | `VALKEY_URL` | — | Optional Redis-compatible cache URL for rendered page images (e.g. `redis://valkey:6379/0`) |
+| `LOG_LEVEL` | `info` | Optional console/Docker log verbosity (case-insensitive): `debug`, `info`, `warning`, `error`, or `critical`. Unrecognized values fall back to `info`. The in-app Logs tab (Settings → Logs) always captures `debug`-level entries regardless of this setting. |
 
 ### Volumes
 
 
@@ -1,6 +1,9 @@
 """Shared configuration, database, and cache setup for Grimoire."""
 import os
 import logging
+import collections
+import threading
+import datetime
 from typing import Optional
 from .models import init_db
 
@@ -14,8 +17,121 @@
 VALKEY_URL = os.environ.get("VALKEY_URL", "")
 _PAGE_CACHE_HEADERS = {"Cache-Control": "max-age=31536000, immutable"}
 
-logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s: %(message)s")
+# Console log level is controlled by the LOG_LEVEL env var (default: info).
+# In-memory ring buffer always captures DEBUG+ so the /api/logs endpoint can
+# serve debug logs regardless of the console level.
+_LOG_LEVEL_NAME = os.environ.get("LOG_LEVEL", "info").strip().upper()
+_CONSOLE_LEVEL = getattr(logging, _LOG_LEVEL_NAME, logging.INFO)
+if not isinstance(_CONSOLE_LEVEL, int):
+    # Not every upper-case name in the logging module is a level (for example
+    # BASIC_FORMAT is a string).  Passing such a value to setLevel() would
+    # raise at import time, so fall back to the default instead.
+    _CONSOLE_LEVEL = logging.INFO
+
+_LOG_FORMAT = "%(asctime)s [%(name)s] %(levelname)s: %(message)s"
+# The root logger is set to DEBUG; the per-handler level applied below is what
+# limits console output to _CONSOLE_LEVEL.
+logging.basicConfig(level=logging.DEBUG, format=_LOG_FORMAT)
+
+# The loggers named here only pass WARNING and above.  Because the level is set
+# on the logger itself, this applies to every handler, not just the console.
+for _noisy in ("uvicorn", "uvicorn.access", "uvicorn.error", "fastapi", "sqlalchemy.engine"):
+    logging.getLogger(_noisy).setLevel(logging.WARNING)
+
+# Apply the console level to the root handlers that exist at this point.
+for _h in logging.root.handlers:
+    _h.setLevel(_CONSOLE_LEVEL)
+
 logger = logging.getLogger("grimoire")
+logger.setLevel(logging.DEBUG)
+
+# Default capacity (number of entries) of the in-memory log ring buffer.
+_LOG_BUFFER_MAX = 20000
+
+# Last sequence number assigned to a buffered log entry (0 = none yet).
+_seq_counter = 0
+
+
+class _LogEntry:
+    """One captured log record, reduced to the fields kept in the ring buffer."""
+    __slots__ = ("seq", "timestamp", "level", "logger", "message")
+
+    def __init__(self, seq: int, timestamp: str, level: str, logger_name: str, message: str):
+        # The constructor argument is `logger_name`, but the value is stored
+        # (and serialised) under the shorter `logger` attribute.
+        self.seq, self.timestamp, self.level = seq, timestamp, level
+        self.logger, self.message = logger_name, message
+
+    def to_dict(self) -> dict:
+        """Return the entry as a plain dict keyed by slot name, in slot order."""
+        return {field: getattr(self, field) for field in self.__slots__}
+
+
+class _MemoryLogHandler(logging.Handler):
+    """Thread-safe ring-buffer log handler for in-app log viewing.
+
+    Keeps the most recent `maxlen` records in memory as `_LogEntry` objects.
+    Every stored entry gets a sequence number taken from the module-level
+    `_seq_counter`; `clear()` empties the buffer but does not reset that
+    counter.
+    """
+
+    def __init__(self, maxlen: int = _LOG_BUFFER_MAX):
+        super().__init__(level=logging.DEBUG)
+        # A bounded deque drops its oldest entry automatically once it is full.
+        self._buf: collections.deque[_LogEntry] = collections.deque(maxlen=maxlen)
+        # Guards the buffer and the sequence-counter increment in emit().
+        self._lock = threading.Lock()
+
+    @staticmethod
+    def _level_number(level_name: str) -> int:
+        """Map a stored level name back to its numeric level.
+
+        `logging.getLevelName` returns an int for registered names but a
+        "Level ..." *string* for anything else, which cannot be compared with
+        an int.  Records logged at an unregistered numeric level carry the
+        name "Level <n>", so <n> is recovered from that; any other unknown
+        name is treated as NOTSET.
+        """
+        number = logging.getLevelName(level_name)
+        if isinstance(number, int):
+            return number
+        try:
+            return int(level_name.rpartition(" ")[2])
+        except ValueError:
+            return logging.NOTSET
+
+    def emit(self, record: logging.LogRecord) -> None:
+        """Format `record` and append it to the buffer with the next seq number."""
+        global _seq_counter
+        try:
+            # UTC timestamp with millisecond precision, e.g. 2024-01-01T12:00:00.123Z
+            ts = datetime.datetime.fromtimestamp(
+                record.created, tz=datetime.timezone.utc
+            ).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
+            with self._lock:
+                _seq_counter += 1
+                entry = _LogEntry(
+                    seq=_seq_counter,
+                    timestamp=ts,
+                    level=record.levelname,
+                    logger_name=record.name,
+                    message=self.format(record),
+                )
+                self._buf.append(entry)
+        except Exception:
+            self.handleError(record)
+
+    def get_entries(
+        self,
+        min_level: int = logging.DEBUG,
+        limit: int = 500,
+        offset: int = 0,
+        after_seq: int = 0,
+    ) -> tuple[list[dict], int]:
+        """Return entries in oldest-to-newest order and the current max seq.
+
+        Only entries whose level is >= `min_level` are considered.
+
+        When `after_seq` > 0, returns only entries with seq > after_seq (the
+        newest `limit` of them), ignoring `offset`.  This is the fast path for
+        live polling.
+
+        When `after_seq` == 0 (initial / historical load), `offset` is counted
+        from the newest end: offset=0 → most-recent `limit` entries,
+        offset=limit → next-older page, etc.  An offset beyond the oldest
+        matching entry yields an empty list.
+
+        Negative `limit` / `offset` values are treated as 0.
+
+        Returns (entries_list, max_seq_in_buffer); max_seq_in_buffer is 0 when
+        the buffer is empty and is not affected by `min_level`.
+        """
+        limit = max(0, limit)
+        offset = max(0, offset)
+
+        with self._lock:
+            matching = [e for e in self._buf if self._level_number(e.level) >= min_level]
+            max_seq = self._buf[-1].seq if self._buf else 0
+
+        if after_seq > 0:
+            fresh = [e for e in matching if e.seq > after_seq]
+            # `fresh[-0:]` is the whole list, so limit == 0 needs its own branch.
+            page = fresh[-limit:] if limit else []
+            return [e.to_dict() for e in page], max_seq
+
+        # Clamp `end`: a negative value would make the slice count from the
+        # wrong side of the list and return entries instead of an empty page.
+        end = max(0, len(matching) - offset)
+        start = max(0, end - limit)
+        return [e.to_dict() for e in matching[start:end]], max_seq
+
+    def get_total(self, min_level: int = logging.DEBUG) -> int:
+        """Return how many buffered entries have a level >= `min_level`."""
+        with self._lock:
+            return sum(1 for e in self._buf if self._level_number(e.level) >= min_level)
+
+    def clear(self) -> None:
+        """Drop all buffered entries (the sequence counter is left untouched)."""
+        with self._lock:
+            self._buf.clear()
+
+
+# Single in-memory handler instance, attached to the root logger.
+# It stores only the bare message text; the other fields of each entry are
+# taken from the record in emit().
+_memory_handler = _MemoryLogHandler()
+_memory_handler.setFormatter(logging.Formatter(fmt="%(message)s"))
+
+logging.getLogger().addHandler(_memory_handler)
 
 os.makedirs(DATA_PATH, exist_ok=True)
 os.makedirs(THUMB_DIR, exist_ok=True)
@@ -34,7 +150,6 @@ def get_db():
         db.close()
 
 
-# Optional Valkey page cache
 _valkey: Optional[object] = None
 if VALKEY_URL:
     try:
 
@@ -20,8 +20,12 @@
 _FITZ_TIMEOUT = 300  # seconds
 
 
-def _fitz_open_with_timeout(filepath: str, timeout: int = _FITZ_TIMEOUT):
-    """Open a PDF with fitz, raising TimeoutError if it hangs beyond `timeout` seconds."""
+def _fitz_open_with_timeout(filepath: str, timeout: int = _FITZ_TIMEOUT, should_stop=None):
+    """Open a PDF with fitz, raising TimeoutError if it hangs beyond `timeout` seconds.
+
+    If `should_stop` callable is provided, the wait is interrupted early when it
+    returns True, raising TimeoutError so the caller can exit cleanly.
+    """
     result = [None]
     exc = [None]
 
@@ -33,7 +37,14 @@ def _open():
 
     t = threading.Thread(target=_open, daemon=True)
     t.start()
-    t.join(timeout)
+    deadline = timeout
+    poll_interval = 0.5  # check stop flag every 500ms
+    elapsed = 0.0
+    while t.is_alive() and elapsed < deadline:
+        t.join(poll_interval)
+        elapsed += poll_interval
+        if should_stop and should_stop():
+            raise TimeoutError(f"fitz.open() aborted by stop request for {filepath}")
     if t.is_alive():
         raise TimeoutError(f"fitz.open() timed out after {timeout}s for {filepath}")
     if exc[0] is not None:
@@ -85,12 +96,12 @@ def guess_category(filepath: str) -> str:
     return "core"
 
 
-def generate_thumbnail(filepath: str, output_path: str, size: tuple = (300, 400)) -> bool:
+def generate_thumbnail(filepath: str, output_path: str, size: tuple = (300, 400), should_stop=None) -> bool:
     """Generate a thumbnail from the first page of a PDF or from an image."""
     try:
         ext = Path(filepath).suffix.lower()
         if ext == ".pdf":
-            doc = _fitz_open_with_timeout(filepath)
+            doc = _fitz_open_with_timeout(filepath, should_stop=should_stop)
             if len(doc) == 0:
                 return False
             page = doc[0]
@@ -114,11 +125,11 @@ def generate_thumbnail(filepath: str, output_path: str, size: tuple = (300, 400)
         return False
 
 
-def extract_text_from_pdf(filepath: str) -> list[dict]:
+def extract_text_from_pdf(filepath: str, should_stop=None) -> list[dict]:
     """Extract text from all pages of a PDF. Returns list of {page, content}."""
     pages = []
     try:
-        doc = _fitz_open_with_timeout(filepath)
+        doc = _fitz_open_with_timeout(filepath, should_stop=should_stop)
         for i, page in enumerate(doc):
             page_text = page.get_text().strip()
             if page_text:
@@ -140,11 +151,13 @@ def _count_eligible_files(directory: Path, extensions: set) -> int:
     return count
 
 
-def scan_library(library_path: str, data_path: str, session: Session, on_progress=None):
+def scan_library(library_path: str, data_path: str, session: Session, on_progress=None, should_stop=None):
     """Scan the library directory and register all files in the database.
 
     on_progress(scanned_books, total_books, scanned_maps, total_maps, scanned_tokens, total_tokens)
     is called after each file is processed if provided.
+
+    should_stop() is an optional callable that returns True when the scan should abort early.
     """
     library = Path(library_path)
     books_dir = library / "books"
@@ -214,13 +227,18 @@ def scan_library(library_path: str, data_path: str, session: Session, on_progres
                             scanned_tokens,
                             total_tokens,
                         )
+                    if should_stop and should_stop():
+                        logger.info("scan_library: stop requested during books scan.")
+                        return stats
 
                     relative_path = os.path.relpath(filepath, library_path)
 
                     existing = session.query(Book).filter_by(filepath=filepath).first()
                     if existing:
+                        logger.debug(f"File scan: already registered, skipping: {filename}")
                         continue
 
+                    logger.debug(f"File scan: new book found: {filename}")
                     category = guess_category(relative_path)
                     title = Path(filename).stem.replace("_", " ").replace("-", " ").strip()
 
@@ -246,12 +264,12 @@ def scan_library(library_path: str, data_path: str, session: Session, on_progres
                         "books",
                         f"{slugify(title)}_{hashlib.md5(filepath.encode()).hexdigest()[:8]}.webp",
                     )
-                    if generate_thumbnail(filepath, thumb_path):
+                    if generate_thumbnail(filepath, thumb_path, should_stop=should_stop):
                         book.has_thumbnail = True
 
                     if ext == ".pdf":
                         try:
-                            doc = _fitz_open_with_timeout(filepath)
+                            doc = _fitz_open_with_timeout(filepath, should_stop=should_stop)
                             book.page_count = len(doc)
                             doc.close()
                         except Exception as e:
@@ -291,13 +309,18 @@ def scan_library(library_path: str, data_path: str, session: Session, on_progres
                         scanned_tokens,
                         total_tokens,
                     )
+                if should_stop and should_stop():
+                    logger.info("scan_library: stop requested during maps scan.")
+                    return stats
 
                 relative_path = os.path.relpath(filepath, library_path)
 
                 existing = session.query(GenericMap).filter_by(filepath=filepath).first()
                 if existing:
+                    logger.debug(f"File scan: already registered, skipping: {filename}")
                     continue
 
+                logger.debug(f"File scan: new map found: {filename}")
                 title = Path(filename).stem.replace("_", " ").replace("-", " ").strip()
 
                 try:
@@ -318,7 +341,7 @@ def scan_library(library_path: str, data_path: str, session: Session, on_progres
                     "maps",
                     f"{slugify(title)}_{hashlib.md5(filepath.encode()).hexdigest()[:8]}.webp",
                 )
-                if generate_thumbnail(filepath, thumb_path):
+                if generate_thumbnail(filepath, thumb_path, should_stop=should_stop):
                     gmap.has_thumbnail = True
 
                 session.add(gmap)
@@ -353,13 +376,18 @@ def scan_library(library_path: str, data_path: str, session: Session, on_progres
                         scanned_tokens,
                         total_tokens,
                     )
+                if should_stop and should_stop():
+                    logger.info("scan_library: stop requested during tokens scan.")
+                    return stats
 
                 relative_path = os.path.relpath(filepath, library_path)
 
                 existing = session.query(Token).filter_by(filepath=filepath).first()
                 if existing:
+                    logger.debug(f"File scan: already registered, skipping: {filename}")
                     continue
 
+                logger.debug(f"File scan: new token found: {filename}")
                 title = Path(filename).stem.replace("_", " ").replace("-", " ").strip()
 
                 try:
@@ -380,7 +408,7 @@ def scan_library(library_path: str, data_path: str, session: Session, on_progres
                     "tokens",
                     f"{slugify(title)}_{hashlib.md5(filepath.encode()).hexdigest()[:8]}.webp",
                 )
-                if generate_thumbnail(filepath, thumb_path, size=(200, 200)):
+                if generate_thumbnail(filepath, thumb_path, size=(200, 200), should_stop=should_stop):
                     token.has_thumbnail = True
 
                 session.add(token)
@@ -499,12 +527,12 @@ def _apply_tags_from_library(library_path: str, session: Session) -> None:
     session.commit()
 
 
-def index_book_text(book: Book, data_path: str, session: Session):
+def index_book_text(book: Book, data_path: str, session: Session, should_stop=None):
     """Extract and index text from a PDF for full-text search."""
     if book.indexed or book.index_failed or book.mime_type != "application/pdf":
         return False
 
-    pages = extract_text_from_pdf(book.filepath)
+    pages = extract_text_from_pdf(book.filepath, should_stop=should_stop)
     if not pages:
         book.index_error = "No text extracted"
         book.index_failed = True
@@ -523,5 +551,5 @@ def index_book_text(book: Book, data_path: str, session: Session):
     book.indexed = True
     book.index_error = ""
     session.commit()
-    logger.info(f"Indexed {len(pages)} pages for: {book.title}")
+    logger.info(f"Indexed {len(pages)} pages for: {book.filename} ('{book.title}')")
     return True
@@ -24,6 +24,7 @@
 from .routers import maintenance as maintenance_router
 from .routers import settings as settings_router
 from .routers import campaigns as campaigns_router
+from .routers import logs as logs_router
 from .routers.library import run_rescan_sync
 from . import scheduler
 from . import session_creator
@@ -63,6 +64,7 @@
     },
     {"name": "settings", "description": "Application settings. **Admin only.**"},
     {"name": "maintenance", "description": "Admin housekeeping tasks."},
+    {"name": "logs", "description": "Application log retrieval. **Admin only.**"},
 ]
 
 
@@ -139,6 +141,7 @@ def do_scan():
 api.include_router(maintenance_router.router)
 api.include_router(settings_router.router)
 api.include_router(campaigns_router.router)
+api.include_router(logs_router.router)
 app.include_router(api)