|
27 | 27 |
|
28 | 28 | log = logging.getLogger("paperscout") |
29 | 29 |
|
| 30 | +# MessageQueue keys allowed in /health extras (must not overlap scheduler.health_snapshot()). |
| 31 | +_MQ_HEALTH_FIELD_NAMES = frozenset( |
| 32 | + { |
| 33 | + "mq_depth", |
| 34 | + "mq_max_size", |
| 35 | + "mq_utilization", |
| 36 | + "mq_circuit_state", |
| 37 | + } |
| 38 | +) |
| 39 | + |
| 40 | + |
def _mq_health_fields(mq: MessageQueue) -> dict:
    """Collect the message-queue part of the /health payload.

    When ``mq`` exposes ``health_fields``, it is called and a dict result is
    returned unchanged. If that call raises, or returns something that is
    not a dict, a warning is logged and the result degrades to
    ``{"mq_depth": mq.depth()}``. The same depth-only dict is produced when
    ``mq`` has no ``health_fields`` attribute at all.

    Returns:
        The MQ health dict, or ``{}`` when even ``mq.depth()`` raises
        (that failure is logged as a warning, never propagated).
    """

    def depth_only(failure_msg: str) -> dict:
        # Shared degraded path: report the queue depth alone, or nothing
        # when depth() itself fails.
        try:
            depth = mq.depth()
        except Exception:
            log.warning(failure_msg, exc_info=True)
            return {}
        return {"mq_depth": depth}

    if hasattr(mq, "health_fields"):
        try:
            fields = mq.health_fields()
        except Exception as err:
            log.warning(
                "health: mq.health_fields() failed for %s id=%s: %s",
                type(mq).__name__,
                id(mq),
                err,
                exc_info=True,
            )
            # Called from inside the handler so a depth() failure is still
            # chained to the health_fields() failure in the logged traceback.
            return depth_only("health: mq.depth() fallback failed; omitting MQ fields")
        if isinstance(fields, dict):
            return fields
        log.warning("health: mq.health_fields() returned non-dict, using mq_depth only")

    return depth_only("health: mq.depth() failed; omitting MQ fields")
| 70 | + |
| 71 | + |
def _merge_extra_health_fields(
    scheduler_snap: dict,
    mq_extra: dict,
    db_pool: dict,
) -> dict:
    """Build the combined /health extras dict.

    Starts from a copy of ``scheduler_snap``, then adds each ``mq_extra``
    entry whose key is in ``_MQ_HEALTH_FIELD_NAMES`` and is not already a
    scheduler key. Every rejected MQ key is reported at debug level.
    ``db_pool`` is stored last under the ``"db_pool"`` key, so it replaces
    any entry of that name coming from the scheduler snapshot.

    Returns:
        A new dict; neither ``scheduler_snap`` nor ``mq_extra`` is mutated.
    """
    merged = dict(scheduler_snap)
    for name, val in mq_extra.items():
        clashes = name in scheduler_snap
        if name not in _MQ_HEALTH_FIELD_NAMES:
            # Not allow-listed: never exposed; only the log message differs.
            if clashes:
                log.debug(
                    "health: mq_extra key %r not allow-listed; scheduler snapshot kept",
                    name,
                )
            else:
                log.debug("health: mq_extra key %r not allow-listed, dropping", name)
            continue
        if clashes:
            log.debug(
                "health: mq_extra key %r conflicts with scheduler snapshot; scheduler wins",
                name,
            )
            continue
        merged[name] = val
    merged["db_pool"] = db_pool
    return merged
| 97 | + |
30 | 98 |
|
31 | 99 | def _setup_logging(data_dir: Path, console_level: str = "INFO", retention_days: int = 7) -> None: |
32 | 100 | """Console + daily rotating file logging; third-party loggers capped at WARNING.""" |
@@ -141,20 +209,11 @@ def _pool_status(p) -> dict: |
141 | 209 | ) |
142 | 210 |
|
def _extra_health_fields() -> dict:
    """Assemble the extra /health fields from scheduler, MQ and DB pool.

    ``scheduler``, ``mq`` and ``pool`` are not parameters; they are resolved
    from the surrounding scope.
    """
    scheduler_fields = scheduler.health_snapshot()
    queue_fields = _mq_health_fields(mq)
    pool_fields = _pool_status(pool)
    return _merge_extra_health_fields(scheduler_fields, queue_fields, pool_fields)
158 | 217 |
|
159 | 218 | register_handlers(app, user_watchlist, state, paper_count_fn, launch_time) |
160 | 219 |
|
|
0 commit comments