Skip to content

Commit 80bf57f

Browse files
committed
Improve type annotations and align storage backends with ABC contract
- Add precise type annotations across all modules (dict -> dict[str, Any])
- Fix JSONL and Postgres backends to implement StorageBackend ABC interface
- Adopt Python 3.11+ idioms (StrEnum, datetime.UTC, collections.abc)
- Reformat long lines for consistent code style
1 parent 5d77ab7 commit 80bf57f

27 files changed

Lines changed: 473 additions & 326 deletions

File tree

src/scraperguard/alerts/models.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from dataclasses import dataclass, field
66
from datetime import UTC, datetime
7+
from typing import Any
78

89

910
def _utcnow() -> datetime:
@@ -21,9 +22,9 @@ class Alert:
2122
url: str
2223
run_id: str
2324
timestamp: datetime = field(default_factory=_utcnow)
24-
details: dict = field(default_factory=dict)
25+
details: dict[str, Any] = field(default_factory=dict)
2526

26-
def to_dict(self) -> dict:
27+
def to_dict(self) -> dict[str, Any]:
2728
"""Serialize to a plain dict for JSON transport."""
2829
return {
2930
"severity": self.severity,

src/scraperguard/alerts/slack.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import json
66
import urllib.request
7-
from typing import TYPE_CHECKING
7+
from typing import TYPE_CHECKING, Any
88

99
from scraperguard.alerts.base import AlertDispatcher
1010

@@ -28,7 +28,7 @@ def __init__(self, webhook_url: str) -> None:
2828
def name(self) -> str:
2929
return "slack"
3030

31-
def _build_payload(self, alert: Alert) -> dict:
31+
def _build_payload(self, alert: Alert) -> dict[str, Any]:
3232
emoji = _SEVERITY_EMOJI.get(alert.severity, ":grey_question:")
3333
return {
3434
"blocks": [
@@ -73,6 +73,6 @@ def send(self, alert: Alert) -> bool:
7373
method="POST",
7474
)
7575
with urllib.request.urlopen(req) as resp:
76-
return resp.status == 200
76+
return bool(resp.status == 200)
7777
except Exception:
7878
return False

src/scraperguard/alerts/webhook.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import json
66
import urllib.request
7-
from typing import TYPE_CHECKING
7+
from typing import TYPE_CHECKING, Any
88

99
from scraperguard.alerts.base import AlertDispatcher
1010

@@ -15,7 +15,7 @@
1515
class WebhookDispatcher(AlertDispatcher):
1616
"""Dispatches alerts as JSON POST requests to a configurable URL."""
1717

18-
def __init__(self, url: str, headers: dict | None = None) -> None:
18+
def __init__(self, url: str, headers: dict[str, Any] | None = None) -> None:
1919
self.url = url
2020
self.headers = headers or {}
2121

@@ -38,6 +38,6 @@ def send(self, alert: Alert) -> bool:
3838
method="POST",
3939
)
4040
with urllib.request.urlopen(req) as resp:
41-
return resp.status == 200
41+
return bool(resp.status == 200)
4242
except Exception:
4343
return False

src/scraperguard/api/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import logging
66
import time
7+
from typing import Any
78

89
from fastapi import FastAPI, Request
910
from fastapi.middleware.cors import CORSMiddleware
@@ -54,7 +55,7 @@ async def _unhandled_exception_handler(request: Request, exc: Exception) -> JSON
5455

5556
# Request logging middleware
5657
@app.middleware("http")
57-
async def _request_logging_middleware(request: Request, call_next):
58+
async def _request_logging_middleware(request: Request, call_next: Any) -> Any:
5859
start = time.perf_counter()
5960
response = await call_next(request)
6061
duration_ms = (time.perf_counter() - start) * 1000

src/scraperguard/api/routes.py

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
from dataclasses import asdict
6+
from typing import Any
67

78
from fastapi import APIRouter, Query, Request
89
from fastapi.responses import JSONResponse
@@ -16,18 +17,18 @@
1617
router = APIRouter(prefix="/api")
1718

1819

19-
def _get_storage(request: Request):
20+
def _get_storage(request: Request) -> Any:
2021
return request.app.state.storage
2122

2223

2324
@router.get("/health")
24-
async def health() -> dict:
25+
async def health() -> dict[str, str]:
2526
"""Service health check."""
2627
return {"status": "ok", "version": scraperguard.__version__}
2728

2829

2930
@router.get("/runs")
30-
async def list_runs(request: Request, limit: int = Query(default=20, ge=1)) -> dict:
31+
async def list_runs(request: Request, limit: int = Query(default=20, ge=1)) -> dict[str, Any]:
3132
"""List recent scraper runs."""
3233
storage = _get_storage(request)
3334
runs = storage.list_runs(limit=limit)
@@ -49,7 +50,7 @@ async def list_snapshots(
4950
request: Request,
5051
url: str = Query(...),
5152
limit: int = Query(default=10, ge=1),
52-
) -> dict:
53+
) -> dict[str, Any]:
5354
"""List recent snapshots for a URL (lightweight, no HTML bodies)."""
5455
storage = _get_storage(request)
5556
snapshots = storage.list_snapshots(url, limit=limit)
@@ -79,7 +80,7 @@ async def list_validation_results(
7980
request: Request,
8081
schema_name: str = Query(...),
8182
limit: int = Query(default=10, ge=1),
82-
) -> dict:
83+
) -> dict[str, Any]:
8384
"""List validation result history for a URL and schema."""
8485
storage = _get_storage(request)
8586
results = storage.list_validation_results(url, schema_name, limit=limit)
@@ -103,11 +104,13 @@ async def get_drift(
103104
content={"error": "No validation results found for this URL and schema"},
104105
)
105106
events = run_drift_analysis(latest, storage, baseline_count=baseline_count, threshold=threshold)
106-
return JSONResponse(content={
107-
"drift_events": [asdict(e) for e in events],
108-
"baseline_count": baseline_count,
109-
"threshold": threshold,
110-
})
107+
return JSONResponse(
108+
content={
109+
"drift_events": [asdict(e) for e in events],
110+
"baseline_count": baseline_count,
111+
"threshold": threshold,
112+
}
113+
)
111114

112115

113116
@router.get("/report/{run_id}")
@@ -127,7 +130,7 @@ async def get_report(
127130
if url is None:
128131
# Query snapshots associated with this run — we need to find a URL
129132
# The storage doesn't have a list-by-run method, so use the connection directly
130-
if hasattr(storage, '_conn'):
133+
if hasattr(storage, "_conn"):
131134
cursor = storage._conn.execute(
132135
"SELECT url FROM snapshots WHERE run_id = ? LIMIT 1",
133136
(run_id,),
@@ -145,7 +148,7 @@ async def get_report(
145148
storage.get_latest_snapshot(url)
146149
validation_result = storage.get_latest_validation_result(url, schema_name="")
147150
# Try to find any schema name for this URL
148-
if validation_result is None and hasattr(storage, '_conn'):
151+
if validation_result is None and hasattr(storage, "_conn"):
149152
cursor = storage._conn.execute(
150153
"SELECT schema_name FROM validation_results"
151154
" WHERE url = ? ORDER BY timestamp DESC LIMIT 1",
@@ -169,23 +172,25 @@ async def get_report(
169172
url=url,
170173
)
171174

172-
return JSONResponse(content={
173-
"overall_score": report.overall_score,
174-
"status": report.status,
175-
"components": [
176-
{
177-
"name": c.name,
178-
"score": round(c.score, 4),
179-
"weight": c.weight,
180-
"details": c.details,
181-
}
182-
for c in report.components
183-
],
184-
"drift_events": [asdict(e) for e in report.drift_events],
185-
"run_id": report.run_id,
186-
"url": report.url,
187-
"timestamp": report.timestamp.isoformat(),
188-
})
175+
return JSONResponse(
176+
content={
177+
"overall_score": report.overall_score,
178+
"status": report.status,
179+
"components": [
180+
{
181+
"name": c.name,
182+
"score": round(c.score, 4),
183+
"weight": c.weight,
184+
"details": c.details,
185+
}
186+
for c in report.components
187+
],
188+
"drift_events": [asdict(e) for e in report.drift_events],
189+
"run_id": report.run_id,
190+
"url": report.url,
191+
"timestamp": report.timestamp.isoformat(),
192+
}
193+
)
189194

190195

191196
@router.get("/selectors/{url:path}")
@@ -214,6 +219,8 @@ async def get_selector_statuses(
214219
previous_tree = parse_to_tree(snapshots[1].normalized_html) if len(snapshots) > 1 else None
215220

216221
statuses = track_selectors(current_tree, previous_tree, selector_list)
217-
return JSONResponse(content={
218-
"selector_statuses": [asdict(s) for s in statuses],
219-
})
222+
return JSONResponse(
223+
content={
224+
"selector_statuses": [asdict(s) for s in statuses],
225+
}
226+
)

src/scraperguard/cli/main.py

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import urllib.request
1616
from datetime import UTC, datetime
1717
from pathlib import Path
18+
from typing import Any
1819

1920
import click
2021

@@ -33,7 +34,7 @@
3334
from scraperguard.storage.models import SnapshotMetadata
3435

3536

36-
def _fetch_url(url: str) -> tuple[str, int, dict, float]:
37+
def _fetch_url(url: str) -> tuple[str, int, dict[str, Any], float]:
3738
"""Fetch a URL and return (html, status, headers, latency_ms)."""
3839
start = time.monotonic()
3940
req = urllib.request.Request(url, headers={"User-Agent": "ScraperGuard/1.0"})
@@ -55,7 +56,9 @@ def cli() -> None:
5556
@click.argument("target")
5657
@click.option("--schema", default=None, help="Path to a Python file with a BaseSchema subclass.")
5758
@click.option(
58-
"--config", "config_path", default=None,
59+
"--config",
60+
"config_path",
61+
default=None,
5962
help="Path to scraperguard.yaml config file.",
6063
)
6164
@click.option("--run-id", default=None, help="Run ID to group with (creates new if not provided).")
@@ -89,9 +92,9 @@ def run(
8992
# d) Get HTML and items
9093
url: str
9194
html: str
92-
items: list[dict]
95+
items: list[dict[str, Any]]
9396
http_status: int = 200
94-
headers: dict = {}
97+
headers: dict[str, Any] = {}
9598
latency_ms: float = 0.0
9699

97100
if target.startswith("http://") or target.startswith("https://"):
@@ -148,11 +151,14 @@ def run(
148151
try:
149152
schema_cls = load_schema_from_file(schema)
150153
validation_result = schema_cls.validate_batch(
151-
items, run_id=run_meta.id, url=url,
154+
items,
155+
run_id=run_meta.id,
156+
url=url,
152157
)
153158
try:
154159
drift_events = run_drift_analysis(
155-
validation_result, storage,
160+
validation_result,
161+
storage,
156162
threshold=cfg.schema.null_drift_threshold,
157163
)
158164
except Exception as exc:
@@ -197,7 +203,8 @@ def run(
197203
prev_snapshot_obj = s
198204
break
199205
if prev_snapshot_obj and should_diff(
200-
snapshot.fingerprint, prev_snapshot_obj.fingerprint,
206+
snapshot.fingerprint,
207+
prev_snapshot_obj.fingerprint,
201208
):
202209
before_tree = parse_to_tree(prev_snapshot_obj.normalized_html)
203210
after_tree = parse_to_tree(snapshot.normalized_html)
@@ -214,7 +221,8 @@ def run(
214221
prev_snapshot_obj = s
215222
break
216223
if prev_snapshot_obj and should_diff(
217-
snapshot.fingerprint, prev_snapshot_obj.fingerprint,
224+
snapshot.fingerprint,
225+
prev_snapshot_obj.fingerprint,
218226
):
219227
before_tree = parse_to_tree(prev_snapshot_obj.normalized_html)
220228
after_tree = parse_to_tree(snapshot.normalized_html)
@@ -223,14 +231,16 @@ def run(
223231
click.echo(f"Warning: DOM diff failed: {exc}", err=True)
224232

225233
# i) Failure classification
226-
classifications = classify_failure(ClassificationInput(
227-
validation_result=validation_result,
228-
dom_changes=dom_changes,
229-
selector_statuses=selector_statuses,
230-
raw_html=html,
231-
http_status=http_status,
232-
response_size_bytes=len(html.encode("utf-8")),
233-
))
234+
classifications = classify_failure(
235+
ClassificationInput(
236+
validation_result=validation_result,
237+
dom_changes=dom_changes,
238+
selector_statuses=selector_statuses,
239+
raw_html=html,
240+
http_status=http_status,
241+
response_size_bytes=len(html.encode("utf-8")),
242+
)
243+
)
234244

235245
# j) Health score
236246
report = compute_health_score(
@@ -244,16 +254,21 @@ def run(
244254
)
245255

246256
# k) Alerting
247-
dispatchers = []
257+
from scraperguard.alerts.base import AlertDispatcher
258+
259+
dispatchers: list[AlertDispatcher] = []
248260
if cfg.alerts.slack.enabled and cfg.alerts.slack.webhook:
249261
from scraperguard.alerts.slack import SlackDispatcher
262+
250263
dispatchers.append(SlackDispatcher(cfg.alerts.slack.webhook))
251264
if cfg.alerts.webhook_url:
252265
from scraperguard.alerts.webhook import WebhookDispatcher
266+
253267
dispatchers.append(WebhookDispatcher(cfg.alerts.webhook_url))
254268
if dispatchers:
255269
from scraperguard.alerts.dispatcher import AlertManager
256270
from scraperguard.alerts.models import Alert
271+
257272
alert_mgr = AlertManager(dispatchers, cfg.alerts.thresholds)
258273
for c in classifications:
259274
if c.severity in ("critical", "warning"):
@@ -483,7 +498,9 @@ def report(url: str, run_id: str | None, fmt: str) -> None:
483498
schema_compliance = comp_map.get("Schema Compliance", "")
484499
extraction_completeness = comp_map.get("Extraction Completeness", "")
485500
selector_stability = comp_map.get("Selector Stability", "")
486-
click.echo("url,score,status,schema_compliance,extraction_completeness,selector_stability,timestamp")
501+
click.echo(
502+
"url,score,status,schema_compliance,extraction_completeness,selector_stability,timestamp"
503+
)
487504
click.echo(
488505
f"{url},{health_report.overall_score},{health_report.status},"
489506
f"{schema_compliance},{extraction_completeness},{selector_stability},"
@@ -500,8 +517,7 @@ def serve(host: str, port: int) -> None:
500517
import uvicorn
501518
except ImportError:
502519
click.echo(
503-
"Error: uvicorn not installed. "
504-
"Install API dependencies: pip install scraperguard[api]",
520+
"Error: uvicorn not installed. Install API dependencies: pip install scraperguard[api]",
505521
err=True,
506522
)
507523
raise SystemExit(1)

0 commit comments

Comments (0)