diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e3dd09d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,17 @@ +# The API image needs only pyproject/uv.lock + src/. Everything else is noise. +.git +.github +.venv +__pycache__ +**/__pycache__ +*.pyc +.pytest_cache +.mypy_cache +.ruff_cache +output +*.db +tests +docs +web +node_modules +*.md diff --git a/DEPLOY.md b/DEPLOY.md new file mode 100644 index 0000000..79b8646 --- /dev/null +++ b/DEPLOY.md @@ -0,0 +1,98 @@ +# Deploying the hosted report + +Two deployables: + +- **API** (FastAPI engine) → a container host. This guide uses **Fly.io** + (`Dockerfile` + `fly.toml` are ready); Railway works from the same Dockerfile. +- **Frontend** (`web/`, Next.js) → **Vercel**. + +Optional but recommended for production: + +- **Upstash Redis** — shared cache + per-IP throttle across instances. +- A **GitHub token** (PAT or GitHub App installation token) so the API runs on + the 5 000 req/hr authenticated limit instead of 60 req/hr. + +--- + +## 1. API → Fly.io + +```bash +# One-time +fly launch --no-deploy # or `fly apps create ghra-report-api` +fly volumes create ghra_data --region iad --size 1 # persists the waitlist DB + +# Secrets (never commit these; set them on Fly) +fly secrets set GHRA_GITHUB_TOKEN=ghp_xxx +fly secrets set GHRA_CORS_ORIGINS=https://your-frontend.vercel.app +# If using Upstash (see §3): +fly secrets set GHRA_REDIS_URL=rediss://default:xxx@xxx.upstash.io:6379 + +fly deploy +``` + +`fly.toml` already wires the health check (`GET /api/health`), the `/data` +volume mount, and the non-secret config (`GHRA_REPORT_TTL_SECONDS`, +`GHRA_RATE_LIMIT`, `GHRA_RATE_WINDOW_SECONDS`, `GHRA_WAITLIST_DB=/data/waitlist.db`). + +The image's default command does not pass `--forwarded-allow-ips`, so forwarded +headers are not trusted by default. Behind Fly's proxy, opt in with +`GHRA_TRUST_FORWARDED_FOR` (§4) so the per-IP throttle keys on the real client address. 
+ +Verify: `curl https://ghra-report-api.fly.dev/api/health` → +`{"status":"ok","github_token":true}`. + +--- + +## 2. Frontend → Vercel + +```bash +cd web +vercel link +vercel env add NEXT_PUBLIC_API_BASE production # → https://ghra-report-api.fly.dev +vercel --prod +``` + +Set the project **Root Directory** to `web/` in Vercel (the repo root is the +Python engine). After the frontend URL is known, set it as `GHRA_CORS_ORIGINS` +on the API (§1) so the browser's cross-origin calls are allowed. + +> Vercel commit-author gotcha: if `vercel --prod` is blocked on the commit +> author, deploy from a git-free copy of `web/` and `vercel alias set`. + +--- + +## 3. Upstash Redis (production cache + throttle) + +Without `GHRA_REDIS_URL` the API uses an in-process store — correct, but +per-instance (cache and throttle don't share across machines). For more than one +instance, create an Upstash Redis database and set its `rediss://` URL as +`GHRA_REDIS_URL` (§1). The `hosting` extra (`redis`) is already installed in the +image. Any Redis server version works (the throttle uses plain `EXPIRE`). + +--- + +## 4. Environment reference + +| Variable | Where | Default | Purpose | +| -------------------------- | --------- | ------------------ | -------------------------------------------------- | +| `GHRA_GITHUB_TOKEN` | API | _(none)_ | Server token → 5 000 req/hr + GraphQL repo lists. | +| `GHRA_CORS_ORIGINS` | API | localhost:3000 | Comma-separated allowed browser origins. | +| `GHRA_REDIS_URL` | API | _(in-memory)_ | Upstash/Redis URL for shared cache + throttle. | +| `GHRA_REPORT_TTL_SECONDS` | API | `3600` | Report cache TTL. | +| `GHRA_RATE_LIMIT` | API | `20` | Requests per window per IP. | +| `GHRA_RATE_WINDOW_SECONDS` | API | `3600` | Throttle window. | +| `GHRA_WAITLIST_DB` | API | `/waitlist.db` | SQLite path (point at the mounted volume). | +| `GHRA_TRUST_FORWARDED_FOR` | API | off | Only if not using uvicorn `--forwarded-allow-ips`. 
# API server image for the hosted clone-free report (FastAPI engine).
# The Next.js frontend deploys separately to Vercel — see DEPLOY.md.
FROM python:3.12-slim

# uv for fast, reproducible, lockfile-pinned installs.
COPY --from=ghcr.io/astral-sh/uv:0.5 /uv /bin/uv

WORKDIR /app
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    PYTHONUNBUFFERED=1

# Install dependencies only (the app runs from the source tree via `src.*`
# imports, so the project itself isn't packaged). Cached unless deps change.
COPY pyproject.toml uv.lock ./
# --no-cache: without it (or a BuildKit cache mount) uv's download/build cache
# is written into this layer and shipped in the image next to the installed
# packages. A plain flag is used instead of `RUN --mount=type=cache` so the
# same Dockerfile keeps building on hosts with restricted cache-mount support.
RUN uv sync --frozen --no-cache --no-install-project --no-dev \
    --extra serve --extra hosting

COPY src ./src

EXPOSE 8080
# Do not trust spoofable forwarded headers by default. Deployments behind a
# known proxy can opt in with GHRA_TRUST_FORWARDED_FOR and a platform-specific
# Uvicorn forwarded-allow-ips override.
# Exec (JSON) form so the server process receives stop signals directly;
# --no-sync because the environment was already resolved at build time.
CMD ["uv", "run", "--no-sync", "python", "-m", "uvicorn", \
     "--factory", "src.serve.app:create_app", \
     "--host", "0.0.0.0", "--port", "8080"]
+# Secrets (GHRA_GITHUB_TOKEN, GHRA_REDIS_URL, GHRA_CORS_ORIGINS) are set with +# `fly secrets set …`, NOT here. See DEPLOY.md. +app = "ghra-report-api" +primary_region = "iad" + +[build] + dockerfile = "Dockerfile" + +[env] + GHRA_REPORT_TTL_SECONDS = "3600" + GHRA_RATE_LIMIT = "20" + GHRA_RATE_WINDOW_SECONDS = "3600" + # Persisted on the mounted volume below (survives restarts/redeploys). + GHRA_WAITLIST_DB = "/data/waitlist.db" + +[http_service] + internal_port = 8080 + force_https = true + auto_stop_machines = "suspend" + auto_start_machines = true + min_machines_running = 0 + + [[http_service.checks]] + interval = "30s" + timeout = "5s" + grace_period = "10s" + method = "GET" + path = "/api/health" + +# Persistent volume for the SQLite waitlist. Create it once with: +# fly volumes create ghra_data --region iad --size 1 +[mounts] + source = "ghra_data" + destination = "/data" + +[[vm]] + size = "shared-cpu-1x" + memory = "512mb" diff --git a/pyproject.toml b/pyproject.toml index 7cbe7ef..e8a4ed6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,11 @@ serve = [ "jinja2>=3.1", "python-multipart>=0.0.9", ] +hosting = [ + # Optional: a shared Redis/Upstash backend for the hosted report cache and + # per-IP throttle. Without it, the in-memory backend is used (single-instance). + "redis>=5.0", +] build = [ "shiv>=1.0", "build>=1.0", diff --git a/src/api_checkout.py b/src/api_checkout.py new file mode 100644 index 0000000..e2f328c --- /dev/null +++ b/src/api_checkout.py @@ -0,0 +1,176 @@ +"""Materialize a sparse, API-sourced repo skeleton for clone-free scoring. + +The audit engine's analyzers read a repo from the local filesystem. 
logger = logging.getLogger(__name__)

# Default upper bound on tree entries materialized per repo. It is applied to
# the directory list and to the file list independently.
DEFAULT_MAX_FILES = 5000
# Default upper bound on Contents API fetch attempts per repo.
DEFAULT_MAX_CONTENT_FILES = 20

# Files whose *content* (not just presence) carries real scoring signal. Matched
# case-insensitively by basename; anything starting with ``readme`` also qualifies.
CONTENT_FILE_NAMES = {
    "package.json",
    "pyproject.toml",
    "requirements.txt",
    "setup.py",
    "setup.cfg",
    "pipfile",
    "cargo.toml",
    "go.mod",
    "pom.xml",
    "build.gradle",
    "gemfile",
    "composer.json",
}


def _is_content_file(path: str) -> bool:
    """Return True when ``path`` names a file worth fetching with real content.

    Only the basename is considered, compared case-insensitively: any name
    starting with ``readme`` qualifies, as does any name in
    ``CONTENT_FILE_NAMES``.
    """
    base = path.rsplit("/", 1)[-1].lower()
    return base.startswith("readme") or base in CONTENT_FILE_NAMES


def _safe_target(dest: Path, rel: str) -> Path | None:
    """Resolve ``rel`` under ``dest``, rejecting traversal/absolute escapes.

    Tree paths come from arbitrary remote repos, so a malicious entry like
    ``../../etc/passwd`` or ``/abs/evil`` must never resolve outside ``dest``.

    Returns the resolved absolute path, or ``None`` when ``rel`` is blank,
    is ``.``/``..``, contains a NUL byte, resolves to ``dest`` itself, or
    resolves anywhere that is not strictly below ``dest``.
    """
    rel = rel.strip()
    if not rel or rel in (".", "..") or "\x00" in rel:
        return None
    # Joining with an absolute ``rel`` discards ``dest`` entirely; the
    # containment check below is what rejects that case.
    candidate = (dest / rel).resolve()
    dest_resolved = dest.resolve()
    if candidate == dest_resolved:
        return None
    if dest_resolved not in candidate.parents:
        return None
    return candidate


def materialize_api_checkout(
    metadata: RepoMetadata,
    client: "GitHubClient",
    dest: Path,
    *,
    max_files: int = DEFAULT_MAX_FILES,
    max_content_files: int = DEFAULT_MAX_CONTENT_FILES,
) -> Path:
    """Build a sparse skeleton of one repo under ``dest`` from the GitHub API.

    Args:
        metadata: Repo descriptor; ``full_name`` (``owner/repo``) and
            ``default_branch`` are read.
        client: Object providing ``get_repo_tree(owner, repo, ref)`` and
            ``get_file_content(owner, repo, path, ref=...)``.
        dest: Directory to populate; created if missing.
        max_files: Cap on how many directory entries and how many file entries
            from the tree are materialized (each list is capped separately), so
            a huge or hostile tree cannot force unbounded filesystem work.
        max_content_files: Cap on Contents API fetch *attempts*. Files beyond
            the budget, and files whose fetch returns ``None``, are written
            empty.

    Returns:
        ``dest``. If the repo tree is expectedly unavailable (empty repo,
        missing ref, private repo, gone), ``dest`` is created empty so
        downstream analyzers score it as a near-empty repo rather than
        crashing. Errors raised by ``client`` are not caught here and
        propagate to the caller.
    """
    dest = Path(dest)
    dest.mkdir(parents=True, exist_ok=True)

    owner, _, repo = metadata.full_name.partition("/")
    if not owner or not repo:
        logger.warning(
            "Cannot materialize %r: full_name is not 'owner/repo'",
            metadata.full_name,
        )
        return dest

    tree = client.get_repo_tree(owner, repo, metadata.default_branch)
    if not tree.get("available"):
        return dest
    if tree.get("truncated"):
        logger.warning(
            "Tree truncated for %s — skeleton is incomplete", metadata.full_name
        )

    # Directories are bounded by the same cap as files; previously only the
    # file list was sliced, leaving directory creation unbounded.
    for rel in tree.get("dirs", [])[:max_files]:
        target = _safe_target(dest, rel)
        if target is not None:
            target.mkdir(parents=True, exist_ok=True)

    content_budget = max_content_files
    for rel in tree.get("files", [])[:max_files]:
        target = _safe_target(dest, rel)
        if target is None:
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        text = ""
        if content_budget > 0 and _is_content_file(rel):
            # Charge the budget per attempt, not per success: otherwise a repo
            # full of unreadable README-like paths could trigger one Contents
            # API call per file.
            content_budget -= 1
            fetched = client.get_file_content(
                owner, repo, rel, ref=metadata.default_branch
            )
            if fetched is not None:
                text = fetched
        target.write_text(text, encoding="utf-8")

    return dest


@contextmanager
def materialize_api_workspace(
    repos: list[RepoMetadata],
    client: "GitHubClient",
    *,
    on_progress: Callable[[int, int, str], None] | None = None,
    on_error: Callable[[str, str], None] | None = None,
    max_files: int = DEFAULT_MAX_FILES,
    max_content_files: int = DEFAULT_MAX_CONTENT_FILES,
) -> Generator[dict[str, Path], None, None]:
    """Materialize API skeletons for many repos into a session-unique temp dir.

    Drop-in replacement for ``cloner.clone_workspace``: yields a dict mapping
    repo name → skeleton path. A repo that fails to materialize is skipped with
    a warning so one bad repo never aborts a portfolio scan.

    Args:
        repos: Repos to materialize, processed sequentially in the given order.
        client: Passed through to ``materialize_api_checkout``.
        on_progress: Called as ``(index, total, repo_name)`` before each repo,
            with ``index`` starting at 1.
        on_error: Called as ``(repo_name, message)`` for each repo that raised.
        max_files: Forwarded to ``materialize_api_checkout``.
        max_content_files: Forwarded to ``materialize_api_checkout``.

    Yields:
        ``{repo.name: skeleton_path}`` for every repo that materialized. The
        temp directory is removed when the ``with`` block exits, so the paths
        are only valid inside it.
    """
    with tempfile.TemporaryDirectory(prefix="audit-api-") as tmpdir:
        root = Path(tmpdir)
        workspace: dict[str, Path] = {}
        total = len(repos)
        for index, repo in enumerate(repos, 1):
            if on_progress:
                on_progress(index, total, repo.name)
            try:
                dest = materialize_api_checkout(
                    repo,
                    client,
                    root / repo.name,
                    max_files=max_files,
                    max_content_files=max_content_files,
                )
                workspace[repo.name] = dest
            except Exception as exc:  # noqa: BLE001 — one bad repo must not abort the scan
                logger.warning("API checkout failed for %s: %s", repo.name, exc)
                if on_error:
                    on_error(repo.name, str(exc))
        yield workspace
logger = logging.getLogger(__name__)

# Upper bound on threads used for per-repo work (materialize + analyze + score).
# Each repo writes into its own sub-directory of the scan's temp dir. The
# client is shared between threads, which relies on its underlying HTTP session
# being safe for concurrent use.
DEFAULT_SCAN_WORKERS = 8

API_ONLY_MODE = "api_only"
API_ONLY_FIDELITY_NOTE = (
    "API-only scan: scored from GitHub API metadata and repository structure "
    "without cloning. Deep code-quality, secret-scanning, and dependency-age "
    "signals require the full local scan (OSS CLI)."
)


class _InteractiveClient:
    """Proxy around a GitHubClient that stubs out the ``stats/*`` lookups.

    The three stats getters return an empty container immediately instead of
    calling the wrapped client, so an interactive scan never waits on GitHub's
    asynchronously computed statistics. Any other attribute is looked up on the
    wrapped client unchanged.
    """

    def __init__(self, inner: GitHubClient) -> None:
        self._inner = inner

    def __getattr__(self, name: str):
        # Only reached for attributes not defined on the proxy itself.
        return getattr(self._inner, name)

    def get_contributor_stats(self, *args, **kwargs) -> list:
        return []

    def get_commit_activity(self, *args, **kwargs) -> list:
        return []

    def get_participation_stats(self, *args, **kwargs) -> dict:
        return {}


def _portfolio_lang_freq(repos: list[RepoMetadata]) -> dict[str, float]:
    """Return each primary language's share among repos that declare one.

    Repos with a falsy ``language`` are ignored; an empty dict is returned when
    no repo declares a language.
    """
    declared = [repo.language for repo in repos if repo.language]
    if not declared:
        return {}
    tally: dict[str, int] = {}
    for language in declared:
        tally[language] = tally.get(language, 0) + 1
    denominator = len(declared)
    return {language: hits / denominator for language, hits in tally.items()}


def score_repos_api_only(
    repos: list[RepoMetadata],
    client: GitHubClient,
    *,
    portfolio_lang_freq: dict[str, float] | None = None,
    security_offline: bool = True,
    fast: bool = True,
    max_workers: int = DEFAULT_SCAN_WORKERS,
) -> list[RepoAudit]:
    """Score repos from the API alone, returning audits in input order.

    Args:
        repos: Repos to score; an empty list short-circuits to ``[]``.
        client: GitHub client used for materialization, analysis and scoring.
        portfolio_lang_freq: Language shares for the portfolio; computed from
            ``repos`` when ``None``.
        security_offline: Forwarded to ``score_repo``.
        fast: When true, the client is wrapped so the ``stats/*`` getters
            return empty results; pass ``False`` to call them for real.
        max_workers: Thread cap; the pool uses at least one thread and never
            more than ``len(repos)``.

    Returns:
        One audit per repo that scored. A repo whose processing raises a
        non-network exception is logged and left out.

    Raises:
        requests.RequestException: Network and HTTP errors (including
            ``requests.HTTPError``) are re-raised rather than skipped.
    """
    if not repos:
        return []

    lang_freq = (
        _portfolio_lang_freq(repos)
        if portfolio_lang_freq is None
        else portfolio_lang_freq
    )
    scan_client = client
    if fast:
        scan_client = cast("GitHubClient", _InteractiveClient(client))

    pool_size = max(1, min(max_workers, len(repos)))
    with tempfile.TemporaryDirectory(prefix="audit-api-") as tmpdir:
        root = Path(tmpdir)

        def _score_one(repo: RepoMetadata) -> RepoAudit | None:
            try:
                checkout = materialize_api_checkout(
                    repo, scan_client, root / repo.name
                )
                analysis = run_all_analyzers(checkout, repo, scan_client)
                return score_repo(
                    repo,
                    analysis,
                    repo_path=checkout,
                    portfolio_lang_freq=lang_freq,
                    github_client=scan_client,
                    security_offline=security_offline,
                )
            except requests.RequestException:
                # Covers requests.HTTPError too (it is a subclass).
                raise
            except Exception as exc:  # noqa: BLE001 — one bad repo must not abort the scan
                logger.warning(
                    "API-only scoring failed for one repo (%s)", type(exc).__name__
                )
                return None

        with ThreadPoolExecutor(max_workers=pool_size) as pool:
            # Executor.map yields results in submission order.
            outcomes = list(pool.map(_score_one, repos))

    return [outcome for outcome in outcomes if outcome is not None]


@dataclass
class ApiOnlyReport:
    """Result of a clone-free portfolio scan, serializable via ``to_dict``."""

    # GitHub login the report was generated for.
    username: str
    # Per-repo audits, in the order they were scored.
    audits: list[RepoAudit]
    mode: str = API_ONLY_MODE
    fidelity_note: str = API_ONLY_FIDELITY_NOTE

    def to_dict(self) -> dict:
        """Return a plain dict of the report, with each audit's own ``to_dict``."""
        serialized = [audit.to_dict() for audit in self.audits]
        return {
            "username": self.username,
            "mode": self.mode,
            "fidelity_note": self.fidelity_note,
            "repo_count": len(self.audits),
            "repos": serialized,
        }


def _list_user_repos(username: str, client: GitHubClient) -> list[dict]:
    """Return the raw repo list for ``username``, trying GraphQL before REST.

    GraphQL (``bulk_fetch_repos``) is used only when the client exposes a
    truthy ``token``. If that call raises ``requests.RequestException``,
    ``KeyError`` or ``TypeError``, or returns nothing, the REST
    ``client.list_repos`` result is returned instead.
    """
    token = getattr(client, "token", None)
    if not token:
        return client.list_repos(username)

    try:
        graphql_repos = bulk_fetch_repos(
            username, token, on_progress=lambda *_: None
        )
    except (requests.RequestException, KeyError, TypeError) as exc:
        logger.warning(
            "GraphQL repo-list failed (%s); falling back to REST",
            type(exc).__name__,
        )
        return client.list_repos(username)

    if graphql_repos:
        return graphql_repos
    return client.list_repos(username)


def _select_repos(repos: list[RepoMetadata], limit: int | None) -> list[RepoMetadata]:
    """Trim ``repos`` to at most ``limit`` entries, best candidates first.

    With no limit, or when the list already fits, the input list is returned
    as-is. Otherwise repos are ordered by: not a fork, not archived, most
    recent ``pushed_at`` (missing counts as oldest), then star count.
    """
    if limit is None or len(repos) <= limit:
        return repos

    def _priority(candidate: RepoMetadata) -> tuple[bool, bool, float, int]:
        last_push = 0.0
        if candidate.pushed_at:
            last_push = candidate.pushed_at.timestamp()
        return (
            not candidate.fork,
            not candidate.archived,
            last_push,
            candidate.stars,
        )

    ordered = sorted(repos, key=_priority, reverse=True)
    return ordered[:limit]


def audit_user_api_only(
    username: str,
    client: GitHubClient,
    *,
    max_repos: int | None = None,
    fast: bool = True,
) -> ApiOnlyReport:
    """Build a clone-free report for ``username``.

    Lists the user's repos, converts each raw entry to ``RepoMetadata``
    (passing along the entry's ``_languages`` value when present), keeps at
    most ``max_repos`` of them, scores them and wraps the audits in an
    ``ApiOnlyReport``.
    """
    listing = _list_user_repos(username, client)

    # `_languages` is absent from entries that did not come with a per-repo
    # language breakdown; `.get` then hands `None` to the constructor.
    candidates = []
    for entry in listing:
        candidates.append(
            RepoMetadata.from_api_response(entry, entry.get("_languages"))
        )

    chosen = _select_repos(candidates, max_repos)
    return ApiOnlyReport(
        username=username,
        audits=score_repos_api_only(chosen, client, fast=fast),
    )
+EXPECTED_CONTENT_UNAVAILABLE_STATUSES = {404} class GitHubClientError(Exception): @@ -104,7 +111,9 @@ def _request_method( json_body: dict | list | None = None, ) -> requests.Response: """Make a non-GET request with the same rate-limit handling.""" - response = self.session.request(method, url, params=params, json=json_body, timeout=30) + response = self.session.request( + method, url, params=params, json=json_body, timeout=30 + ) self._check_rate_limit(response) response.raise_for_status() return response @@ -172,7 +181,9 @@ def _fetch_json(self, url: str, params: dict | None = None) -> object: return data - def _fetch_json_with_202_retry(self, url: str, params: dict | None = None) -> object: + def _fetch_json_with_202_retry( + self, url: str, params: dict | None = None + ) -> object: """Fetch JSON with 202 retry, checking cache first.""" if self.cache: cached = self.cache.get(url, params) @@ -216,9 +227,13 @@ def get_community_profile(self, owner: str, repo: str) -> dict: Single API call returns presence of all health files. 
""" try: - return self._fetch_json(f"{API_BASE}/repos/{owner}/{repo}/community/profile") + return self._fetch_json( + f"{API_BASE}/repos/{owner}/{repo}/community/profile" + ) except requests.HTTPError as exc: - logger.warning("Failed to fetch community profile for %s/%s: %s", owner, repo, exc) + logger.warning( + "Failed to fetch community profile for %s/%s: %s", owner, repo, exc + ) return {} def get_participation_stats(self, owner: str, repo: str) -> dict: @@ -231,7 +246,9 @@ def get_participation_stats(self, owner: str, repo: str) -> dict: f"{API_BASE}/repos/{owner}/{repo}/stats/participation" ) except requests.HTTPError as exc: - logger.warning("Failed to fetch participation for %s/%s: %s", owner, repo, exc) + logger.warning( + "Failed to fetch participation for %s/%s: %s", owner, repo, exc + ) return {} def get_authenticated_user(self) -> str | None: @@ -260,7 +277,9 @@ def _repo_list_cache_scope(self, username: str) -> str: def list_repos(self, username: str) -> list[dict]: """Fetch all repos for a user. Uses /user/repos for the authenticated user.""" # Check cache for the complete repo list - cache_key = f"{API_BASE}/list_repos/{username}/{self._repo_list_cache_scope(username)}" + cache_key = ( + f"{API_BASE}/list_repos/{username}/{self._repo_list_cache_scope(username)}" + ) if self.cache: cached = self.cache.get(cache_key) if cached is not None: @@ -294,7 +313,9 @@ def get_languages(self, owner: str, repo: str) -> dict[str, int]: logger.warning("Failed to fetch languages for %s/%s: %s", owner, repo, exc) return {} - def get_releases(self, owner: str, repo: str, per_page: int = 10) -> tuple[list[dict], bool]: + def get_releases( + self, owner: str, repo: str, per_page: int = 10 + ) -> tuple[list[dict], bool]: """Fetch releases for a repo. Returns a (releases, available) tuple. 
@@ -311,7 +332,9 @@ def get_releases(self, owner: str, repo: str, per_page: int = 10) -> tuple[list[ except requests.HTTPError as exc: status = self._http_error_status(exc) if status == 404: - logger.debug("Releases endpoint unavailable for %s/%s (404)", owner, repo) + logger.debug( + "Releases endpoint unavailable for %s/%s (404)", owner, repo + ) return [], False logger.warning("Failed to fetch releases for %s/%s: %s", owner, repo, exc) return [], True @@ -377,7 +400,9 @@ def get_repo_security_and_analysis(self, owner: str, repo: str) -> dict: } except requests.HTTPError as exc: status = self._http_error_status(exc) - logger.warning("Failed to fetch repo security metadata for %s/%s: %s", owner, repo, exc) + logger.warning( + "Failed to fetch repo security metadata for %s/%s: %s", owner, repo, exc + ) return { "available": False, "http_status": status, @@ -425,9 +450,7 @@ def get_code_scanning_alert_count(self, owner: str, repo: str) -> dict: for alert in alerts: rule = alert.get("rule", {}) if isinstance(alert, dict) else {} raw = ( - rule.get("security_severity_level") - or rule.get("severity") - or "" + rule.get("security_severity_level") or rule.get("severity") or "" ).lower() rule_id = str(rule.get("id") or "") if raw == "critical": @@ -472,7 +495,9 @@ def get_code_scanning_alert_count(self, owner: str, repo: str) -> dict: def get_sbom_exportability(self, owner: str, repo: str) -> dict: """Check whether the SBOM export endpoint is available for a repo.""" try: - data = self._fetch_json(f"{API_BASE}/repos/{owner}/{repo}/dependency-graph/sbom") + data = self._fetch_json( + f"{API_BASE}/repos/{owner}/{repo}/dependency-graph/sbom" + ) payload = data if isinstance(data, dict) else {} packages = payload.get("sbom", {}).get("packages", []) return { @@ -482,7 +507,9 @@ def get_sbom_exportability(self, owner: str, repo: str) -> dict: } except requests.HTTPError as exc: status = self._http_error_status(exc) - logger.warning("Failed to fetch SBOM exportability for 
%s/%s: %s", owner, repo, exc) + logger.warning( + "Failed to fetch SBOM exportability for %s/%s: %s", owner, repo, exc + ) return { "available": False, "http_status": status, @@ -531,7 +558,9 @@ def get_dependency_sbom(self, owner: str, repo: str) -> dict: logger.warning("Failed to fetch SBOM for %s/%s: %s", owner, repo, exc) return {"available": False, "http_status": status, "reason": str(exc)} except requests.RequestException as exc: - logger.warning("Network error fetching SBOM for %s/%s: %s", owner, repo, exc) + logger.warning( + "Network error fetching SBOM for %s/%s: %s", owner, repo, exc + ) return {"available": False, "http_status": None, "reason": str(exc)} payload = data if isinstance(data, dict) else {} @@ -662,6 +691,95 @@ def get_file_sha( return None raise + def get_repo_tree( + self, + owner: str, + repo: str, + ref: str, + ) -> dict: + """List every file and directory path in a repo via the Git Trees API. + + A single recursive call returns the whole tree, powering clone-free + structure / testing / CI / docs presence signals for API-only scoring. + Fails soft (``available=False``) when the tree is unreadable (missing or + private repo, empty repo, gone). Unexpected statuses (auth, rate-limit, + 5xx) propagate so hosted reports can return 429/502 instead of caching + an inaccurate empty skeleton. 
def get_file_content(
    self,
    owner: str,
    repo: str,
    path: str,
    *,
    ref: str | None = None,
    max_bytes: int = 1_000_000,
) -> str | None:
    """Fetch and base64-decode a single file's text via the Contents API.

    Args:
        owner: Repository owner (user or org login).
        repo: Repository name.
        path: Repo-relative file path, as it appears in the tree (unescaped).
        ref: Optional branch, tag, or commit SHA; omitted from the request
            when ``None``.
        max_bytes: Upper bound on the file size reported by the API.

    Returns:
        The file's text, or ``None`` when the request fails with an HTTP
        error (statuses outside ``EXPECTED_CONTENT_UNAVAILABLE_STATUSES`` are
        logged as warnings), the path is not a regular file, the reported
        size exceeds ``max_bytes``, the payload is not base64-encoded, or the
        bytes are not valid UTF-8.
    """
    # Percent-encode the path: characters such as '#', '?', '%' and spaces
    # are legal in file names but would otherwise be parsed as URL syntax
    # (fragment / query) and hit the wrong resource. '/' stays literal so
    # nested paths keep their directory structure.
    encoded_path = quote(path, safe="/")
    url = f"{API_BASE}/repos/{owner}/{repo}/contents/{encoded_path}"
    params: dict = {}
    if ref is not None:
        params["ref"] = ref
    try:
        data = self._fetch_json(url, params=params or None)
    except requests.HTTPError as exc:
        status = self._http_error_status(exc)
        if status not in EXPECTED_CONTENT_UNAVAILABLE_STATUSES:
            logger.warning(
                "Failed to fetch %s from %s/%s (HTTP %s)", path, owner, repo, status
            )
        return None

    # A directory listing comes back as a list; symlinks/submodules carry a
    # different "type". Only plain files have decodable content.
    if not isinstance(data, dict) or data.get("type") != "file":
        return None
    # `or 0` tolerates an explicit null size instead of raising on int(None).
    if int(data.get("size") or 0) > max_bytes:
        return None
    if data.get("encoding") != "base64":
        return None
    try:
        decoded = base64.b64decode(data.get("content") or "")
    except ValueError:  # binascii.Error is a ValueError subclass
        return None
    try:
        return decoded.decode("utf-8")
    except UnicodeDecodeError:
        return None
self._http_error_status(exc) logger.warning( - "Failed to update issue %s for %s/%s: %s", issue_number, owner, repo, exc + "Failed to update issue %s (HTTP %s)", + issue_number, + status, ) return { "ok": False, @@ -764,7 +890,9 @@ def update_issue(self, owner: str, repo: str, issue_number: int, payload: dict) def get_repo_custom_property_values(self, owner: str, repo: str) -> dict: """Get current repository custom property values when available.""" try: - data = self._fetch_json(f"{API_BASE}/repos/{owner}/{repo}/properties/values") + data = self._fetch_json( + f"{API_BASE}/repos/{owner}/{repo}/properties/values" + ) values = {} if isinstance(data, list): for item in data: @@ -775,7 +903,9 @@ def get_repo_custom_property_values(self, owner: str, repo: str) -> dict: } except requests.HTTPError as exc: status = self._http_error_status(exc) - logger.warning("Failed to fetch custom properties for %s/%s: %s", owner, repo, exc) + logger.warning( + "Failed to fetch custom properties for %s/%s: %s", owner, repo, exc + ) return { "available": False, "http_status": status, @@ -792,7 +922,9 @@ def list_org_custom_properties(self, owner: str) -> dict: } except requests.HTTPError as exc: status = self._http_error_status(exc) - logger.warning("Failed to list custom property schema for %s: %s", owner, exc) + logger.warning( + "Failed to list custom property schema for %s: %s", owner, exc + ) return { "available": False, "http_status": status, @@ -818,7 +950,9 @@ def update_repo_custom_property_values( for item in schema.get("properties", []) if item.get("property_name") } - to_update = {name: value for name, value in properties.items() if name in allowed} + to_update = { + name: value for name, value in properties.items() if name in allowed + } before = self.get_repo_custom_property_values(owner, repo) if not to_update: return { @@ -831,7 +965,8 @@ def update_repo_custom_property_values( payload = { "properties": [ - {"property_name": name, "value": value} for name, value in 
to_update.items() + {"property_name": name, "value": value} + for name, value in to_update.items() ] } try: @@ -851,7 +986,9 @@ def update_repo_custom_property_values( } except requests.HTTPError as exc: status = self._http_error_status(exc) - logger.warning("Failed to update custom properties for %s/%s: %s", owner, repo, exc) + logger.warning( + "Failed to update custom properties for %s/%s: %s", owner, repo, exc + ) return { "ok": False, "status": "failed", @@ -920,10 +1057,15 @@ def get_project_v2(self, owner: str, project_number: int) -> dict: } """ try: - data = self._graphql_query(query, {"login": owner, "number": int(project_number)}) + data = self._graphql_query( + query, {"login": owner, "number": int(project_number)} + ) except (requests.HTTPError, GitHubClientError) as exc: logger.warning( - "Failed to resolve GitHub Project %s #%s: %s", owner, project_number, exc + "Failed to resolve GitHub Project %s #%s: %s", + owner, + project_number, + exc, ) return { "available": False, @@ -932,9 +1074,9 @@ def get_project_v2(self, owner: str, project_number: int) -> dict: "fields": {}, } - project = (data.get("user") or {}).get("projectV2") or (data.get("organization") or {}).get( - "projectV2" - ) + project = (data.get("user") or {}).get("projectV2") or ( + data.get("organization") or {} + ).get("projectV2") if not isinstance(project, dict): return { "available": False, @@ -1006,7 +1148,9 @@ def find_project_v2_item_by_issue( after: str | None = None try: while True: - data = self._graphql_query(query, {"projectId": project_id, "after": after}) + data = self._graphql_query( + query, {"projectId": project_id, "after": after} + ) items = ((data.get("node") or {}).get("items") or {}).get("nodes") or [] for item in items: content = (item or {}).get("content") or {} @@ -1021,13 +1165,17 @@ def find_project_v2_item_by_issue( "issue_url": content.get("url", ""), }, } - page_info = ((data.get("node") or {}).get("items") or {}).get("pageInfo") or {} + page_info = 
((data.get("node") or {}).get("items") or {}).get( + "pageInfo" + ) or {} if not page_info.get("hasNextPage"): break after = page_info.get("endCursor") except (requests.HTTPError, GitHubClientError) as exc: logger.warning( - "Failed to inspect GitHub Project item for issue %s: %s", issue_node_id, exc + "Failed to inspect GitHub Project item for issue %s: %s", + issue_node_id, + exc, ) return {"available": False, "item": None} return {"available": True, "item": None} @@ -1063,7 +1211,9 @@ def find_project_v2_item_by_id(self, project_id: str, item_id: str) -> dict: after: str | None = None try: while True: - data = self._graphql_query(query, {"projectId": project_id, "after": after}) + data = self._graphql_query( + query, {"projectId": project_id, "after": after} + ) items = ((data.get("node") or {}).get("items") or {}).get("nodes") or [] for item in items: if (item or {}).get("id") == item_id: @@ -1078,7 +1228,9 @@ def find_project_v2_item_by_id(self, project_id: str, item_id: str) -> dict: "issue_url": content.get("url", ""), }, } - page_info = ((data.get("node") or {}).get("items") or {}).get("pageInfo") or {} + page_info = ((data.get("node") or {}).get("items") or {}).get( + "pageInfo" + ) or {} if not page_info.get("hasNextPage"): break after = page_info.get("endCursor") @@ -1110,7 +1262,10 @@ def add_issue_to_project_v2(self, project_id: str, issue_node_id: str) -> dict: } except (requests.HTTPError, GitHubClientError) as exc: logger.warning( - "Failed to add issue %s to project %s: %s", issue_node_id, project_id, exc + "Failed to add issue %s to project %s: %s", + issue_node_id, + project_id, + exc, ) return {"ok": False, "status": "failed", "item_id": ""} @@ -1175,7 +1330,10 @@ def update_project_v2_item_field( return {"ok": True, "status": "updated"} except (requests.HTTPError, GitHubClientError) as exc: logger.warning( - "Failed to update project field %s on item %s: %s", field_id, item_id, exc + "Failed to update project field %s on item %s: %s", + 
field_id, + item_id, + exc, ) return {"ok": False, "status": "failed"} @@ -1215,7 +1373,9 @@ def get_commit_activity(self, owner: str, repo: str) -> list[dict]: ) return data if isinstance(data, list) else [] except requests.HTTPError as exc: - logger.warning("Failed to fetch commit activity for %s/%s: %s", owner, repo, exc) + logger.warning( + "Failed to fetch commit activity for %s/%s: %s", owner, repo, exc + ) return [] def get_contributor_stats(self, owner: str, repo: str) -> list[dict]: @@ -1229,7 +1389,9 @@ def get_contributor_stats(self, owner: str, repo: str) -> list[dict]: ) return data if isinstance(data, list) else [] except requests.HTTPError as exc: - logger.warning("Failed to fetch contributor stats for %s/%s: %s", owner, repo, exc) + logger.warning( + "Failed to fetch contributor stats for %s/%s: %s", owner, repo, exc + ) return [] def get_repo_metadata( diff --git a/src/serve/api.py b/src/serve/api.py new file mode 100644 index 0000000..0fb1776 --- /dev/null +++ b/src/serve/api.py @@ -0,0 +1,220 @@ +"""Hosted clone-free report endpoint — the HTTP surface over ``audit_user_api_only``. + +Exposes ``GET /api/report/{username}`` returning :meth:`ApiOnlyReport.to_dict` +JSON. This is the free "paste your GitHub username" report's backend: it lists a +user's repos and scores them from the GitHub API alone (no cloning), via the +existing engine in :mod:`src.api_only`. + +The route is defined as a plain ``def`` so FastAPI runs the blocking, +network-bound scan in a worker thread rather than on the event loop. The +:class:`~src.github_client.GitHubClient` is supplied through a FastAPI +dependency so tests can override it and a deployment can inject a shared +server-side token. 
def cors_origins() -> list[str]:
    """Return the browser origins allowed to call the API.

    Reads the comma-separated ``CORS_ORIGINS_ENV_VAR`` value. When it is unset
    or blank, a copy of ``DEFAULT_CORS_ORIGINS`` is returned; otherwise each
    entry is whitespace-trimmed and empty entries are dropped.
    """
    configured = os.environ.get(CORS_ORIGINS_ENV_VAR, "").strip()
    if configured:
        trimmed = (piece.strip() for piece in configured.split(","))
        return [piece for piece in trimmed if piece]
    return list(DEFAULT_CORS_ORIGINS)
+ + A fresh client (and ``requests.Session``) is built per request on purpose: + the route runs in FastAPI's threadpool, so a shared Session would be touched + by concurrent worker threads. Connection-pool reuse via a shared server-side + client is a future optimization; the report cache and per-IP throttle below + already bound how often this client actually reaches GitHub. + """ + return GitHubClient(token=os.environ.get(TOKEN_ENV_VAR)) + + +def get_report_cache(request: Request) -> ReportCache: + """Return the app-wide report cache (built once in the app factory).""" + return request.app.state.report_cache + + +def get_rate_limiter(request: Request) -> RateLimiter: + """Return the app-wide per-IP rate limiter (built once in the app factory).""" + return request.app.state.rate_limiter + + +def _trust_forwarded_for() -> bool: + return os.environ.get(TRUST_FORWARDED_ENV_VAR, "").strip().lower() in { + "1", + "true", + "yes", + } + + +def client_ip(request: Request) -> str: + """Best-effort client IP used as the throttle key. + + X-Forwarded-For is client-spoofable, so honoring it blindly would let a + caller pick a fresh throttle bucket per request. We only trust it when + GHRA_TRUST_FORWARDED_FOR is set — i.e. a known proxy that overwrites the + header sits in front. Otherwise we use the direct peer address. (When the + ASGI transport supplies no client, all such requests share one bucket.) 
+ """ + if _trust_forwarded_for(): + forwarded = request.headers.get("x-forwarded-for") + if forwarded: + return forwarded.split(",")[0].strip() + return request.client.host if request.client else "unknown" + + +def get_waitlist_store(request: Request) -> WaitlistStore: + """Return the app-wide waitlist store (built once in the app factory).""" + return request.app.state.waitlist_store + + +def _is_rate_limited(status: int | None, response: requests.Response | None) -> bool: + """True when a GitHub error is rate-limiting (429, or 403 with quota at 0).""" + if status == 429: + return True + if status == 403 and response is not None: + return response.headers.get("X-RateLimit-Remaining") == "0" + return False + + +def _http_exception(exc: requests.HTTPError, username: str) -> HTTPException: + """Map a GitHub HTTP error onto the endpoint's client-facing status.""" + response = getattr(exc, "response", None) + status = getattr(response, "status_code", None) + if status == 404: + return HTTPException( + status_code=404, detail=f"GitHub user '{username}' not found" + ) + if _is_rate_limited(status, response): + return HTTPException( + status_code=429, detail="GitHub rate limit reached; try again later" + ) + if status == 403: + return HTTPException( + status_code=403, detail="GitHub denied access to this resource" + ) + return HTTPException(status_code=502, detail="Upstream GitHub error") + + +@router.get("/health") +def health( + client: GitHubClient = Depends(get_github_client), +) -> dict[str, Any]: + """Liveness/readiness probe for the deployment platform. + + Reports whether a server-side GitHub token is configured — without it the + endpoint runs on the unauthenticated 60 req/hr limit and degrades fast. 
+ """ + return {"status": "ok", "github_token": bool(getattr(client, "token", None))} + + +@router.get("/report/{username}") +def report( + request: Request, + username: str = Path(..., description="GitHub username or org name"), + client: GitHubClient = Depends(get_github_client), + cache: ReportCache = Depends(get_report_cache), + limiter: RateLimiter = Depends(get_rate_limiter), +) -> dict[str, Any]: + """Score a user's portfolio clone-free and return the report as JSON. + + Always scans up to ``MAX_REPOS_CAP`` repos; there is no per-request repo + knob, so a username fully determines the cached report. + """ + # Throttle first — cheap, and it covers cache hits and garbage input alike. + if not limiter.allow(client_ip(request)): + raise HTTPException( + status_code=429, detail="Rate limit exceeded; try again later" + ) + + try: + safe_username = validate_username(username) + except ValueError as exc: + raise HTTPException(status_code=422, detail=str(exc)) from exc + + cached = cache.get(safe_username) + if cached is not None: + return cached + + try: + result = audit_user_api_only( + safe_username, client, max_repos=MAX_REPOS_CAP, fast=True + ) + except requests.HTTPError as exc: + raise _http_exception(exc, safe_username) from exc + except (requests.RequestException, GitHubClientError) as exc: + # Network failures (DNS, timeout, connection reset) and non-HTTP client + # errors surface as a clean 502 rather than an unstructured 500. + raise HTTPException(status_code=502, detail="Upstream GitHub error") from exc + + payload = result.to_dict() + cache.put(safe_username, payload) + return payload + + +class WaitlistSignup(BaseModel): + """Body for the monitoring-waitlist capture.""" + + email: str = Field(..., max_length=254) # RFC 5321 max; matches is_valid_email + # Optional context — e.g. the username whose report the visitor was viewing. 
+ source: str | None = Field(default=None, max_length=120) + + +@router.post("/waitlist", status_code=201) +def join_waitlist( + request: Request, + signup: WaitlistSignup, + store: WaitlistStore = Depends(get_waitlist_store), + limiter: RateLimiter = Depends(get_rate_limiter), +) -> dict[str, Any]: + """Capture an email for the monitoring waitlist (idempotent on email).""" + # Separate throttle bucket so browsing reports never exhausts signup budget. + if not limiter.allow(client_ip(request), bucket="waitlist"): + raise HTTPException( + status_code=429, detail="Rate limit exceeded; try again later" + ) + if not is_valid_email(signup.email): + raise HTTPException(status_code=422, detail="Enter a valid email address") + + created = store.add(signup.email, source=signup.source) + # Idempotent: a repeat email is a success, not an error. + return {"status": "joined" if created else "already_joined"} diff --git a/src/serve/app.py b/src/serve/app.py index 7b026cf..7a60d53 100644 --- a/src/serve/app.py +++ b/src/serve/app.py @@ -8,9 +8,18 @@ def create_app(output_dir: Path | None = None) -> "FastAPI": # noqa: F821 """Create and configure the FastAPI application.""" from fastapi import FastAPI + from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles + from src.serve.api import cors_origins + from src.serve.api import router as api_router + from src.serve.hosting import ( + build_kv_store, + build_rate_limiter, + build_report_cache, + ) from src.serve.routes import router + from src.serve.waitlist import build_waitlist_store app = FastAPI( title="Audit Serve", @@ -18,17 +27,37 @@ def create_app(output_dir: Path | None = None) -> "FastAPI": # noqa: F821 version="1.0.0", ) + # CORS so the Next.js frontend can call the API from the browser: GET for + # reports, POST for the waitlist. No credentials (public, unauthenticated). 
+ app.add_middleware( + CORSMiddleware, + allow_origins=cors_origins(), + allow_methods=["GET", "POST"], + allow_headers=["*"], + ) + # Resolve output dir — default to ./output relative to cwd app.state.output_dir = output_dir or (Path.cwd() / "output") + # Hosting guards for the public report endpoint: one shared KV store backs + # both the report cache and the per-IP throttle (in-memory unless a Redis + # URL is configured). Built once per app so state is shared across requests. + kv_store = build_kv_store() + app.state.report_cache = build_report_cache(kv_store) + app.state.rate_limiter = build_rate_limiter(kv_store) + app.state.waitlist_store = build_waitlist_store(app.state.output_dir) + static_dir = Path(__file__).parent / "static" app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") + app.include_router(api_router) app.include_router(router) return app -def run_serve(port: int = 8080, host: str = "127.0.0.1", output_dir: Path | None = None) -> None: +def run_serve( + port: int = 8080, host: str = "127.0.0.1", output_dir: Path | None = None +) -> None: """Launch uvicorn with the audit serve app.""" import uvicorn diff --git a/src/serve/hosting.py b/src/serve/hosting.py new file mode 100644 index 0000000..ec8ffa1 --- /dev/null +++ b/src/serve/hosting.py @@ -0,0 +1,186 @@ +"""Hosting resilience: a pluggable KV store, report cache, and per-IP throttle. + +A public free endpoint that scans the GitHub API cannot survive without two +guards: caching (so the same username isn't re-scanned on every hit, which +otherwise burns the shared rate limit) and per-IP throttling (so one client +can't exhaust it). Both sit behind a small ``KVStore`` protocol so the default +in-process backend works locally and in tests, while a Redis/Upstash backend +drops in via ``GHRA_REDIS_URL`` for a multi-instance deployment. + +The in-memory store is thread-safe because the report route runs in FastAPI's +threadpool — concurrent workers share one store instance. 
+""" + +from __future__ import annotations + +import json +import os +import threading +import time +from typing import Callable, Protocol, TypeVar, runtime_checkable + +_V = TypeVar("_V") + +# Defaults chosen for a free public tier; all overridable via env. +DEFAULT_REPORT_TTL_SECONDS = 3600 # 1h — within the 1–6h cache window. +DEFAULT_RATE_LIMIT = 20 # requests per window, per IP. +DEFAULT_RATE_WINDOW_SECONDS = 3600 # 1h. + +REPORT_TTL_ENV_VAR = "GHRA_REPORT_TTL_SECONDS" +RATE_LIMIT_ENV_VAR = "GHRA_RATE_LIMIT" +RATE_WINDOW_ENV_VAR = "GHRA_RATE_WINDOW_SECONDS" +REDIS_URL_ENV_VAR = "GHRA_REDIS_URL" + + +@runtime_checkable +class KVStore(Protocol): + """Minimal string KV interface backing both the cache and the throttle.""" + + def get(self, key: str) -> str | None: + raise NotImplementedError + + def set(self, key: str, value: str, ttl_seconds: int) -> None: + raise NotImplementedError + + def incr(self, key: str, ttl_seconds: int) -> int: + """Increment a counter, setting its TTL on first creation; return count.""" + raise NotImplementedError + + +class InMemoryKVStore: + """Thread-safe, expiring in-process KV store (default backend). + + Entries expire lazily on access; to keep memory bounded under churn (e.g. a + counter per unique client IP), each dict is swept of expired entries once it + grows past ``reap_threshold``. The Redis backend relies on native TTL instead. 
+ """ + + def __init__( + self, + clock: Callable[[], float] = time.monotonic, + reap_threshold: int = 10_000, + ) -> None: + self._clock = clock + self._reap_threshold = reap_threshold + self._lock = threading.Lock() + self._values: dict[str, tuple[float, str]] = {} + self._counters: dict[str, tuple[float, int]] = {} + + def _reap_locked(self, store: dict[str, tuple[float, _V]], now: float) -> None: + """Drop expired entries when a store outgrows the threshold (lock held).""" + if len(store) <= self._reap_threshold: + return + for key in [k for k, (expiry, _) in store.items() if expiry <= now]: + del store[key] + + def get(self, key: str) -> str | None: + with self._lock: + now = self._clock() + entry = self._values.get(key) + if entry is None: + return None + expiry, value = entry + if expiry <= now: + del self._values[key] + return None + return value + + def set(self, key: str, value: str, ttl_seconds: int) -> None: + with self._lock: + now = self._clock() + self._reap_locked(self._values, now) + self._values[key] = (now + ttl_seconds, value) + + def incr(self, key: str, ttl_seconds: int) -> int: + with self._lock: + now = self._clock() + self._reap_locked(self._counters, now) + entry = self._counters.get(key) + if entry is None or entry[0] <= now: + # New window: start at 1 and stamp the expiry. 
class ReportCache:
    """Cache serialized report payloads by (normalized) username.

    Payloads are stored as JSON strings in the supplied ``KVStore`` under
    ``report:<lowercased username>`` with a fixed TTL. A non-positive TTL
    disables the cache: ``get`` always misses and ``put`` is a no-op.
    """

    def __init__(self, store: KVStore, ttl_seconds: int) -> None:
        self._store = store
        self._ttl = ttl_seconds

    @property
    def enabled(self) -> bool:
        """Whether caching is active (TTL strictly positive)."""
        return self._ttl > 0

    @staticmethod
    def _key(username: str) -> str:
        # Lowercased so differently-cased spellings share one entry.
        return f"report:{username.lower()}"

    def get(self, username: str) -> dict | None:
        """Return the cached payload for ``username``, or ``None`` on a miss.

        An entry that is not valid JSON, or that decodes to something other
        than an object, is treated as a miss rather than raised: the caller
        then recomputes the report and overwrites the bad entry.
        """
        if not self.enabled:
            return None
        raw = self._store.get(self._key(username))
        if raw is None:
            return None
        try:
            payload = json.loads(raw)
        except ValueError:  # json.JSONDecodeError subclasses ValueError
            return None
        return payload if isinstance(payload, dict) else None

    def put(self, username: str, payload: dict) -> None:
        """Serialize ``payload`` and store it for the configured TTL."""
        if not self.enabled:
            return
        self._store.set(self._key(username), json.dumps(payload), self._ttl)
class RedisKVStore:
    """KVStore backed by redis-py, satisfying the ``hosting.KVStore`` protocol."""

    def __init__(self, url: str) -> None:
        """Connect lazily to the Redis server at ``url`` (``redis://`` or ``rediss://``)."""
        import redis  # type: ignore[import-not-found] # optional dep; installed only when Redis is configured

        # decode_responses=True so get/set round-trip str, matching KVStore.
        self._client = redis.Redis.from_url(url, decode_responses=True)

    def get(self, key: str) -> str | None:
        """Return the stored string for ``key``, or ``None`` when absent/expired."""
        value = self._client.get(key)
        return value if value is None else str(value)

    def set(self, key: str, value: str, ttl_seconds: int) -> None:
        """Store ``value`` under ``key`` with an expiry of ``ttl_seconds``."""
        self._client.setex(key, ttl_seconds, value)

    def incr(self, key: str, ttl_seconds: int) -> int:
        """Increment the fixed-window counter at ``key`` and return the new count.

        The expiry is stamped whenever the key has none, which covers both the
        first hit of a window and a counter orphaned by an earlier request that
        incremented but never managed to set its TTL (connection drop, process
        kill). Without that repair such a key would never expire and its owner
        would stay throttled forever. A key that already has a TTL is left
        alone, so later increments never extend the window.
        """
        # INCR + TTL travel together in one MULTI/EXEC round trip.
        pipe = self._client.pipeline()
        pipe.incr(key)
        pipe.ttl(key)
        count, remaining = pipe.execute()
        # TTL reports -1 for "exists but has no expiry". Plain EXPIRE (no NX)
        # keeps this compatible with every Redis server version.
        if remaining == -1:
            self._client.expire(key, ttl_seconds)
        return int(count)
not in domain: + return False + return all(part for part in domain.split(".")) + + +@runtime_checkable +class WaitlistStore(Protocol): + def add(self, email: str, source: str | None = None) -> bool: + """Record an email; return True if newly added, False if already present.""" + raise NotImplementedError + + def count(self) -> int: + raise NotImplementedError + + +class SqliteWaitlistStore: + """SQLite-backed waitlist with email as the dedup key.""" + + def __init__(self, path: str) -> None: + self._path = path + self._lock = threading.Lock() + # Ensure the parent dir exists so the DB can be created on a fresh host + # (e.g. a container before its volume path is populated). + parent = Path(path).parent + if parent != Path(""): + parent.mkdir(parents=True, exist_ok=True) + with self._lock, closing(self._connect()) as conn, conn: + conn.execute( + "CREATE TABLE IF NOT EXISTS waitlist (" + "email TEXT PRIMARY KEY, source TEXT, created_at TEXT NOT NULL)" + ) + + def _connect(self) -> sqlite3.Connection: + return sqlite3.connect(self._path) + + def add(self, email: str, source: str | None = None) -> bool: + normalized = email.strip().lower() + now = datetime.now(timezone.utc).isoformat() + # closing() guarantees the fd is released; the inner `conn` context + # manager commits the transaction. The lock serializes writers. + with self._lock, closing(self._connect()) as conn, conn: + cur = conn.execute( + "INSERT OR IGNORE INTO waitlist(email, source, created_at) " + "VALUES(?, ?, ?)", + (normalized, source, now), + ) + return cur.rowcount > 0 + + def count(self) -> int: + with self._lock, closing(self._connect()) as conn: + return int(conn.execute("SELECT COUNT(*) FROM waitlist").fetchone()[0]) + + +def build_waitlist_store(default_dir: str | Path | None = None) -> WaitlistStore: + """Build the waitlist store. 
``GHRA_WAITLIST_DB`` wins; otherwise the DB lives + under ``default_dir`` (the app's output dir) so it never lands in the cwd.""" + configured = os.environ.get(WAITLIST_DB_ENV_VAR) + if configured: + path = configured + elif default_dir is not None: + path = os.path.join(str(default_dir), DEFAULT_WAITLIST_DB) + else: + path = DEFAULT_WAITLIST_DB + return SqliteWaitlistStore(path) diff --git a/tests/test_api_checkout.py b/tests/test_api_checkout.py new file mode 100644 index 0000000..63e52f7 --- /dev/null +++ b/tests/test_api_checkout.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from pathlib import Path + +from src.api_checkout import materialize_api_checkout, materialize_api_workspace +from src.models import RepoMetadata + + +def _meta(name: str = "demo", full_name: str = "octocat/demo") -> RepoMetadata: + dt = datetime(2020, 1, 1, tzinfo=timezone.utc) + return RepoMetadata( + name=name, + full_name=full_name, + description="d", + language="Python", + languages={"Python": 100}, + private=False, + fork=False, + archived=False, + created_at=dt, + updated_at=dt, + pushed_at=dt, + default_branch="main", + stars=1, + forks=0, + open_issues=0, + size_kb=10, + html_url="https://example/x", + clone_url="https://example/x.git", + topics=[], + ) + + +class _FakeClient: + """Duck-typed stand-in for GitHubClient — no HTTP.""" + + def __init__(self, tree: dict, contents: dict[str, str] | None = None) -> None: + self._tree = tree + self._contents = contents or {} + self.content_requests: list[str] = [] + + def get_repo_tree(self, owner: str, repo: str, ref: str) -> dict: + return self._tree + + def get_file_content( + self, + owner: str, + repo: str, + path: str, + *, + ref: str | None = None, + max_bytes: int = 1_000_000, + ) -> str | None: + self.content_requests.append(path) + return self._contents.get(path) + + +def test_materialize_creates_skeleton_dirs_and_files(tmp_path): + tree = { + "available": True, + 
"truncated": False, + "files": ["README.md", "src/main.py", "tests/test_main.py"], + "dirs": ["src", "tests"], + } + client = _FakeClient(tree, contents={"README.md": "# Demo\nHello\n"}) + dest = tmp_path / "demo" + + result = materialize_api_checkout(_meta(), client, dest) + + assert result == dest + assert (dest / "src").is_dir() + assert (dest / "tests").is_dir() + assert (dest / "src" / "main.py").is_file() + assert (dest / "tests" / "test_main.py").is_file() + + +def test_curated_content_files_are_written_with_real_content(tmp_path): + tree = { + "available": True, + "truncated": False, + "files": ["README.md", "pyproject.toml", "src/main.py"], + "dirs": ["src"], + } + client = _FakeClient( + tree, + contents={ + "README.md": "# Title\n\nLong readme body.\n", + "pyproject.toml": "[project]\nname='demo'\n", + }, + ) + dest = tmp_path / "demo" + + materialize_api_checkout(_meta(), client, dest) + + assert (dest / "README.md").read_text() == "# Title\n\nLong readme body.\n" + assert "name='demo'" in (dest / "pyproject.toml").read_text() + # Source files are presence-only (empty) — never content-fetched. + assert (dest / "src" / "main.py").read_text() == "" + assert "src/main.py" not in client.content_requests + + +def test_unavailable_tree_yields_empty_dir(tmp_path): + client = _FakeClient( + {"available": False, "files": [], "dirs": [], "truncated": False} + ) + dest = tmp_path / "empty" + + result = materialize_api_checkout(_meta(), client, dest) + + assert result == dest + assert dest.is_dir() + assert list(dest.iterdir()) == [] + + +def test_path_traversal_entries_are_rejected(tmp_path): + tree = { + "available": True, + "truncated": False, + "files": ["../escape.txt", "/abs/evil.txt", "ok.py"], + "dirs": ["../evildir"], + } + client = _FakeClient(tree) + dest = tmp_path / "demo" + + materialize_api_checkout(_meta(), client, dest) + + # Nothing escaped the destination directory. 
def test_content_fetch_budget_is_bounded(tmp_path):
    """With 30 README candidates and ``max_content_files=5``, exactly 5 are fetched."""
    package_dirs = [f"pkg{i}" for i in range(30)]
    readme_paths = [f"pkg{i}/README.md" for i in range(30)]
    tree = {
        "available": True,
        "truncated": False,
        "files": readme_paths,
        "dirs": package_dirs,
    }
    client = _FakeClient(tree, contents=dict.fromkeys(readme_paths, "# readme\n"))

    materialize_api_checkout(_meta(), client, tmp_path / "demo", max_content_files=5)

    fetched = client.content_requests
    assert len(fetched) == 5
def _meta_ranked(
    name: str,
    *,
    fork: bool = False,
    archived: bool = False,
    pushed_year: int = 2024,
    stars: int = 0,
) -> RepoMetadata:
    """Build a ``RepoMetadata`` for ``octocat/<name>`` with the ranking fields overridden.

    ``pushed_at`` is set to 1 January of ``pushed_year`` (UTC).
    """
    meta = _meta(name=name, full_name=f"octocat/{name}")
    overrides = {
        "fork": fork,
        "archived": archived,
        "pushed_at": datetime(pushed_year, 1, 1, tzinfo=timezone.utc),
        "stars": stars,
    }
    for attribute, value in overrides.items():
        setattr(meta, attribute, value)
    return meta
+ assert [r.name for r in picked] == ["recent", "older"] + + +def test_select_repos_handles_missing_pushed_at() -> None: + a = _meta_ranked("a", pushed_year=2025) + b = _meta_ranked("b") + b.pushed_at = None # must not crash the sort + picked = _select_repos([a, b], 1) + assert picked[0].name == "a" + + +class _RepoListClient: + """Stub exposing only the repo-list surface (.token + .list_repos).""" + + def __init__(self, token: str | None, rest_repos: list[dict]) -> None: + self.token = token + self._rest_repos = rest_repos + self.list_repos_calls = 0 + + def list_repos(self, username: str) -> list[dict]: + self.list_repos_calls += 1 + return self._rest_repos + + +def test_list_user_repos_prefers_graphql_with_token() -> None: + client = _RepoListClient(token="t", rest_repos=[{"name": "rest"}]) + gql = [{"name": "graphql"}] + with patch("src.api_only.bulk_fetch_repos", return_value=gql) as mock_gql: + result = _list_user_repos("octocat", client) # type: ignore[arg-type] + assert result == gql + assert client.list_repos_calls == 0 + mock_gql.assert_called_once() + + +def test_list_user_repos_uses_rest_without_token() -> None: + client = _RepoListClient(token=None, rest_repos=[{"name": "rest"}]) + with patch("src.api_only.bulk_fetch_repos") as mock_gql: + result = _list_user_repos("octocat", client) # type: ignore[arg-type] + assert result == [{"name": "rest"}] + assert client.list_repos_calls == 1 + mock_gql.assert_not_called() + + +def test_list_user_repos_falls_back_when_graphql_user_null() -> None: + # GraphQL returns user: null → mapping raises TypeError → fall back to REST. 
+ client = _RepoListClient(token="t", rest_repos=[{"name": "rest"}]) + with patch("src.api_only.bulk_fetch_repos", side_effect=TypeError("user is None")): + result = _list_user_repos("ghost", client) # type: ignore[arg-type] + assert result == [{"name": "rest"}] + assert client.list_repos_calls == 1 + + +def test_list_user_repos_falls_back_on_graphql_http_error() -> None: + client = _RepoListClient(token="t", rest_repos=[{"name": "rest"}]) + with patch( + "src.api_only.bulk_fetch_repos", + side_effect=requests.ConnectionError("boom"), + ): + result = _list_user_repos("octocat", client) # type: ignore[arg-type] + assert result == [{"name": "rest"}] + assert client.list_repos_calls == 1 + + +def _meta( + name: str = "demo", full_name: str = "octocat/demo", language: str = "Python" +) -> RepoMetadata: + dt = datetime(2024, 6, 1, tzinfo=timezone.utc) + return RepoMetadata( + name=name, + full_name=full_name, + description="A demo project", + language=language, + languages={}, + private=False, + fork=False, + archived=False, + created_at=dt, + updated_at=dt, + pushed_at=dt, + default_branch="main", + stars=12, + forks=2, + open_issues=1, + size_kb=200, + html_url="https://github.com/octocat/demo", + clone_url="https://github.com/octocat/demo.git", + topics=["cli"], + ) + + +def _rich_tree() -> dict: + return { + "available": True, + "truncated": False, + "files": [ + "README.md", + "pyproject.toml", + "src/app.py", + "tests/test_app.py", + ".github/workflows/ci.yml", + ], + "dirs": ["src", "tests", ".github", ".github/workflows"], + } + + +class _FakeClient: + """Minimal duck-typed client: tree + content + repo list. No HTTP. + + Analyzers that reach for API-only endpoints (activity, community, security) + fail soft inside ``run_all_analyzers`` — exactly the API-only fidelity floor. 
def test_bare_repo_is_detected_as_having_no_tests():
    """A README-only tree scores 0.0 on testing and carries the ``no-tests`` flag."""
    bare_tree = {
        "available": True,
        "truncated": False,
        "files": ["README.md"],
        "dirs": [],
    }
    client = _FakeClient(bare_tree, {"README.md": "# bare\n"})
    repo = _meta(name="bare", full_name="o/bare")

    audit = score_repos_api_only([repo], client)[0]

    score_by_dimension = {}
    for result in audit.analyzer_results:
        score_by_dimension[result.dimension] = result.score
    assert score_by_dimension["testing"] == 0.0
    assert "no-tests" in audit.flags
def test_audit_user_api_only_lists_then_scores():
    """``audit_user_api_only`` lists the user's repos, scores them, and serializes the report."""
    listed_repo = dict(
        name="demo",
        full_name="octocat/demo",
        description="A demo project",
        language="Python",
        private=False,
        fork=False,
        archived=False,
        created_at="2024-01-01T00:00:00Z",
        updated_at="2024-06-01T00:00:00Z",
        pushed_at="2024-06-01T00:00:00Z",
        default_branch="main",
        stargazers_count=12,
        forks_count=2,
        open_issues_count=1,
        size=200,
        html_url="https://github.com/octocat/demo",
        clone_url="https://github.com/octocat/demo.git",
        topics=["cli"],
    )
    readme = {"README.md": "# Demo\n\nbody\n"}
    client = _FakeClient(_rich_tree(), readme, repos=[listed_repo])

    report = audit_user_api_only("octocat", client)

    # Report object shape.
    assert isinstance(report, ApiOnlyReport)
    assert report.username == "octocat"
    assert report.mode == API_ONLY_MODE
    assert len(report.audits) == 1

    # Serialized payload shape.
    serialized = report.to_dict()
    assert serialized["mode"] == "api_only"
    assert serialized["repo_count"] == 1
    assert serialized["fidelity_note"]  # honest API-only caveat is present
    first_repo = serialized["repos"][0]
    assert first_repo["metadata"]["name"] == "demo"
class _RecordingClient(_FakeClient):
    """``_FakeClient`` that logs every call to the slow async-stats endpoints.

    The labels of the stats endpoints hit are appended to ``stats_calls``;
    the remaining endpoints return fixed empty payloads without logging.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.stats_calls: list[str] = []

    def _note(self, label, payload):
        """Log ``label`` in ``stats_calls`` and hand back ``payload``."""
        self.stats_calls.append(label)
        return payload

    def get_contributor_stats(self, owner, repo):
        return self._note("contributor", [])

    def get_commit_activity(self, owner, repo):
        return self._note("commit_activity", [])

    def get_participation_stats(self, owner, repo):
        return self._note("participation", {})

    # Fast (non-202) endpoints the analyzers also touch — provided so they
    # delegate cleanly rather than fail-soft. None of these are recorded.
    def get_releases(self, owner, repo, per_page=10):
        releases: list = []
        return (releases, True)

    def get_recent_commits(self, owner, repo, per_page=10):
        return []

    def get_pull_requests(self, owner, repo, state="all", per_page=30):
        return []

    def get_community_profile(self, owner, repo):
        return dict(available=False)
assert repos == [{"name": "private-repo", "private": True}] - assert any("/list_repos/octocat/owner-private" in call[0] for call in cache.get_calls) - assert any("/list_repos/octocat/owner-private" in call[0] for call in cache.put_calls) + assert any( + "/list_repos/octocat/owner-private" in call[0] for call in cache.get_calls + ) + assert any( + "/list_repos/octocat/owner-private" in call[0] for call in cache.put_calls + ) - def test_public_and_private_repo_list_cache_entries_do_not_collide(self, monkeypatch): + def test_public_and_private_repo_list_cache_entries_do_not_collide( + self, monkeypatch + ): cache = _MemoryCache() owner_client = GitHubClient(token="secret", cache=cache) @@ -66,8 +75,13 @@ def test_public_and_private_repo_list_cache_entries_do_not_collide(self, monkeyp assert owner_result == [{"name": "private-repo", "private": True}] assert anon_result == [{"name": "public-repo", "private": False}] - assert any("/list_repos/octocat/owner-private" in call[0] for call in cache.put_calls) - assert any("/list_repos/octocat/public-anonymous" in call[0] for call in cache.put_calls) + assert any( + "/list_repos/octocat/owner-private" in call[0] for call in cache.put_calls + ) + assert any( + "/list_repos/octocat/public-anonymous" in call[0] + for call in cache.put_calls + ) def test_security_endpoints_return_counts_when_available(self, monkeypatch): client = GitHubClient() @@ -79,7 +93,10 @@ def _fake_fetch(url, params=None): return [ {"number": 1, "rule": {"security_severity_level": "critical"}}, {"number": 2, "rule": {"id": "CodeReviewID", "severity": "error"}}, - {"number": 5, "rule": {"id": "ConcreteRuleID", "severity": "error"}}, + { + "number": 5, + "rule": {"id": "ConcreteRuleID", "severity": "error"}, + }, {"number": 3, "rule": {"severity": "medium"}}, {"number": 4, "rule": {"severity": "note"}}, ] @@ -111,13 +128,17 @@ def test_security_endpoints_fail_soft_on_http_error(self, monkeypatch): response.status_code = 404 error = 
requests.HTTPError(response=response) - monkeypatch.setattr(client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)) + monkeypatch.setattr( + client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error) + ) assert client.get_secret_scanning_alert_count("o", "r")["available"] is False assert client.get_code_scanning_alert_count("o", "r")["http_status"] == 404 assert client.get_sbom_exportability("o", "r")["available"] is False - def test_security_alert_endpoint_403_and_404_are_not_warnings(self, monkeypatch, caplog): + def test_security_alert_endpoint_403_and_404_are_not_warnings( + self, monkeypatch, caplog + ): client = GitHubClient() def _raise(status_code): @@ -128,7 +149,9 @@ def _raise(status_code): monkeypatch.setattr(client, "_fetch_json", lambda *a, **k: _raise(403)) with caplog.at_level("WARNING"): - assert client.get_secret_scanning_alert_count("o", "r")["http_status"] == 403 + assert ( + client.get_secret_scanning_alert_count("o", "r")["http_status"] == 403 + ) assert "secret scanning alerts" not in caplog.text @@ -147,7 +170,9 @@ def test_security_alert_endpoint_unexpected_http_errors_still_warn( response = requests.Response() response.status_code = 500 error = requests.HTTPError(response=response) - monkeypatch.setattr(client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)) + monkeypatch.setattr( + client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error) + ) with caplog.at_level("WARNING"): assert client.get_code_scanning_alert_count("o", "r")["http_status"] == 500 @@ -156,16 +181,35 @@ def test_security_alert_endpoint_unexpected_http_errors_still_warn( def test_get_repo_topics_reads_names_payload(self, monkeypatch): client = GitHubClient() - monkeypatch.setattr(client, "_fetch_json", lambda *a, **k: {"names": ["python", "ghra-showcase"]}) + monkeypatch.setattr( + client, + "_fetch_json", + lambda *a, **k: {"names": ["python", "ghra-showcase"]}, + ) topics = client.get_repo_topics("o", "r") assert 
topics["available"] is True assert topics["topics"] == ["python", "ghra-showcase"] - def test_update_repo_custom_property_values_skips_missing_definitions(self, monkeypatch): + def test_update_repo_custom_property_values_skips_missing_definitions( + self, monkeypatch + ): client = GitHubClient() - monkeypatch.setattr(client, "list_org_custom_properties", lambda owner: {"available": True, "properties": []}) - monkeypatch.setattr(client, "get_repo_custom_property_values", lambda owner, repo: {"available": True, "values": {"portfolio_call": "old"}}) - result = client.update_repo_custom_property_values("o", "r", {"portfolio_call": "new"}) + monkeypatch.setattr( + client, + "list_org_custom_properties", + lambda owner: {"available": True, "properties": []}, + ) + monkeypatch.setattr( + client, + "get_repo_custom_property_values", + lambda owner, repo: { + "available": True, + "values": {"portfolio_call": "old"}, + }, + ) + result = client.update_repo_custom_property_values( + "o", "r", {"portfolio_call": "new"} + ) assert result["status"] == "skipped" assert result["before"] == {"portfolio_call": "old"} @@ -218,7 +262,11 @@ def test_find_project_v2_item_by_issue_reads_linked_issue(self, monkeypatch): { "id": "PVTI_1", "isArchived": False, - "content": {"id": "ISSUE_1", "number": 12, "url": "https://github.com/o/r/issues/12"}, + "content": { + "id": "ISSUE_1", + "number": 12, + "url": "https://github.com/o/r/issues/12", + }, } ], "pageInfo": {"hasNextPage": False, "endCursor": None}, @@ -232,12 +280,18 @@ def test_find_project_v2_item_by_issue_reads_linked_issue(self, monkeypatch): assert result["available"] is True assert result["item"]["id"] == "PVTI_1" - def test_add_and_archive_project_v2_item_return_normalized_payloads(self, monkeypatch): + def test_add_and_archive_project_v2_item_return_normalized_payloads( + self, monkeypatch + ): client = GitHubClient() responses = iter( [ {"addProjectV2ItemById": {"item": {"id": "PVTI_1"}}}, - {"archiveProjectV2Item": {"item": 
class TestGitHubClientTreeAndContents:
    """Git Trees API + Contents API support for clone-free (API-only) scoring."""

    @staticmethod
    def _stub_payload(monkeypatch, client, payload):
        """Patch ``client._fetch_json`` to return ``payload``."""
        monkeypatch.setattr(client, "_fetch_json", lambda url, params=None: payload)

    @staticmethod
    def _stub_http_failure(monkeypatch, client, status_code):
        """Patch ``client._fetch_json`` to raise an ``HTTPError`` with ``status_code``."""
        response = requests.Response()
        response.status_code = status_code
        failure = requests.HTTPError(response=response)

        def _explode(*_args, **_kwargs):
            raise failure

        monkeypatch.setattr(client, "_fetch_json", _explode)

    def test_get_repo_tree_returns_files_and_dirs(self, monkeypatch):
        client = GitHubClient()
        entries = [
            ("README.md", "blob"),
            ("src", "tree"),
            ("src/main.py", "blob"),
            ("tests", "tree"),
            ("tests/test_main.py", "blob"),
        ]
        payload = {
            "tree": [{"path": path, "type": kind} for path, kind in entries],
            "truncated": False,
        }
        self._stub_payload(monkeypatch, client, payload)

        tree = client.get_repo_tree("o", "r", "main")

        assert tree["available"] is True
        assert tree["truncated"] is False
        for blob_path in ("README.md", "src/main.py", "tests/test_main.py"):
            assert blob_path in tree["files"]
        for dir_path in ("src", "tests"):
            assert dir_path in tree["dirs"]
        # tree entries are NOT files
        assert "src" not in tree["files"]

    def test_get_repo_tree_requests_recursive(self, monkeypatch):
        client = GitHubClient()
        captured: dict = {}

        def _record(url, params=None):
            captured.update(url=url, params=params)
            return {"tree": [], "truncated": False}

        monkeypatch.setattr(client, "_fetch_json", _record)

        client.get_repo_tree("o", "r", "main")

        assert "/repos/o/r/git/trees/main" in captured["url"]
        assert captured["params"] == {"recursive": "1"}

    def test_get_repo_tree_encodes_branch_ref_path_segment(self, monkeypatch):
        client = GitHubClient()
        captured: dict = {}

        def _record(url, params=None):
            captured["url"] = url
            return {"tree": [], "truncated": False}

        monkeypatch.setattr(client, "_fetch_json", _record)

        client.get_repo_tree("o", "r", "release/1.x")

        # The slash inside the ref must be percent-encoded in the URL.
        assert "/repos/o/r/git/trees/release%2F1.x" in captured["url"]

    def test_get_repo_tree_fails_soft_on_http_error(self, monkeypatch):
        client = GitHubClient()
        self._stub_http_failure(monkeypatch, client, 404)

        tree = client.get_repo_tree("o", "r", "main")

        assert tree["available"] is False
        assert tree["files"] == []
        assert tree["dirs"] == []

    def test_get_repo_tree_flags_truncation(self, monkeypatch):
        client = GitHubClient()
        truncated_payload = {
            "tree": [{"path": "a.py", "type": "blob"}],
            "truncated": True,
        }
        self._stub_payload(monkeypatch, client, truncated_payload)

        tree = client.get_repo_tree("o", "r", "main")

        assert tree["truncated"] is True
        assert tree["available"] is True

    def test_get_file_content_decodes_base64(self, monkeypatch):
        client = GitHubClient()
        raw = b"# Title\nbody text\n"
        file_payload = {
            "type": "file",
            "encoding": "base64",
            "content": base64.b64encode(raw).decode("ascii"),
            "size": len(raw),
        }
        self._stub_payload(monkeypatch, client, file_payload)

        content = client.get_file_content("o", "r", "README.md")

        assert content == "# Title\nbody text\n"

    def test_get_file_content_returns_none_on_404(self, monkeypatch):
        client = GitHubClient()
        self._stub_http_failure(monkeypatch, client, 404)

        assert client.get_file_content("o", "r", "missing.txt") is None

    def test_get_file_content_skips_oversize_files(self, monkeypatch):
        client = GitHubClient()
        # Contents API returns content="" for files >1MB; we must not treat that
        # as a real (empty) file. Skip anything over the byte cap.
        oversize_payload = {
            "type": "file",
            "encoding": "base64",
            "content": "",
            "size": 5_000_000,
        }
        self._stub_payload(monkeypatch, client, oversize_payload)

        result = client.get_file_content("o", "r", "huge.bin", max_bytes=1_000_000)

        assert result is None

    def test_get_repo_tree_propagates_unexpected_status(self, monkeypatch):
        client = GitHubClient()
        self._stub_http_failure(monkeypatch, client, 500)

        with pytest.raises(requests.HTTPError):
            client.get_repo_tree("o", "r", "main")

    def test_get_repo_tree_empty_repo_409_is_silent(self, monkeypatch, caplog):
        client = GitHubClient()
        self._stub_http_failure(monkeypatch, client, 409)

        with caplog.at_level("WARNING"):
            tree = client.get_repo_tree("o", "r", "main")

        assert tree["available"] is False
        assert "Failed to fetch tree" not in caplog.text

    def test_get_file_content_logs_unexpected_status(self, monkeypatch, caplog):
        client = GitHubClient()
        self._stub_http_failure(monkeypatch, client, 500)

        with caplog.at_level("WARNING"):
            result = client.get_file_content("o", "r", "README.md")

        assert result is None
        assert "Failed to fetch README.md" in caplog.text
SimpleNamespace +from unittest.mock import patch + +import pytest + +# --------------------------------------------------------------------------- +# Dependency guard — skip entire module if [serve] extra not installed +# --------------------------------------------------------------------------- +pytest.importorskip("fastapi", reason="[serve] extra not installed") + +import requests # noqa: E402 +from fastapi.testclient import TestClient # noqa: E402 + +from src.api_only import ApiOnlyReport # noqa: E402 +from src.serve.api import get_github_client # noqa: E402 +from src.serve.app import create_app # noqa: E402 + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- +def _sentinel_github_client() -> object: + return object() + + +def _make_client(tmp_path) -> TestClient: + """Build a TestClient with the GitHub client dependency stubbed to a sentinel. + + The endpoint's network work is exercised through a patched + ``audit_user_api_only`` in each test, so the dependency only needs to avoid + constructing a real client (which would read env / open a session). 
def _http_error(
    status: int, headers: dict[str, str] | None = None
) -> requests.HTTPError:
    """Build a ``requests.HTTPError`` wrapping a response with ``status`` and optional headers.

    The error message is ``"<status> error"``.
    """
    fake_response = requests.Response()
    fake_response.status_code = status
    if headers:
        for header_name, header_value in headers.items():
            fake_response.headers[header_name] = header_value
    message = f"{status} error"
    return requests.HTTPError(message, response=fake_response)
+ assert mock_audit.call_args.kwargs["fast"] is True + assert body["username"] == "octocat" + assert body["mode"] == "api_only" + assert body["repo_count"] == 0 + assert body["repos"] == [] + assert "fidelity_note" in body + + +def test_report_passes_validated_username(client: TestClient) -> None: + report = ApiOnlyReport(username="octocat", audits=[]) + with patch("src.serve.api.audit_user_api_only", return_value=report) as mock_audit: + resp = client.get("/api/report/octocat") + + assert resp.status_code == 200 + # username is the first positional arg to audit_user_api_only + assert mock_audit.call_args.args[0] == "octocat" + + +# --------------------------------------------------------------------------- +# Validation +# --------------------------------------------------------------------------- +@pytest.mark.parametrize("bad", ["bad--name", "has space", "-leading", "a" * 40]) +def test_invalid_username_returns_422(client: TestClient, bad: str) -> None: + with patch("src.serve.api.audit_user_api_only") as mock_audit: + resp = client.get(f"/api/report/{bad}") + assert resp.status_code == 422 + mock_audit.assert_not_called() + + +# --------------------------------------------------------------------------- +# Error mapping +# --------------------------------------------------------------------------- +def test_unknown_user_returns_404(client: TestClient) -> None: + with patch("src.serve.api.audit_user_api_only", side_effect=_http_error(404)): + resp = client.get("/api/report/ghost") + assert resp.status_code == 404 + + +def test_rate_limited_403_with_zero_quota_returns_429(client: TestClient) -> None: + err = _http_error(403, headers={"X-RateLimit-Remaining": "0"}) + with patch("src.serve.api.audit_user_api_only", side_effect=err): + resp = client.get("/api/report/octocat") + assert resp.status_code == 429 + + +def test_rate_limited_429_returns_429(client: TestClient) -> None: + with patch("src.serve.api.audit_user_api_only", side_effect=_http_error(429)): + resp 
= client.get("/api/report/octocat") + assert resp.status_code == 429 + + +def test_forbidden_403_without_quota_header_returns_403(client: TestClient) -> None: + # A 403 that is NOT rate-limiting (e.g. private resource) stays a 403, not 429. + with patch("src.serve.api.audit_user_api_only", side_effect=_http_error(403)): + resp = client.get("/api/report/octocat") + assert resp.status_code == 403 + + +def test_upstream_error_returns_502(client: TestClient) -> None: + with patch("src.serve.api.audit_user_api_only", side_effect=_http_error(500)): + resp = client.get("/api/report/octocat") + assert resp.status_code == 502 + + +def test_network_error_returns_502(client: TestClient) -> None: + err = requests.ConnectionError("connection reset") + with patch("src.serve.api.audit_user_api_only", side_effect=err): + resp = client.get("/api/report/octocat") + assert resp.status_code == 502 + + +def test_github_client_error_returns_502(client: TestClient) -> None: + from src.github_client import GitHubClientError + + with patch( + "src.serve.api.audit_user_api_only", + side_effect=GitHubClientError("graphql failed"), + ): + resp = client.get("/api/report/octocat") + assert resp.status_code == 502 + + +# --------------------------------------------------------------------------- +# Cost bound +# --------------------------------------------------------------------------- +def test_scan_is_capped_at_max_repos(client: TestClient) -> None: + from src.serve.api import MAX_REPOS_CAP + + report = ApiOnlyReport(username="octocat", audits=[]) + with patch("src.serve.api.audit_user_api_only", return_value=report) as mock_audit: + # No per-request repo knob — a stray query param is ignored and the + # server always bounds the scan at MAX_REPOS_CAP. 
+ resp = client.get("/api/report/octocat?max_repos=9999") + + assert resp.status_code == 200 + assert mock_audit.call_args.kwargs["max_repos"] == MAX_REPOS_CAP + + +# --------------------------------------------------------------------------- +# CORS (browser frontend reachability) +# --------------------------------------------------------------------------- +def test_cors_allows_frontend_origin(client: TestClient) -> None: + report = ApiOnlyReport(username="octocat", audits=[]) + origin = "http://localhost:3000" + with patch("src.serve.api.audit_user_api_only", return_value=report): + resp = client.get("/api/report/octocat", headers={"Origin": origin}) + assert resp.status_code == 200 + assert resp.headers.get("access-control-allow-origin") == origin + + +def test_cors_preflight_allows_waitlist_post(client: TestClient) -> None: + resp = client.options( + "/api/waitlist", + headers={ + "Origin": "http://localhost:3000", + "Access-Control-Request-Method": "POST", + }, + ) + assert resp.status_code == 200 + assert "POST" in resp.headers.get("access-control-allow-methods", "") + + +def test_cors_origins_reads_env(monkeypatch) -> None: + from src.serve.api import cors_origins + + monkeypatch.setenv("GHRA_CORS_ORIGINS", "https://a.example, https://b.example") + assert cors_origins() == ["https://a.example", "https://b.example"] + monkeypatch.delenv("GHRA_CORS_ORIGINS", raising=False) + assert cors_origins() == ["http://localhost:3000", "http://127.0.0.1:3000"] + + +# --------------------------------------------------------------------------- +# Caching + throttle (hosting guards) +# --------------------------------------------------------------------------- +def test_cache_hit_skips_second_scan(client: TestClient) -> None: + report = ApiOnlyReport(username="octocat", audits=[]) + with patch("src.serve.api.audit_user_api_only", return_value=report) as mock_audit: + first = client.get("/api/report/octocat") + second = client.get("/api/report/octocat") + + assert 
first.status_code == 200 + assert second.status_code == 200 + assert first.json() == second.json() + # The second identical request is served from cache — no re-scan. + assert mock_audit.call_count == 1 + + +def test_rate_limit_returns_429_past_limit(tmp_path, monkeypatch) -> None: + monkeypatch.setenv("GHRA_RATE_LIMIT", "2") + monkeypatch.setenv("GHRA_RATE_WINDOW_SECONDS", "3600") + local_client = _make_client(tmp_path) + + report = ApiOnlyReport(username="octocat", audits=[]) + with patch("src.serve.api.audit_user_api_only", return_value=report) as mock_audit: + codes = [local_client.get("/api/report/octocat").status_code for _ in range(3)] + assert codes == [200, 200, 429] + # The 2nd request was a cache hit but still consumed throttle budget, so the + # scan ran exactly once across the two allowed requests. + assert mock_audit.call_count == 1 + + +class _FakeRequest: + """Minimal Request stand-in for client_ip unit tests.""" + + def __init__(self, headers: dict[str, str], host: str | None) -> None: + self.headers = headers + self.client = SimpleNamespace(host=host) if host is not None else None + + +def test_client_ip_ignores_forwarded_by_default(monkeypatch) -> None: + from src.serve.api import client_ip + + monkeypatch.delenv("GHRA_TRUST_FORWARDED_FOR", raising=False) + req = _FakeRequest({"x-forwarded-for": "9.9.9.9"}, host="1.2.3.4") + # Spoofable XFF is ignored — keyed on the direct peer. 
+ assert client_ip(req) == "1.2.3.4" # type: ignore[arg-type] + + +def test_client_ip_honors_forwarded_when_trusted(monkeypatch) -> None: + from src.serve.api import client_ip + + monkeypatch.setenv("GHRA_TRUST_FORWARDED_FOR", "true") + req = _FakeRequest({"x-forwarded-for": "9.9.9.9, 1.2.3.4"}, host="1.2.3.4") + assert client_ip(req) == "9.9.9.9" # type: ignore[arg-type] + + +# --------------------------------------------------------------------------- +# Waitlist capture +# --------------------------------------------------------------------------- +def test_waitlist_accepts_valid_email(client: TestClient) -> None: + resp = client.post( + "/api/waitlist", json={"email": "dev@example.com", "source": "octocat"} + ) + assert resp.status_code == 201 + assert resp.json()["status"] == "joined" + + +def test_waitlist_is_idempotent(client: TestClient) -> None: + client.post("/api/waitlist", json={"email": "dev@example.com"}) + resp = client.post("/api/waitlist", json={"email": "dev@example.com"}) + assert resp.status_code == 201 + assert resp.json()["status"] == "already_joined" + + +def test_waitlist_dedupes_case_insensitively_through_endpoint( + client: TestClient, +) -> None: + first = client.post("/api/waitlist", json={"email": "Dev@Example.com"}) + second = client.post("/api/waitlist", json={"email": "dev@example.com"}) + assert first.json()["status"] == "joined" + assert second.json()["status"] == "already_joined" + + +def test_waitlist_rejects_invalid_email(client: TestClient) -> None: + resp = client.post("/api/waitlist", json={"email": "not-an-email"}) + assert resp.status_code == 422 + + +def test_waitlist_requires_email_field(client: TestClient) -> None: + resp = client.post("/api/waitlist", json={"source": "octocat"}) + assert resp.status_code == 422 # pydantic: missing required field + + +def test_waitlist_throttled(tmp_path, monkeypatch) -> None: + monkeypatch.setenv("GHRA_RATE_LIMIT", "1") + monkeypatch.setenv("GHRA_RATE_WINDOW_SECONDS", "3600") + 
local_client = _make_client(tmp_path) + first = local_client.post("/api/waitlist", json={"email": "a@b.co"}) + second = local_client.post("/api/waitlist", json={"email": "c@d.co"}) + assert first.status_code == 201 + assert second.status_code == 429 diff --git a/tests/test_serve_hosting.py b/tests/test_serve_hosting.py new file mode 100644 index 0000000..7ceb7d2 --- /dev/null +++ b/tests/test_serve_hosting.py @@ -0,0 +1,162 @@ +"""Tests for src/serve/hosting.py — KV store, report cache, per-IP throttle.""" + +from __future__ import annotations + +import pytest + +from src.serve.hosting import ( + InMemoryKVStore, + RateLimiter, + ReportCache, + build_rate_limiter, + build_report_cache, +) + + +class FakeClock: + """Manually-advanced monotonic clock for deterministic expiry tests.""" + + def __init__(self) -> None: + self.now = 1000.0 + + def __call__(self) -> float: + return self.now + + def advance(self, seconds: float) -> None: + self.now += seconds + + +# --------------------------------------------------------------------------- +# InMemoryKVStore +# --------------------------------------------------------------------------- +def test_kv_set_get_roundtrip() -> None: + store = InMemoryKVStore() + store.set("k", "v", ttl_seconds=60) + assert store.get("k") == "v" + + +def test_kv_missing_key_returns_none() -> None: + assert InMemoryKVStore().get("nope") is None + + +def test_kv_value_expires() -> None: + clock = FakeClock() + store = InMemoryKVStore(clock=clock) + store.set("k", "v", ttl_seconds=30) + clock.advance(29) + assert store.get("k") == "v" + clock.advance(2) # now 31s elapsed, past the 30s TTL + assert store.get("k") is None + + +def test_kv_incr_counts_within_window_then_resets() -> None: + clock = FakeClock() + store = InMemoryKVStore(clock=clock) + assert store.incr("c", ttl_seconds=10) == 1 + assert store.incr("c", ttl_seconds=10) == 2 + assert store.incr("c", ttl_seconds=10) == 3 + clock.advance(11) # window elapsed + assert store.incr("c", 
ttl_seconds=10) == 1 + + +def test_kv_incr_keeps_original_window() -> None: + clock = FakeClock() + store = InMemoryKVStore(clock=clock) + store.incr("c", ttl_seconds=10) + clock.advance(6) + store.incr("c", ttl_seconds=10) # must NOT extend the window + clock.advance(5) # 11s total — original window has elapsed + assert store.incr("c", ttl_seconds=10) == 1 + + +def test_kv_reaps_expired_counters_past_threshold() -> None: + clock = FakeClock() + store = InMemoryKVStore(clock=clock, reap_threshold=3) + for i in range(4): # 4 distinct keys, each a 10s window + store.incr(f"ip-{i}", ttl_seconds=10) + assert len(store._counters) == 4 # below/at threshold, not yet swept + clock.advance(11) # all four windows expire + store.incr("trigger", ttl_seconds=10) # size > threshold → sweep runs + # The four expired counters are gone; only the fresh "trigger" remains. + assert set(store._counters) == {"trigger"} + + +# --------------------------------------------------------------------------- +# ReportCache +# --------------------------------------------------------------------------- +def test_report_cache_roundtrips_dict() -> None: + cache = ReportCache(InMemoryKVStore(), ttl_seconds=3600) + payload = {"username": "octocat", "repos": [{"x": 1}]} + cache.put("octocat", payload) + assert cache.get("octocat") == payload + + +def test_report_cache_is_case_insensitive() -> None: + cache = ReportCache(InMemoryKVStore(), ttl_seconds=3600) + cache.put("OctoCat", {"username": "OctoCat", "repos": []}) + assert cache.get("octocat") is not None + + +def test_report_cache_miss_returns_none() -> None: + cache = ReportCache(InMemoryKVStore(), ttl_seconds=3600) + assert cache.get("ghost") is None + + +def test_report_cache_disabled_when_ttl_zero() -> None: + cache = ReportCache(InMemoryKVStore(), ttl_seconds=0) + assert cache.enabled is False + cache.put("octocat", {"username": "octocat", "repos": []}) + assert cache.get("octocat") is None + + +# 
--------------------------------------------------------------------------- +# RateLimiter +# --------------------------------------------------------------------------- +def test_rate_limiter_allows_up_to_limit_then_blocks() -> None: + limiter = RateLimiter(InMemoryKVStore(), limit=3, window_seconds=60) + assert [limiter.allow("1.2.3.4") for _ in range(4)] == [True, True, True, False] + + +def test_rate_limiter_is_per_ip() -> None: + limiter = RateLimiter(InMemoryKVStore(), limit=1, window_seconds=60) + assert limiter.allow("1.1.1.1") is True + assert limiter.allow("2.2.2.2") is True # different IP, own budget + assert limiter.allow("1.1.1.1") is False + + +def test_rate_limiter_window_resets() -> None: + clock = FakeClock() + limiter = RateLimiter(InMemoryKVStore(clock=clock), limit=1, window_seconds=60) + assert limiter.allow("1.1.1.1") is True + assert limiter.allow("1.1.1.1") is False + clock.advance(61) + assert limiter.allow("1.1.1.1") is True + + +def test_rate_limiter_disabled_when_limit_nonpositive() -> None: + limiter = RateLimiter(InMemoryKVStore(), limit=0, window_seconds=60) + assert limiter.enabled is False + assert all(limiter.allow("1.1.1.1") for _ in range(100)) + + +# --------------------------------------------------------------------------- +# Env-driven builders +# --------------------------------------------------------------------------- +def test_builders_read_env(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GHRA_REPORT_TTL_SECONDS", "120") + monkeypatch.setenv("GHRA_RATE_LIMIT", "5") + monkeypatch.setenv("GHRA_RATE_WINDOW_SECONDS", "30") + store = InMemoryKVStore() + cache = build_report_cache(store) + limiter = build_rate_limiter(store) + cache.put("u", {"username": "u", "repos": []}) + assert cache.get("u") is not None + assert [limiter.allow("ip") for _ in range(6)] == [True] * 5 + [False] + + +def test_builders_fall_back_to_defaults_on_bad_env( + monkeypatch: pytest.MonkeyPatch, +) -> None: + 
monkeypatch.setenv("GHRA_RATE_LIMIT", "not-a-number") + limiter = build_rate_limiter(InMemoryKVStore()) + assert limiter.enabled is True # default 20, not crashed diff --git a/tests/test_serve_waitlist.py b/tests/test_serve_waitlist.py new file mode 100644 index 0000000..f7f2bad --- /dev/null +++ b/tests/test_serve_waitlist.py @@ -0,0 +1,80 @@ +"""Tests for src/serve/waitlist.py — email capture store + validation.""" + +from __future__ import annotations + +import pytest + +from src.serve.waitlist import ( + SqliteWaitlistStore, + build_waitlist_store, + is_valid_email, +) + + +@pytest.mark.parametrize( + "email", ["a@b.co", "first.last@example.com", "dev+tag@sub.domain.io"] +) +def test_valid_emails(email: str) -> None: + assert is_valid_email(email) is True + + +@pytest.mark.parametrize( + "email", + [ + "", + "no-at-sign", + "a@b", + "a b@c.com", + "a@@example.com", + "@example.com", + "a@.com", + "a@example..com", + ], +) +def test_invalid_emails(email: str) -> None: + assert is_valid_email(email) is False + + +def test_email_length_capped() -> None: + assert is_valid_email("a" * 250 + "@example.com") is False + + +def test_store_add_and_count(tmp_path) -> None: + store = SqliteWaitlistStore(str(tmp_path / "wl.db")) + assert store.add("dev@example.com") is True + assert store.count() == 1 + + +def test_store_creates_missing_parent_dir(tmp_path) -> None: + # Parent dir does not exist yet — the store must create it, not crash. 
+ store = SqliteWaitlistStore(str(tmp_path / "nested" / "dir" / "wl.db")) + assert store.add("a@b.co") is True + + +def test_store_dedupes_case_insensitively(tmp_path) -> None: + store = SqliteWaitlistStore(str(tmp_path / "wl.db")) + assert store.add("Dev@Example.com") is True + assert store.add("dev@example.com") is False # same email, normalized + assert store.count() == 1 + + +def test_store_persists_across_instances(tmp_path) -> None: + path = str(tmp_path / "wl.db") + SqliteWaitlistStore(path).add("a@b.co", source="octocat") + # A fresh instance over the same file sees the prior write. + assert SqliteWaitlistStore(path).count() == 1 + + +def test_builder_prefers_env(tmp_path, monkeypatch) -> None: + target = str(tmp_path / "from-env.db") + monkeypatch.setenv("GHRA_WAITLIST_DB", target) + store = build_waitlist_store(default_dir=tmp_path / "ignored") + store.add("a@b.co") + assert (tmp_path / "from-env.db").exists() + + +def test_builder_uses_default_dir(tmp_path, monkeypatch) -> None: + monkeypatch.delenv("GHRA_WAITLIST_DB", raising=False) + store = build_waitlist_store(default_dir=tmp_path) + store.add("a@b.co") + assert (tmp_path / "waitlist.db").exists() diff --git a/uv.lock b/uv.lock index aaf6b4e..91846b7 100644 --- a/uv.lock +++ b/uv.lock @@ -99,6 +99,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/45/19/cc8bd127d28a43da249aa955cfd164cf8fd534e79e42cea96c4854d72fd0/ast_serialize-0.5.0-cp39-abi3-win_arm64.whl", hash = "sha256:92a31c9c20d25a076edaeec76b128a3535d74a24f340b9a8a7e96c9b86dc9642", size = 1081181, upload-time = "2026-05-17T17:48:28.122Z" }, ] +[[package]] +name = "async-timeout" +version = "5.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a5/ae/136395dfbfe00dfc94da3f3e136d0b13f394cba8f4841120e34226265780/async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3", size = 9274, upload-time = 
"2024-11-06T16:41:39.6Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" }, +] + [[package]] name = "backports-tarfile" version = "1.2.0" @@ -659,6 +668,9 @@ dev = [ { name = "ruff" }, { name = "types-pyyaml" }, ] +hosting = [ + { name = "redis" }, +] semantic = [ { name = "sentence-transformers" }, { name = "sqlite-vec" }, @@ -687,6 +699,7 @@ requires-dist = [ { name = "pyyaml", specifier = ">=6.0" }, { name = "pyyaml", marker = "extra == 'config'", specifier = ">=6.0" }, { name = "radon", specifier = ">=6.0.0" }, + { name = "redis", marker = "extra == 'hosting'", specifier = ">=5.0" }, { name = "requests", specifier = ">=2.31.0" }, { name = "responses", marker = "extra == 'dev'", specifier = ">=0.25" }, { name = "rich", specifier = ">=13.0" }, @@ -698,7 +711,7 @@ requires-dist = [ { name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0" }, { name = "uvicorn", extras = ["standard"], marker = "extra == 'serve'", specifier = ">=0.30" }, ] -provides-extras = ["dev", "config", "semantic", "serve", "build"] +provides-extras = ["dev", "config", "semantic", "serve", "hosting", "build"] [[package]] name = "h11" @@ -2145,6 +2158,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/97/1b/295bf2fa3e740131778065e5ffa2c481f0e7210182d408e9a2c244ff5b0c/readme_renderer-45.0-py3-none-any.whl", hash = "sha256:3385ed220117104a2bceb4a9dac8c5fdf6d1f96890d7ea2a9c7174fd5c84091f", size = 14134, upload-time = "2026-06-09T21:05:15.85Z" }, ] +[[package]] +name = "redis" +version = "8.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/53/ae/ed461cca5780b5fc8b9fe8ca0ed98d89508645fb9d880c24cc42c087678f/redis-8.0.0.tar.gz", hash = "sha256:a00c5355432051ac14e593b8b197fc76c887ee12d55a0984f69328a1115fdc49", size = 5101591, upload-time = "2026-05-28T12:45:13.5Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/e3/b519734372d305bd547534a9f32e4ce9f98552af753dce72cf3483a0ff0b/redis-8.0.0-py3-none-any.whl", hash = "sha256:c938c18338585009f0bc310f4c7e4e4b4d37639356c4ac072cedf3af570c8dc7", size = 499870, upload-time = "2026-05-28T12:45:11.697Z" }, +] + [[package]] name = "regex" version = "2026.5.9" diff --git a/web/.gitignore b/web/.gitignore new file mode 100644 index 0000000..6051d15 --- /dev/null +++ b/web/.gitignore @@ -0,0 +1,10 @@ +/node_modules +/.next +/out +/build +next-env.d.ts +*.tsbuildinfo +.env.local +.vercel +.DS_Store +/output diff --git a/web/README.md b/web/README.md new file mode 100644 index 0000000..5eb8360 --- /dev/null +++ b/web/README.md @@ -0,0 +1,39 @@ +# Portfolio Health — web frontend + +Next.js (App Router) paste-a-username frontend for the clone-free portfolio +report. The form calls the FastAPI engine's `GET /api/report/{username}` +endpoint (from `src/serve/api.py`) and renders the result with a "top fixes" +framing: grades, repo health, and the highest-leverage actions per repo. + +## Develop + +Run the Python API and the web app side by side. + +**1. Start the report API** (from the repo root): + +```bash +uv run --extra serve python -m uvicorn --factory src.serve.app:create_app --port 8080 +``` + +**2. Start the frontend** (from `web/`): + +```bash +pnpm install +pnpm dev # http://localhost:3000 +``` + +## Configuration + +| Env var | Default | Purpose | +| ---------------------- | ----------------------- | ---------------------------------------- | +| `NEXT_PUBLIC_API_BASE` | `http://127.0.0.1:8080` | Base URL of the FastAPI report API. 
| + +The API must allow the frontend origin via CORS (`GHRA_CORS_ORIGINS` on the API +side; defaults already include `http://localhost:3000`). + +## Build + +```bash +pnpm typecheck +pnpm build +``` diff --git a/web/app/globals.css b/web/app/globals.css new file mode 100644 index 0000000..54c603c --- /dev/null +++ b/web/app/globals.css @@ -0,0 +1,468 @@ +:root { + --bg: #0e1014; + --panel: #171a21; + --panel-2: #1f232c; + --ink: #e8eaed; + --ink-dim: #9aa3af; + --ink-faint: #6b7280; + --line: #2a2f3a; + --accent: #7dd3fc; + --accent-ink: #082f49; + + --grade-a: #34d399; + --grade-b: #6ee7b7; + --grade-c: #fbbf24; + --grade-d: #fb923c; + --grade-f: #f87171; + + --radius: 14px; + --mono: var(--font-mono), ui-monospace, "SF Mono", Menlo, monospace; + --sans: var(--font-sans), ui-sans-serif, system-ui, -apple-system, sans-serif; +} + +* { + box-sizing: border-box; +} + +html, +body { + margin: 0; + padding: 0; +} + +body { + background: var(--bg); + color: var(--ink); + font-family: var(--sans); + line-height: 1.55; + -webkit-font-smoothing: antialiased; +} + +a { + color: var(--accent); + text-decoration: none; +} +a:hover { + text-decoration: underline; +} + +.wrap { + max-width: 880px; + margin: 0 auto; + padding: 0 24px; +} + +/* ---- Hero / form ---- */ +.hero { + padding: 72px 0 40px; +} +.eyebrow { + font-family: var(--mono); + font-size: 12px; + letter-spacing: 0.14em; + text-transform: uppercase; + color: var(--accent); + margin: 0 0 14px; +} +.hero h1 { + font-size: clamp(30px, 5vw, 44px); + line-height: 1.08; + letter-spacing: -0.02em; + margin: 0 0 14px; + font-weight: 680; +} +.hero p.lede { + font-size: 17px; + color: var(--ink-dim); + max-width: 60ch; + margin: 0 0 32px; +} + +.form-row { + display: flex; + gap: 10px; + align-items: stretch; + max-width: 520px; +} +.input-shell { + display: flex; + align-items: center; + flex: 1; + background: var(--panel); + border: 1px solid var(--line); + border-radius: var(--radius); + padding: 0 14px; + transition: 
border-color 0.15s ease; +} +.input-shell:focus-within { + border-color: var(--accent); +} +.input-shell .at { + color: var(--ink-faint); + font-family: var(--mono); + margin-right: 2px; +} +.input-shell input { + flex: 1; + background: transparent; + border: 0; + outline: 0; + color: var(--ink); + font-size: 16px; + font-family: var(--mono); + padding: 14px 0; +} +button.go { + background: var(--accent); + color: var(--accent-ink); + border: 0; + border-radius: var(--radius); + font-weight: 640; + font-size: 15px; + padding: 0 22px; + cursor: pointer; + transition: filter 0.15s ease; +} +button.go:hover:not(:disabled) { + filter: brightness(1.08); +} +button.go:disabled { + opacity: 0.55; + cursor: progress; +} + +.error { + margin-top: 18px; + color: var(--grade-f); + font-size: 15px; +} + +/* ---- Loading ---- */ +.loading { + margin-top: 28px; + color: var(--ink-dim); + font-family: var(--mono); + font-size: 14px; + display: flex; + align-items: center; + gap: 10px; +} +.dot { + width: 8px; + height: 8px; + border-radius: 50%; + background: var(--accent); + animation: pulse 1s ease-in-out infinite; +} +@keyframes pulse { + 0%, + 100% { + opacity: 0.25; + } + 50% { + opacity: 1; + } +} + +/* ---- Report ---- */ +.report-head { + padding: 8px 0 6px; + border-top: 1px solid var(--line); + margin-top: 40px; +} +.report-head h2 { + font-size: 22px; + margin: 22px 0 4px; + letter-spacing: -0.01em; +} +.report-head .sub { + color: var(--ink-dim); + font-size: 14px; +} +.fidelity { + margin: 16px 0 8px; + font-size: 13px; + color: var(--ink-faint); + background: var(--panel); + border: 1px solid var(--line); + border-left: 3px solid var(--accent); + border-radius: 8px; + padding: 12px 14px; +} + +.cards { + display: flex; + flex-direction: column; + gap: 16px; + margin: 24px 0 64px; +} + +.card { + background: var(--panel); + border: 1px solid var(--line); + border-radius: var(--radius); + padding: 20px 22px; +} +.card-top { + display: flex; + align-items: flex-start; 
+ gap: 14px; +} +.grade { + flex: none; + width: 46px; + height: 46px; + border-radius: 11px; + display: grid; + place-items: center; + font-family: var(--mono); + font-weight: 700; + font-size: 20px; + color: #0c0d10; +} +.grade.g-a { + background: var(--grade-a); +} +.grade.g-b { + background: var(--grade-b); +} +.grade.g-c { + background: var(--grade-c); +} +.grade.g-d { + background: var(--grade-d); +} +.grade.g-f { + background: var(--grade-f); +} + +.card-title { + flex: 1; + min-width: 0; +} +.card-title h3 { + margin: 0; + font-size: 17px; + font-family: var(--mono); + font-weight: 600; + word-break: break-word; +} +.card-title .desc { + margin: 4px 0 0; + color: var(--ink-dim); + font-size: 14px; +} +.card-meta { + display: flex; + flex-wrap: wrap; + gap: 8px 14px; + margin-top: 8px; + font-size: 12.5px; + color: var(--ink-faint); + font-family: var(--mono); +} +.score { + flex: none; + text-align: right; + font-family: var(--mono); +} +.score .pct { + font-size: 20px; + font-weight: 680; +} +.score .pct-label { + font-size: 11px; + color: var(--ink-faint); + text-transform: uppercase; + letter-spacing: 0.08em; +} + +.flags { + display: flex; + flex-wrap: wrap; + gap: 6px; + margin-top: 14px; +} +.flag { + font-family: var(--mono); + font-size: 11.5px; + color: var(--ink-dim); + background: var(--panel-2); + border: 1px solid var(--line); + border-radius: 999px; + padding: 3px 9px; +} + +.fixes { + margin-top: 16px; + border-top: 1px dashed var(--line); + padding-top: 14px; +} +.fixes h4 { + margin: 0 0 10px; + font-size: 12px; + text-transform: uppercase; + letter-spacing: 0.1em; + color: var(--accent); + font-family: var(--mono); +} +.fix { + display: flex; + gap: 12px; + padding: 9px 0; +} +.fix + .fix { + border-top: 1px solid var(--line); +} +.fix-rank { + flex: none; + font-family: var(--mono); + color: var(--ink-faint); + font-size: 13px; + width: 18px; + padding-top: 1px; +} +.fix-body { + flex: 1; +} +.fix-body .fix-title { + font-weight: 600; + 
font-size: 14.5px; +} +.fix-body .fix-action { + color: var(--ink-dim); + font-size: 14px; + margin-top: 2px; +} +.fix-tags { + display: flex; + gap: 10px; + margin-top: 6px; + font-family: var(--mono); + font-size: 11.5px; + color: var(--ink-faint); +} +.fix-tags .impact { + color: var(--grade-a); +} +.clean { + color: var(--ink-dim); + font-size: 14px; + margin-top: 14px; +} + +/* ---- CTA / footer ---- */ +.cta { + border-top: 1px solid var(--line); + margin: 8px 0 80px; + padding-top: 24px; + display: flex; + flex-wrap: wrap; + gap: 14px 28px; + align-items: baseline; +} +.cta .label { + font-size: 14px; + color: var(--ink-dim); +} +.cta code { + font-family: var(--mono); + background: var(--panel); + border: 1px solid var(--line); + border-radius: 7px; + padding: 3px 8px; + font-size: 13px; + color: var(--ink); +} + +.foot { + color: var(--ink-faint); + font-size: 13px; + padding: 24px 0 60px; + border-top: 1px solid var(--line); +} + +/* ---- Report nav (shareable page) ---- */ +.report-nav { + padding: 32px 0 0; +} +.report-nav .back { + font-family: var(--mono); + font-size: 13px; + color: var(--ink-dim); +} +.report-nav .back:hover { + color: var(--accent); + text-decoration: none; +} + +/* ---- Waitlist capture ---- */ +.waitlist { + margin: 0 0 80px; + padding: 22px 24px; + background: linear-gradient(180deg, var(--panel) 0%, var(--panel-2) 100%); + border: 1px solid var(--line); + border-radius: var(--radius); +} +.waitlist-copy { + font-size: 15px; + color: var(--ink-dim); + max-width: 56ch; + margin-bottom: 14px; +} +.waitlist-copy strong { + color: var(--ink); + font-weight: 600; +} +.waitlist-row { + display: flex; + gap: 10px; + max-width: 480px; +} +.waitlist-row input { + flex: 1; + background: var(--bg); + border: 1px solid var(--line); + border-radius: var(--radius); + padding: 13px 14px; + color: var(--ink); + font-size: 15px; + font-family: var(--sans); + outline: 0; + transition: border-color 0.15s ease; +} +.waitlist-row input:focus { + 
border-color: var(--accent); +} +.waitlist-row button { + background: transparent; + color: var(--accent); + border: 1px solid var(--accent); + border-radius: var(--radius); + font-family: var(--sans); + font-weight: 600; + font-size: 15px; + padding: 0 20px; + cursor: pointer; + transition: + background 0.15s ease, + color 0.15s ease; +} +.waitlist-row button:hover:not(:disabled) { + background: var(--accent); + color: var(--accent-ink); +} +.waitlist-row button:disabled { + opacity: 0.55; + cursor: progress; +} +.waitlist-done { + margin: 0 0 80px; + padding: 20px 24px; + background: var(--panel); + border: 1px solid var(--line); + border-left: 3px solid var(--grade-a); + border-radius: var(--radius); + color: var(--ink); + font-size: 15px; +} diff --git a/web/app/layout.tsx b/web/app/layout.tsx new file mode 100644 index 0000000..7141df8 --- /dev/null +++ b/web/app/layout.tsx @@ -0,0 +1,35 @@ +import type { Metadata } from "next"; +import { JetBrains_Mono, Space_Grotesk } from "next/font/google"; +import "./globals.css"; + +const sans = Space_Grotesk({ + subsets: ["latin"], + weight: ["300", "400", "500", "700"], + variable: "--font-sans", + display: "swap", +}); + +const mono = JetBrains_Mono({ + subsets: ["latin"], + weight: ["400", "500", "700"], + variable: "--font-mono", + display: "swap", +}); + +export const metadata: Metadata = { + title: "Portfolio Health — clone-free GitHub report", + description: + "Paste a GitHub username and get a clone-free portfolio health report: grades, the biggest drags, and the concrete fixes that move each repo forward.", +}; + +export default function RootLayout({ + children, +}: { + children: React.ReactNode; +}) { + return ( + + {children} + + ); +} diff --git a/web/app/page.tsx b/web/app/page.tsx new file mode 100644 index 0000000..11a6f4d --- /dev/null +++ b/web/app/page.tsx @@ -0,0 +1,24 @@ +import UsernameForm from "@/components/UsernameForm"; + +export default function Home() { + return ( +
+
+

Portfolio health · clone-free

+

See what your GitHub portfolio is missing.

+

+ Paste a username. We read the GitHub API — no cloning — score every + public repo, and surface the highest-leverage fixes for each one. + Free, no signup. +

+ +
+ +
+ Scored from public API metadata and repository structure. Deep + code-quality, secret-scanning, and dependency-age signals need the full + local scan (the open-source CLI). +
+
+ ); +} diff --git a/web/app/u/[username]/page.tsx b/web/app/u/[username]/page.tsx new file mode 100644 index 0000000..a2fc82e --- /dev/null +++ b/web/app/u/[username]/page.tsx @@ -0,0 +1,22 @@ +import Link from "next/link"; +import ReportLoader from "@/components/ReportLoader"; + +export default async function UserReport({ + params, +}: { + params: Promise<{ username: string }>; +}) { + // Next.js already URL-decodes dynamic segment values. + const { username } = await params; + + return ( +
+ + +
+ ); +} diff --git a/web/components/RepoCard.tsx b/web/components/RepoCard.tsx new file mode 100644 index 0000000..2a2e612 --- /dev/null +++ b/web/components/RepoCard.tsx @@ -0,0 +1,97 @@ +import type { RepoAudit } from "@/lib/types"; +import { safeHttpUrl } from "@/lib/url"; + +function gradeClass(grade: string): string { + const g = grade.trim().toUpperCase().charAt(0); + return ["A", "B", "C", "D", "F"].includes(g) ? `g-${g.toLowerCase()}` : "g-f"; +} + +function pct(score01: number): string { + return `${Math.round(score01 * 100)}%`; +} + +function impactLabel(delta: number): string { + return `+${Math.round(delta * 100)} pts`; +} + +export default function RepoCard({ repo }: { repo: RepoAudit }) { + const { metadata, grade, overall_score, flags, action_candidates } = repo; + // Lead with fixes, not the number: the top 3 highest-leverage actions. + const fixes = action_candidates.slice(0, 3); + const repoUrl = safeHttpUrl(metadata.html_url); + + return ( +
+
+
+ {grade.charAt(0)} +
+ +
+

+ {repoUrl ? ( + + {metadata.name} + + ) : ( + metadata.name + )} +

+ {metadata.description && ( +

{metadata.description}

+ )} +
+ {metadata.language && {metadata.language}} + ★ {metadata.stars} + {metadata.fork && fork} + {metadata.archived && archived} +
+
+ +
+
{pct(overall_score)}
+
health
+
+
+ + {flags.length > 0 && ( +
+ {flags.map((flag) => ( + + {flag} + + ))} +
+ )} + + {fixes.length > 0 ? ( +
+

Top fixes

+ {fixes.map((fix, i) => ( +
+
{i + 1}
+
+
{fix.title}
+
{fix.action}
+
+ + {impactLabel(fix.expected_lens_delta)} + + {fix.effort} effort + {fix.expected_tier_movement} +
+
+
+ ))} +
+ ) : ( +

+ No high-leverage fixes flagged — this one is in good shape. +

+ )} +
+ ); +} diff --git a/web/components/ReportLoader.tsx b/web/components/ReportLoader.tsx new file mode 100644 index 0000000..05b6aa8 --- /dev/null +++ b/web/components/ReportLoader.tsx @@ -0,0 +1,52 @@ +"use client"; + +import { useEffect, useState } from "react"; +import { fetchReport, ReportError } from "@/lib/api"; +import type { Report } from "@/lib/types"; +import ReportView from "./ReportView"; + +type State = + | { phase: "loading" } + | { phase: "done"; report: Report } + | { phase: "error"; message: string }; + +/** Fetches and renders the report for `username` (client-side, with loading UX). */ +export default function ReportLoader({ username }: { username: string }) { + const [state, setState] = useState({ phase: "loading" }); + + useEffect(() => { + const controller = new AbortController(); + setState({ phase: "loading" }); + fetchReport(username, controller.signal) + .then((report) => setState({ phase: "done", report })) + .catch((err: unknown) => { + // Aborted on unmount / username change — drop it silently. + if (controller.signal.aborted) return; + let message = "Something went wrong. Try again."; + if (err instanceof ReportError) { + message = err.message; + } else { + console.error("Unexpected error fetching report:", err); + } + setState({ phase: "error", message }); + }); + return () => controller.abort(); + }, [username]); + + if (state.phase === "loading") { + return ( +
+ + Scoring @{username} from the GitHub API — no cloning, ~10s… +
+ ); + } + if (state.phase === "error") { + return ( +
+ {state.message} +
+ ); + } + return ; +} diff --git a/web/components/ReportView.tsx b/web/components/ReportView.tsx new file mode 100644 index 0000000..d39ec93 --- /dev/null +++ b/web/components/ReportView.tsx @@ -0,0 +1,46 @@ +import type { Report } from "@/lib/types"; +import RepoCard from "./RepoCard"; +import WaitlistForm from "./WaitlistForm"; + +export default function ReportView({ report }: { report: Report }) { + // Worst-graded first: the report's job is to point at what to fix. + const repos = [...report.repos].sort( + (a, b) => a.overall_score - b.overall_score, + ); + + return ( +
+
+

+ + @{report.username} + +

+
+ {report.repo_count} {report.repo_count === 1 ? "repo" : "repos"}{" "} + scored clone-free +
+

{report.fidelity_note}

+
+ +
+ {repos.map((repo) => ( + + ))} +
+ +
+ + Want the deep scan — code quality, secrets, dependency age? + + pipx run github-repo-auditor audit {report.username} +
+ + +
+ ); +} diff --git a/web/components/UsernameForm.tsx b/web/components/UsernameForm.tsx new file mode 100644 index 0000000..94d21c1 --- /dev/null +++ b/web/components/UsernameForm.tsx @@ -0,0 +1,50 @@ +"use client"; + +import { useRouter } from "next/navigation"; +import { type FormEvent, useState, useTransition } from "react"; + +/** Username input that routes to the shareable report URL on submit. */ +export default function UsernameForm({ + initial = "", + cta = "Score it", +}: { + initial?: string; + cta?: string; +}) { + const [username, setUsername] = useState(initial); + // useTransition's pending flag tracks the navigation and resets itself, so + // the button never gets stuck disabled if the push is cancelled. + const [submitting, startTransition] = useTransition(); + const router = useRouter(); + + function onSubmit(e: FormEvent) { + e.preventDefault(); + const handle = username.trim().replace(/^@/, ""); + if (!handle) return; + startTransition(() => { + router.push(`/u/${encodeURIComponent(handle)}`); + }); + } + + return ( +
+ + +
+ ); +} diff --git a/web/components/WaitlistForm.tsx b/web/components/WaitlistForm.tsx new file mode 100644 index 0000000..f5b2c3d --- /dev/null +++ b/web/components/WaitlistForm.tsx @@ -0,0 +1,72 @@ +"use client"; + +import { type FormEvent, useState } from "react"; +import { joinWaitlist, ReportError } from "@/lib/api"; + +type State = + | { phase: "idle" } + | { phase: "submitting" } + | { phase: "done"; already: boolean } + | { phase: "error"; message: string }; + +/** Email capture for the monitoring waitlist — the "earn the tier" signal. */ +export default function WaitlistForm({ source }: { source?: string }) { + const [email, setEmail] = useState(""); + const [state, setState] = useState({ phase: "idle" }); + + async function onSubmit(e: FormEvent) { + e.preventDefault(); + const value = email.trim(); + if (!value || state.phase === "submitting") return; + setState({ phase: "submitting" }); + try { + const result = await joinWaitlist(value, source); + setState({ phase: "done", already: result === "already_joined" }); + } catch (err: unknown) { + const message = + err instanceof ReportError ? err.message : "Something went wrong."; + if (!(err instanceof ReportError)) console.error(err); + setState({ phase: "error", message }); + } + } + + if (state.phase === "done") { + return ( +

+ {state.already + ? "You're already on the list — we'll be in touch." + : "You're on the list. We'll email you when monitoring ships."} +

+ ); + } + + const submitting = state.phase === "submitting"; + return ( +
+
+ Want this tracked over time? Get notified when + portfolio monitoring + trend alerts ship. +
+
+ setEmail(e.target.value)} + aria-label="Email for the monitoring waitlist" + required + /> + +
+ {state.phase === "error" && ( +
+ {state.message} +
+ )} +
+ ); +} diff --git a/web/lib/api.ts b/web/lib/api.ts new file mode 100644 index 0000000..d143b07 --- /dev/null +++ b/web/lib/api.ts @@ -0,0 +1,104 @@ +import type { Report } from "./types"; + +const API_BASE = ( + process.env.NEXT_PUBLIC_API_BASE ?? "http://127.0.0.1:8080" +).replace(/\/$/, ""); + +export class ReportError extends Error { + constructor( + message: string, + readonly status: number, + ) { + super(message); + this.name = "ReportError"; + } +} + +/** Fetch a clone-free portfolio report for `username` from the FastAPI engine. */ +export async function fetchReport( + username: string, + signal?: AbortSignal, +): Promise { + const url = `${API_BASE}/api/report/${encodeURIComponent(username)}`; + + let resp: Response; + try { + resp = await fetch(url, { + headers: { Accept: "application/json" }, + signal, + }); + } catch (err) { + // A caller-initiated abort isn't an error to surface — rethrow it so the + // effect cleanup can swallow it rather than showing a failure message. + if (err instanceof DOMException && err.name === "AbortError") throw err; + throw new ReportError( + "Could not reach the report service. Is the API running?", + 0, + ); + } + + if (!resp.ok) { + throw new ReportError(messageForStatus(resp.status, username), resp.status); + } + + const data: unknown = await resp.json(); + if (!isReport(data)) { + throw new ReportError( + "The report service returned an unexpected response.", + 502, + ); + } + return data; +} + +export type WaitlistResult = "joined" | "already_joined"; + +/** Submit an email to the monitoring waitlist. `source` is optional context. */ +export async function joinWaitlist( + email: string, + source?: string, +): Promise { + let resp: Response; + try { + resp = await fetch(`${API_BASE}/api/waitlist`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ email, source }), + }); + } catch { + throw new ReportError("Could not reach the service. 
Try again.", 0); + } + if (!resp.ok) { + if (resp.status === 422) { + throw new ReportError("Enter a valid email address.", 422); + } + if (resp.status === 429) { + throw new ReportError("Too many requests — try again shortly.", 429); + } + throw new ReportError("Something went wrong. Try again.", resp.status); + } + const data = (await resp.json()) as { status?: string }; + return data.status === "already_joined" ? "already_joined" : "joined"; +} + +/** Minimal boundary check that the payload has the shape we render. */ +function isReport(value: unknown): value is Report { + if (typeof value !== "object" || value === null) return false; + const v = value as Record; + return typeof v.username === "string" && Array.isArray(v.repos); +} + +function messageForStatus(status: number, username: string): string { + switch (status) { + case 404: + return `No GitHub user named "${username}" was found.`; + case 422: + return "That doesn't look like a valid GitHub username."; + case 429: + return "We're being rate-limited by GitHub right now. Try again in a minute."; + case 502: + return "GitHub is having a moment. Try again shortly."; + default: + return `Something went wrong (HTTP ${status}).`; + } +} diff --git a/web/lib/types.ts b/web/lib/types.ts new file mode 100644 index 0000000..fc91d22 --- /dev/null +++ b/web/lib/types.ts @@ -0,0 +1,53 @@ +// Mirrors the JSON shape emitted by the Python engine's +// `ApiOnlyReport.to_dict()` / `RepoAudit.to_dict()` (src/api_only.py, src/models.py). +// Only the fields the UI renders are typed; the payload carries more. 
+ +export interface RepoMetadata { + name: string; + full_name: string; + description: string | null; + language: string | null; + html_url: string; + stars: number; + forks: number; + archived: boolean; + fork: boolean; + pushed_at: string | null; +} + +export interface AnalyzerResult { + dimension: string; + score: number; + max_score: number; +} + +// One entry of `action_candidates` — the engine's ranked, concrete fixes. +export interface ActionCandidate { + key: string; + title: string; + action: string; + lens: string; + effort: string; + confidence: number; + expected_lens_delta: number; + expected_tier_movement: string; + rationale: string; +} + +export interface RepoAudit { + metadata: RepoMetadata; + analyzer_results: AnalyzerResult[]; + overall_score: number; // 0..1 + grade: string; + completeness_tier: string; + flags: string[]; + action_candidates: ActionCandidate[]; +} + +export interface Report { + username: string; + mode: string; + fidelity_note: string; + repo_count: number; + repos: RepoAudit[]; +} diff --git a/web/lib/url.ts b/web/lib/url.ts new file mode 100644 index 0000000..be8dd2c --- /dev/null +++ b/web/lib/url.ts @@ -0,0 +1,19 @@ +/** Return `url` only if it is a safe http(s) link, else undefined. + * + * Report strings (e.g. a repo's html_url) come from the GitHub API via the + * relay; an unexpected `javascript:`/`data:` scheme would otherwise become an + * href-based XSS vector. Callers render plain text when this returns undefined. + */ +export function safeHttpUrl( + url: string | null | undefined, +): string | undefined { + if (!url) return undefined; + try { + const parsed = new URL(url); + return parsed.protocol === "https:" || parsed.protocol === "http:" + ? 
url + : undefined; + } catch { + return undefined; + } +} diff --git a/web/next.config.mjs b/web/next.config.mjs new file mode 100644 index 0000000..6de7fac --- /dev/null +++ b/web/next.config.mjs @@ -0,0 +1,9 @@ +/** @type {import('next').NextConfig} */ +const nextConfig = { + reactStrictMode: true, + // Pin the tracing root to this app — the repo has sibling lockfiles that + // otherwise make Next infer the wrong workspace root (and warn on every boot). + outputFileTracingRoot: import.meta.dirname, +}; + +export default nextConfig; diff --git a/web/package.json b/web/package.json new file mode 100644 index 0000000..7f2c0a6 --- /dev/null +++ b/web/package.json @@ -0,0 +1,23 @@ +{ + "name": "ghra-web", + "version": "0.1.0", + "private": true, + "scripts": { + "dev": "next dev", + "build": "next build", + "start": "next start", + "lint": "next lint", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "next": "^15.5.0", + "react": "^19.1.0", + "react-dom": "^19.1.0" + }, + "devDependencies": { + "@types/node": "^22.10.0", + "@types/react": "^19.1.0", + "@types/react-dom": "^19.1.0", + "typescript": "^5.7.0" + } +} diff --git a/web/pnpm-lock.yaml b/web/pnpm-lock.yaml new file mode 100644 index 0000000..21bd0b0 --- /dev/null +++ b/web/pnpm-lock.yaml @@ -0,0 +1,599 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + next: + specifier: ^15.5.0 + version: 15.5.19(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + react: + specifier: ^19.1.0 + version: 19.2.7 + react-dom: + specifier: ^19.1.0 + version: 19.2.7(react@19.2.7) + devDependencies: + '@types/node': + specifier: ^22.10.0 + version: 22.19.21 + '@types/react': + specifier: ^19.1.0 + version: 19.2.17 + '@types/react-dom': + specifier: ^19.1.0 + version: 19.2.3(@types/react@19.2.17) + typescript: + specifier: ^5.7.0 + version: 5.9.3 + +packages: + + '@emnapi/runtime@1.11.1': + resolution: {integrity: 
sha512-vgj7R3y3Wgx24IQaGPA/R6YFXLHVMOZ0uVEyIQPaWs+rd1AzfEMXlAC22FYwO1XkKR6NPsq7mUandH8oIRdZFw==} + + '@img/colour@1.1.0': + resolution: {integrity: sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==} + engines: {node: '>=18'} + + '@img/sharp-darwin-arm64@0.34.5': + resolution: {integrity: sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [darwin] + + '@img/sharp-darwin-x64@0.34.5': + resolution: {integrity: sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-darwin-arm64@1.2.4': + resolution: {integrity: sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==} + cpu: [arm64] + os: [darwin] + + '@img/sharp-libvips-darwin-x64@1.2.4': + resolution: {integrity: sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-linux-arm64@1.2.4': + resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-arm@1.2.4': + resolution: {integrity: sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} + cpu: [arm] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-ppc64@1.2.4': + resolution: {integrity: sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==} + cpu: [ppc64] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-riscv64@1.2.4': + resolution: {integrity: sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==} + cpu: [riscv64] + os: [linux] + libc: [glibc] + + 
'@img/sharp-libvips-linux-s390x@1.2.4': + resolution: {integrity: sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==} + cpu: [s390x] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-x64@1.2.4': + resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@img/sharp-libvips-linuxmusl-x64@1.2.4': + resolution: {integrity: sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} + cpu: [x64] + os: [linux] + libc: [musl] + + '@img/sharp-linux-arm64@0.34.5': + resolution: {integrity: sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-arm@0.34.5': + resolution: {integrity: sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-ppc64@0.34.5': + resolution: {integrity: sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [ppc64] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-riscv64@0.34.5': + resolution: {integrity: sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [riscv64] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-s390x@0.34.5': + resolution: {integrity: 
sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [s390x] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-x64@0.34.5': + resolution: {integrity: sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@img/sharp-linuxmusl-arm64@0.34.5': + resolution: {integrity: sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@img/sharp-linuxmusl-x64@0.34.5': + resolution: {integrity: sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + libc: [musl] + + '@img/sharp-wasm32@0.34.5': + resolution: {integrity: sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [wasm32] + + '@img/sharp-win32-arm64@0.34.5': + resolution: {integrity: sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [win32] + + '@img/sharp-win32-ia32@0.34.5': + resolution: {integrity: sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [ia32] + os: [win32] + + '@img/sharp-win32-x64@0.34.5': + resolution: {integrity: sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [win32] + + '@next/env@15.5.19': + resolution: {integrity: 
sha512-sWWluFvcv5v3Fxznmf2ZfjyoVQt/64oCnYqS90inQWGzMPK1VjvekPiz3OPHKmFT30EnHrjlbyaHLt3M0vWabw==} + + '@next/swc-darwin-arm64@15.5.19': + resolution: {integrity: sha512-jx9wWlTKueHKPvVOndyr7WuaevWCkuYqsQ8gC0TMPKAVWG3MhcdMrjfo9tvIZNXd0QOUYXXvAcZ325y8Uq7uzg==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [darwin] + + '@next/swc-darwin-x64@15.5.19': + resolution: {integrity: sha512-291KFcsIQ3OenRdiUDFOR6W3wezzH4auENXm1gbm1Bjd4ANMMRgxPrWTUztQN43BnVoVuMnHCrLeECIMwgFKbA==} + engines: {node: '>= 10'} + cpu: [x64] + os: [darwin] + + '@next/swc-linux-arm64-gnu@15.5.19': + resolution: {integrity: sha512-WeH+nelQyyMeE2f8FxBRZNrGipya5zHZV2vjzfCOAYyiI6am+NbnWAAldOBFQBB2w0DjJcsvrKqoFT2b7+5YoA==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@next/swc-linux-arm64-musl@15.5.19': + resolution: {integrity: sha512-5xTOE0lDlDCSSfp+BAif7j17VRRCjWp//ZPZy6NI0QpdrhxtQnsZguSx0xAAZ0c9XZLrLLwCe/XVe5YPrRilKw==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@next/swc-linux-x64-gnu@15.5.19': + resolution: {integrity: sha512-LTxRmMgqqMv05Had879W00Fm53quiJd3Zuz8h1JSNJ3nGSlbZ/7Tjs1tKyScgN3Au3t3MyPsjPlq60fMmSHLsg==} + engines: {node: '>= 10'} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@next/swc-linux-x64-musl@15.5.19': + resolution: {integrity: sha512-eoNQSpA5PQfB9wBO4RA47MTDXWz1fizy9Y3Z6e4DetYIF3dvjuu8sj7aIGn/bFCU6lnFzTK34NtCaffP4NsQ7Q==} + engines: {node: '>= 10'} + cpu: [x64] + os: [linux] + libc: [musl] + + '@next/swc-win32-arm64-msvc@15.5.19': + resolution: {integrity: sha512-6UNt2dFuCHOe446sm/Kp69nUe8/wIhnh9bm6Xcqw4qEWCOppLMOvhTBVgvM7invVUNr4SPpP6NOQsACtn2IN9Q==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [win32] + + '@next/swc-win32-x64-msvc@15.5.19': + resolution: {integrity: sha512-PhmojAHyqMne56HBLGu9dhDnHPuFmEjrXSQMM/nW0J6j849lk3ESrVtqNJcCk8CKOV7brpTTbaYAjwKPzKM69w==} + engines: {node: '>= 10'} + cpu: [x64] + os: [win32] + + '@swc/helpers@0.5.15': + resolution: {integrity: 
sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==} + + '@types/node@22.19.21': + resolution: {integrity: sha512-VMeFBSCKQKmm2swI2kW51SFusDqekC6q9trBCvJ/JliDchFSuoYYKN7yVNjPthP1HKZcx3U1gI/wTcEBjEFKTA==} + + '@types/react-dom@19.2.3': + resolution: {integrity: sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==} + peerDependencies: + '@types/react': ^19.2.0 + + '@types/react@19.2.17': + resolution: {integrity: sha512-MXfmqaVPEVgkBT/aY0aGCkRWWtByiYQXo3xdQ8r5RzuFrPiRn8Gar2tQdXSUQ2GKV3bkXckek89V8wQBY2Q/Aw==} + + caniuse-lite@1.0.30001799: + resolution: {integrity: sha512-hG1bReV+OUU+MOqK4t/ZWI0tZOyz3rqS9XuhOUz1cIcbwBKjOyJEJuw9ER5JuNyqxNk8u/JUVbGibBOL1yrjFw==} + + client-only@0.0.1: + resolution: {integrity: sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==} + + csstype@3.2.3: + resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==} + + detect-libc@2.1.2: + resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} + engines: {node: '>=8'} + + nanoid@3.3.13: + resolution: {integrity: sha512-sPdqC6ByMVVGvF1ynvvMo0/o+oD1VX7DaHhijt1bFgjvBkHBib4t49GoNDhf2NDta4oeUNlaGbSt5K7qjZ955Q==} + engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + hasBin: true + + next@15.5.19: + resolution: {integrity: sha512-xNOW6tYshGX1/Oi3F8uuk4gpDeWsSUE/1Z0G5uUMekIxaQ0xc03UXd9II0VQHYMWviMeA0OHpJFAKsHf8bTYVg==} + engines: {node: ^18.18.0 || ^19.8.0 || >= 20.0.0} + hasBin: true + peerDependencies: + '@opentelemetry/api': ^1.1.0 + '@playwright/test': ^1.51.1 + babel-plugin-react-compiler: '*' + react: ^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0 + react-dom: ^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0 + sass: ^1.3.0 + peerDependenciesMeta: + '@opentelemetry/api': + optional: true + '@playwright/test': + optional: true + 
babel-plugin-react-compiler: + optional: true + sass: + optional: true + + picocolors@1.1.1: + resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} + + postcss@8.4.31: + resolution: {integrity: sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==} + engines: {node: ^10 || ^12 || >=14} + + react-dom@19.2.7: + resolution: {integrity: sha512-t0BRVXvbiE/o20Hfw669rLbMCDWtYZLvmJigy2f0MxsXF+71pxhR3xOkspmsO8h3ZlNzyibAmtCa3l4lYKk6gQ==} + peerDependencies: + react: ^19.2.7 + + react@19.2.7: + resolution: {integrity: sha512-HNe9WslTbXmFK8o8cmwgAeJFSBvt1bPdHCVKtaaV+WlAN36mpT4hcRpwbf3fY56ar2oIXzsBpOAiIRHAdY0OlQ==} + engines: {node: '>=0.10.0'} + + scheduler@0.27.0: + resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==} + + semver@7.8.4: + resolution: {integrity: sha512-rUCObTnP32Q08R2uuIrt7r9PlEonuTmtuXYcW6s5kjdlj3xbnwe+21yXptAUYcMAABLkYYTtnmzb3w3EDZfueA==} + engines: {node: '>=10'} + hasBin: true + + sharp@0.34.5: + resolution: {integrity: sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + + source-map-js@1.2.1: + resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==} + engines: {node: '>=0.10.0'} + + styled-jsx@5.1.6: + resolution: {integrity: sha512-qSVyDTeMotdvQYoHWLNGwRFJHC+i+ZvdBRYosOFgC+Wg1vx4frN2/RG/NA7SYqqvKNLf39P2LSRA2pu6n0XYZA==} + engines: {node: '>= 12.0.0'} + peerDependencies: + '@babel/core': '*' + babel-plugin-macros: '*' + react: '>= 16.8.0 || 17.x.x || ^18.0.0-0 || ^19.0.0-0' + peerDependenciesMeta: + '@babel/core': + optional: true + babel-plugin-macros: + optional: true + + tslib@2.8.1: + resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + + 
typescript@5.9.3: + resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} + engines: {node: '>=14.17'} + hasBin: true + + undici-types@6.21.0: + resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} + +snapshots: + + '@emnapi/runtime@1.11.1': + dependencies: + tslib: 2.8.1 + optional: true + + '@img/colour@1.1.0': + optional: true + + '@img/sharp-darwin-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-darwin-arm64': 1.2.4 + optional: true + + '@img/sharp-darwin-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-darwin-x64': 1.2.4 + optional: true + + '@img/sharp-libvips-darwin-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-darwin-x64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-arm@1.2.4': + optional: true + + '@img/sharp-libvips-linux-ppc64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-riscv64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-s390x@1.2.4': + optional: true + + '@img/sharp-libvips-linux-x64@1.2.4': + optional: true + + '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-linuxmusl-x64@1.2.4': + optional: true + + '@img/sharp-linux-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm64': 1.2.4 + optional: true + + '@img/sharp-linux-arm@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm': 1.2.4 + optional: true + + '@img/sharp-linux-ppc64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-ppc64': 1.2.4 + optional: true + + '@img/sharp-linux-riscv64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-riscv64': 1.2.4 + optional: true + + '@img/sharp-linux-s390x@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-s390x': 1.2.4 + optional: true + + '@img/sharp-linux-x64@0.34.5': + optionalDependencies: + 
'@img/sharp-libvips-linux-x64': 1.2.4 + optional: true + + '@img/sharp-linuxmusl-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 + optional: true + + '@img/sharp-linuxmusl-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-x64': 1.2.4 + optional: true + + '@img/sharp-wasm32@0.34.5': + dependencies: + '@emnapi/runtime': 1.11.1 + optional: true + + '@img/sharp-win32-arm64@0.34.5': + optional: true + + '@img/sharp-win32-ia32@0.34.5': + optional: true + + '@img/sharp-win32-x64@0.34.5': + optional: true + + '@next/env@15.5.19': {} + + '@next/swc-darwin-arm64@15.5.19': + optional: true + + '@next/swc-darwin-x64@15.5.19': + optional: true + + '@next/swc-linux-arm64-gnu@15.5.19': + optional: true + + '@next/swc-linux-arm64-musl@15.5.19': + optional: true + + '@next/swc-linux-x64-gnu@15.5.19': + optional: true + + '@next/swc-linux-x64-musl@15.5.19': + optional: true + + '@next/swc-win32-arm64-msvc@15.5.19': + optional: true + + '@next/swc-win32-x64-msvc@15.5.19': + optional: true + + '@swc/helpers@0.5.15': + dependencies: + tslib: 2.8.1 + + '@types/node@22.19.21': + dependencies: + undici-types: 6.21.0 + + '@types/react-dom@19.2.3(@types/react@19.2.17)': + dependencies: + '@types/react': 19.2.17 + + '@types/react@19.2.17': + dependencies: + csstype: 3.2.3 + + caniuse-lite@1.0.30001799: {} + + client-only@0.0.1: {} + + csstype@3.2.3: {} + + detect-libc@2.1.2: + optional: true + + nanoid@3.3.13: {} + + next@15.5.19(react-dom@19.2.7(react@19.2.7))(react@19.2.7): + dependencies: + '@next/env': 15.5.19 + '@swc/helpers': 0.5.15 + caniuse-lite: 1.0.30001799 + postcss: 8.4.31 + react: 19.2.7 + react-dom: 19.2.7(react@19.2.7) + styled-jsx: 5.1.6(react@19.2.7) + optionalDependencies: + '@next/swc-darwin-arm64': 15.5.19 + '@next/swc-darwin-x64': 15.5.19 + '@next/swc-linux-arm64-gnu': 15.5.19 + '@next/swc-linux-arm64-musl': 15.5.19 + '@next/swc-linux-x64-gnu': 15.5.19 + '@next/swc-linux-x64-musl': 15.5.19 + 
'@next/swc-win32-arm64-msvc': 15.5.19 + '@next/swc-win32-x64-msvc': 15.5.19 + sharp: 0.34.5 + transitivePeerDependencies: + - '@babel/core' + - babel-plugin-macros + + picocolors@1.1.1: {} + + postcss@8.4.31: + dependencies: + nanoid: 3.3.13 + picocolors: 1.1.1 + source-map-js: 1.2.1 + + react-dom@19.2.7(react@19.2.7): + dependencies: + react: 19.2.7 + scheduler: 0.27.0 + + react@19.2.7: {} + + scheduler@0.27.0: {} + + semver@7.8.4: + optional: true + + sharp@0.34.5: + dependencies: + '@img/colour': 1.1.0 + detect-libc: 2.1.2 + semver: 7.8.4 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.34.5 + '@img/sharp-darwin-x64': 0.34.5 + '@img/sharp-libvips-darwin-arm64': 1.2.4 + '@img/sharp-libvips-darwin-x64': 1.2.4 + '@img/sharp-libvips-linux-arm': 1.2.4 + '@img/sharp-libvips-linux-arm64': 1.2.4 + '@img/sharp-libvips-linux-ppc64': 1.2.4 + '@img/sharp-libvips-linux-riscv64': 1.2.4 + '@img/sharp-libvips-linux-s390x': 1.2.4 + '@img/sharp-libvips-linux-x64': 1.2.4 + '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 + '@img/sharp-libvips-linuxmusl-x64': 1.2.4 + '@img/sharp-linux-arm': 0.34.5 + '@img/sharp-linux-arm64': 0.34.5 + '@img/sharp-linux-ppc64': 0.34.5 + '@img/sharp-linux-riscv64': 0.34.5 + '@img/sharp-linux-s390x': 0.34.5 + '@img/sharp-linux-x64': 0.34.5 + '@img/sharp-linuxmusl-arm64': 0.34.5 + '@img/sharp-linuxmusl-x64': 0.34.5 + '@img/sharp-wasm32': 0.34.5 + '@img/sharp-win32-arm64': 0.34.5 + '@img/sharp-win32-ia32': 0.34.5 + '@img/sharp-win32-x64': 0.34.5 + optional: true + + source-map-js@1.2.1: {} + + styled-jsx@5.1.6(react@19.2.7): + dependencies: + client-only: 0.0.1 + react: 19.2.7 + + tslib@2.8.1: {} + + typescript@5.9.3: {} + + undici-types@6.21.0: {} diff --git a/web/pnpm-workspace.yaml b/web/pnpm-workspace.yaml new file mode 100644 index 0000000..520eb9a --- /dev/null +++ b/web/pnpm-workspace.yaml @@ -0,0 +1,2 @@ +allowBuilds: + sharp: true diff --git a/web/tsconfig.json b/web/tsconfig.json new file mode 100644 index 0000000..2ba14b2 --- /dev/null +++ 
b/web/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "ES2022", + "lib": ["dom", "dom.iterable", "ES2022"], + "allowJs": false, + "skipLibCheck": true, + "strict": true, + "noEmit": true, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "bundler", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "preserve", + "incremental": true, + "plugins": [{ "name": "next" }], + "paths": { "@/*": ["./*"] } + }, + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], + "exclude": ["node_modules"] +}