|
| 1 | +"""PostHog usage-event client for OpenAdapt packages. |
| 2 | +
|
| 3 | +This module captures lightweight, privacy-safe usage counters (for example: |
| 4 | +`agent_run`, `action_executed`, `demo_recorded`) to PostHog ingestion. |
| 5 | +""" |
| 6 | + |
| 7 | +from __future__ import annotations |
| 8 | + |
| 9 | +import json |
| 10 | +import os |
| 11 | +import platform |
| 12 | +import queue |
| 13 | +import threading |
| 14 | +import time |
| 15 | +import urllib.error |
| 16 | +import urllib.request |
| 17 | +import uuid |
| 18 | +from importlib import metadata |
| 19 | +from pathlib import Path |
| 20 | +from typing import Any |
| 21 | + |
| 22 | +from .client import is_ci_environment |
| 23 | +from .privacy import scrub_dict |
| 24 | + |
| 25 | +DEFAULT_POSTHOG_HOST = "https://us.i.posthog.com" |
| 26 | +DEFAULT_POSTHOG_PROJECT_API_KEY = "phc_935iWKc6O7u6DCp2eFAmK5WmCwv35QXMa6LulTJ3uqh" |
| 27 | +DISTINCT_ID_FILE = Path.home() / ".openadapt" / "telemetry_distinct_id" |
| 28 | +MAX_STRING_LEN = 256 |
| 29 | +QUEUE_MAXSIZE = 2048 |
| 30 | + |
| 31 | +_event_queue: queue.Queue[dict[str, Any]] | None = None |
| 32 | +_worker_started = False |
| 33 | +_worker_lock = threading.Lock() |
| 34 | + |
| 35 | + |
| 36 | +def _is_truthy(raw: str | None) -> bool: |
| 37 | + return str(raw or "").strip().lower() in {"1", "true", "yes", "on"} |
| 38 | + |
| 39 | + |
| 40 | +def _usage_enabled() -> bool: |
| 41 | + if _is_truthy(os.getenv("DO_NOT_TRACK")): |
| 42 | + return False |
| 43 | + |
| 44 | + explicit = os.getenv("OPENADAPT_TELEMETRY_ENABLED") |
| 45 | + if explicit is not None: |
| 46 | + return _is_truthy(explicit) |
| 47 | + |
| 48 | + if is_ci_environment() and not _is_truthy(os.getenv("OPENADAPT_TELEMETRY_IN_CI")): |
| 49 | + return False |
| 50 | + |
| 51 | + return True |
| 52 | + |
| 53 | + |
| 54 | +def _posthog_host() -> str: |
| 55 | + return os.getenv("OPENADAPT_POSTHOG_HOST", DEFAULT_POSTHOG_HOST).rstrip("/") |
| 56 | + |
| 57 | + |
| 58 | +def _posthog_project_api_key() -> str: |
| 59 | + return os.getenv("OPENADAPT_POSTHOG_PROJECT_API_KEY", DEFAULT_POSTHOG_PROJECT_API_KEY) |
| 60 | + |
| 61 | + |
| 62 | +def _get_distinct_id() -> str: |
| 63 | + env_id = os.getenv("OPENADAPT_TELEMETRY_DISTINCT_ID") |
| 64 | + if env_id: |
| 65 | + return env_id |
| 66 | + |
| 67 | + try: |
| 68 | + if DISTINCT_ID_FILE.exists(): |
| 69 | + existing = DISTINCT_ID_FILE.read_text(encoding="utf-8").strip() |
| 70 | + if existing: |
| 71 | + return existing |
| 72 | + DISTINCT_ID_FILE.parent.mkdir(parents=True, exist_ok=True) |
| 73 | + generated = str(uuid.uuid4()) |
| 74 | + DISTINCT_ID_FILE.write_text(generated, encoding="utf-8") |
| 75 | + return generated |
| 76 | + except OSError: |
| 77 | + return str(uuid.uuid4()) |
| 78 | + |
| 79 | + |
| 80 | +def _normalize_value(value: Any) -> Any: |
| 81 | + if value is None or isinstance(value, (int, float, bool)): |
| 82 | + return value |
| 83 | + return str(value)[:MAX_STRING_LEN] |
| 84 | + |
| 85 | + |
| 86 | +def _sanitize_properties(properties: dict[str, Any] | None) -> dict[str, Any]: |
| 87 | + if not properties: |
| 88 | + return {} |
| 89 | + normalized = {str(k): _normalize_value(v) for k, v in properties.items() if str(k).strip()} |
| 90 | + redacted = scrub_dict(normalized, deep=True, scrub_values=False) |
| 91 | + return {k: v for k, v in redacted.items() if v != "[REDACTED]"} |
| 92 | + |
| 93 | + |
| 94 | +def _package_version(package_name: str) -> str: |
| 95 | + try: |
| 96 | + return metadata.version(package_name) |
| 97 | + except metadata.PackageNotFoundError: |
| 98 | + return "unknown" |
| 99 | + |
| 100 | + |
| 101 | +def _base_properties(package_name: str) -> dict[str, Any]: |
| 102 | + return { |
| 103 | + "package": package_name, |
| 104 | + "version": _package_version(package_name), |
| 105 | + "python_version": platform.python_version(), |
| 106 | + "platform": platform.system().lower(), |
| 107 | + "timestamp": int(time.time()), |
| 108 | + } |
| 109 | + |
| 110 | + |
| 111 | +def _send_payload(payload: dict[str, Any]) -> None: |
| 112 | + timeout_seconds = float(os.getenv("OPENADAPT_TELEMETRY_TIMEOUT_SECONDS", "1.0")) |
| 113 | + req = urllib.request.Request( |
| 114 | + f"{_posthog_host()}/capture/", |
| 115 | + data=json.dumps(payload).encode("utf-8"), |
| 116 | + headers={ |
| 117 | + "Content-Type": "application/json", |
| 118 | + "User-Agent": "openadapt-telemetry-posthog/1", |
| 119 | + }, |
| 120 | + method="POST", |
| 121 | + ) |
| 122 | + try: |
| 123 | + with urllib.request.urlopen(req, timeout=timeout_seconds): |
| 124 | + return |
| 125 | + except (urllib.error.URLError, TimeoutError, OSError, ValueError): |
| 126 | + return |
| 127 | + |
| 128 | + |
| 129 | +def _worker_loop() -> None: |
| 130 | + assert _event_queue is not None |
| 131 | + while True: |
| 132 | + payload = _event_queue.get() |
| 133 | + _send_payload(payload) |
| 134 | + _event_queue.task_done() |
| 135 | + |
| 136 | + |
| 137 | +def _ensure_worker() -> queue.Queue[dict[str, Any]]: |
| 138 | + global _event_queue |
| 139 | + global _worker_started |
| 140 | + |
| 141 | + with _worker_lock: |
| 142 | + if _event_queue is None: |
| 143 | + _event_queue = queue.Queue(maxsize=QUEUE_MAXSIZE) |
| 144 | + if not _worker_started: |
| 145 | + thread = threading.Thread(target=_worker_loop, daemon=True, name="oa-posthog") |
| 146 | + thread.start() |
| 147 | + _worker_started = True |
| 148 | + return _event_queue |
| 149 | + |
| 150 | + |
| 151 | +def capture_event( |
| 152 | + event: str, |
| 153 | + properties: dict[str, Any] | None = None, |
| 154 | + package_name: str = "openadapt", |
| 155 | +) -> bool: |
| 156 | + """Queue a usage event for PostHog ingestion. |
| 157 | +
|
| 158 | + Returns True when queued; False when disabled or dropped. |
| 159 | + """ |
| 160 | + event_name = str(event or "").strip() |
| 161 | + if not event_name or not _usage_enabled(): |
| 162 | + return False |
| 163 | + |
| 164 | + payload = { |
| 165 | + "api_key": _posthog_project_api_key(), |
| 166 | + "event": event_name, |
| 167 | + "distinct_id": _get_distinct_id(), |
| 168 | + "properties": { |
| 169 | + **_base_properties(package_name), |
| 170 | + **_sanitize_properties(properties), |
| 171 | + }, |
| 172 | + } |
| 173 | + |
| 174 | + try: |
| 175 | + _ensure_worker().put_nowait(payload) |
| 176 | + return True |
| 177 | + except queue.Full: |
| 178 | + return False |
| 179 | + |
| 180 | + |
| 181 | +def capture_usage_event( |
| 182 | + event: str, |
| 183 | + properties: dict[str, Any] | None = None, |
| 184 | + package_name: str = "openadapt", |
| 185 | +) -> bool: |
| 186 | + """Alias for capture_event to make usage intent explicit.""" |
| 187 | + return capture_event(event=event, properties=properties, package_name=package_name) |
0 commit comments