Skip to content

Commit 153d3cf

Browse files
authored
Merge pull request #3 from OpenAdaptAI/fix/add-readme-badges
feat: make telemetry opt-out and enforce anonymized user IDs
2 parents 052cabf + 31d5d58 commit 153d3cf

File tree

7 files changed

+670
-86
lines changed

7 files changed

+670
-86
lines changed

README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Unified telemetry and error tracking for OpenAdapt packages.
1616
- **Usage Counters (PostHog)**: Lightweight product usage events for adoption metrics
1717
- **Privacy-First Design**: Automatic PII scrubbing and path sanitization
1818
- **Configurable Opt-Out**: Respects `DO_NOT_TRACK` and custom environment variables
19-
- **CI/Dev Mode Detection**: Automatically tags internal usage for filtering
19+
- **Internal Usage Tagging**: Explicit flags + CI detection with optional git heuristic
2020
- **GlitchTip/Sentry Compatible**: Uses the Sentry SDK for maximum compatibility
2121

2222
## Installation
@@ -111,6 +111,7 @@ with TelemetrySpan("indexing", "build_faiss_index") as span:
111111
| `OPENADAPT_TELEMETRY_ENABLED` | `true` | Enable/disable telemetry |
112112
| `OPENADAPT_INTERNAL` | `false` | Tag as internal usage |
113113
| `OPENADAPT_DEV` | `false` | Development mode |
114+
| `OPENADAPT_INTERNAL_FROM_GIT` | `false` | Optional: tag as internal when running from a git checkout |
114115
| `OPENADAPT_TELEMETRY_DSN` | - | GlitchTip/Sentry DSN |
115116
| `OPENADAPT_POSTHOG_PROJECT_API_KEY` | embedded default | PostHog ingestion project token (`phc_...`) |
116117
| `OPENADAPT_POSTHOG_HOST` | `https://us.i.posthog.com` | PostHog ingestion host |
@@ -120,6 +121,7 @@ with TelemetrySpan("indexing", "build_faiss_index") as span:
120121
| `OPENADAPT_TELEMETRY_ENVIRONMENT` | `production` | Environment name |
121122
| `OPENADAPT_TELEMETRY_SAMPLE_RATE` | `1.0` | Error sampling rate (0.0-1.0) |
122123
| `OPENADAPT_TELEMETRY_TRACES_SAMPLE_RATE` | `0.01` | Performance sampling rate |
124+
| `OPENADAPT_TELEMETRY_ANON_SALT` | generated | Optional anonymization salt override; must be at least 32 characters or it is ignored with a warning (advanced use only) |
123125

124126
### Configuration File
125127

@@ -178,16 +180,19 @@ export OPENADAPT_TELEMETRY_ENABLED=false
178180
- File paths have usernames replaced with `<user>`
179181
- Sensitive fields (password, token, api_key, etc.) are redacted
180182
- Email addresses and phone numbers are scrubbed from messages
183+
- Top-level event messages/logentry strings are scrubbed
184+
- Tag keys are validated, sensitive/invalid keys are dropped, and values are scrubbed before upload
185+
- User IDs are HMAC-anonymized before upload (`anon:v2:<hash>`)
186+
- `send_default_pii` is enforced to `false` by the client
181187

182188
## Internal Usage Tagging
183189

184190
Internal/developer usage is automatically detected via:
185191

186192
1. `OPENADAPT_INTERNAL=true` environment variable
187193
2. `OPENADAPT_DEV=true` environment variable
188-
3. Running from source (not frozen executable)
189-
4. Git repository present in working directory
190-
5. CI environment detected (GitHub Actions, GitLab CI, etc.)
194+
3. CI environment detected (GitHub Actions, GitLab CI, etc.)
195+
4. Optional git repository heuristic when `OPENADAPT_INTERNAL_FROM_GIT=true`
191196

192197
Filter in GlitchTip:
193198
```

src/openadapt_telemetry/client.py

Lines changed: 62 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,17 @@
99
import os
1010
import platform
1111
import sys
12+
import warnings
1213
from pathlib import Path
13-
from typing import Any, Dict, Optional
14+
from typing import Any, Callable, Dict, Optional
1415

1516
import sentry_sdk
17+
from sentry_sdk.types import Event, Hint
1618

1719
from .config import TelemetryConfig, load_config
18-
from .privacy import create_before_send_filter
20+
from .privacy import anonymize_identifier, create_before_send_filter
21+
22+
BeforeSendFn = Callable[[Event, Hint], Optional[Event]]
1923

2024

2125
def is_running_from_executable() -> bool:
@@ -56,12 +60,11 @@ def is_ci_environment() -> bool:
5660
def is_internal_user() -> bool:
5761
"""Determine if current usage is from internal team.
5862
59-
Uses multiple heuristics to detect internal/developer usage:
63+
Uses multiple signals to detect internal/developer usage:
6064
1. Explicit OPENADAPT_INTERNAL environment variable
6165
2. OPENADAPT_DEV environment variable
62-
3. Not running from frozen executable
63-
4. Git repository present in current directory
64-
5. CI environment detected
66+
3. CI environment detected
67+
4. Optional git repository heuristic when OPENADAPT_INTERNAL_FROM_GIT=true
6568
6669
Returns:
6770
True if this appears to be internal usage.
@@ -74,21 +77,30 @@ def is_internal_user() -> bool:
7477
if os.getenv("OPENADAPT_DEV", "").lower() in ("true", "1", "yes"):
7578
return True
7679

77-
# Method 3: Not running from executable (indicates dev mode)
78-
if not is_running_from_executable():
79-
return True
80-
81-
# Method 4: Git repository present (development checkout)
82-
if Path(".git").exists() or Path("../.git").exists():
83-
return True
84-
85-
# Method 5: CI/CD environment
80+
# Method 3: CI/CD environment
8681
if is_ci_environment():
8782
return True
8883

84+
# Method 4: optional git heuristic
85+
if os.getenv("OPENADAPT_INTERNAL_FROM_GIT", "").lower() in ("true", "1", "yes"):
86+
if Path(".git").exists() or Path("../.git").exists():
87+
return True
88+
8989
return False
9090

9191

92+
def _compose_before_send(base: BeforeSendFn, extra: BeforeSendFn) -> BeforeSendFn:
93+
"""Compose custom before_send before final privacy filtering."""
94+
95+
def composed(event: Event, hint: Hint) -> Optional[Event]:
96+
modified = extra(event, hint)
97+
if modified is None:
98+
return None
99+
return base(modified, hint)
100+
101+
return composed
102+
103+
92104
class TelemetryClient:
93105
"""Unified telemetry client for all OpenAdapt packages.
94106
@@ -128,20 +140,13 @@ def reset_instance(cls) -> None:
128140
def _check_enabled(self) -> bool:
129141
"""Check if telemetry should be enabled.
130142
131-
Checks environment variables for opt-out signals.
143+
Uses merged config with defaults/env/file precedence.
132144
133145
Returns:
134146
True if telemetry should be enabled.
135147
"""
136-
# Universal opt-out (DO_NOT_TRACK standard)
137-
if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
138-
return False
139-
140-
# Package-specific opt-out
141-
if os.getenv("OPENADAPT_TELEMETRY_ENABLED", "").lower() in ("false", "0", "no"):
142-
return False
143-
144-
return True
148+
self._config = load_config()
149+
return bool(self._config.enabled)
145150

146151
@property
147152
def enabled(self) -> bool:
@@ -187,10 +192,8 @@ def initialize(
187192
Returns:
188193
True if initialization succeeded or the client was already initialized, False if telemetry is disabled or no DSN is configured.
189194
"""
190-
if not self._enabled:
191-
return False
192-
193-
if self._initialized and not kwargs.get("force", False):
195+
force = bool(kwargs.pop("force", False))
196+
if self._initialized and not force:
194197
return True
195198

196199
# Load configuration
@@ -201,28 +204,49 @@ def initialize(
201204
self._config.dsn = dsn
202205
if environment:
203206
self._config.environment = environment
207+
self._enabled = bool(self._config.enabled)
208+
209+
if not self._enabled:
210+
return False
204211

205212
# Skip if no DSN configured
206213
if not self._config.dsn:
207214
return False
208215

209-
# Create privacy filter
210-
before_send = create_before_send_filter()
216+
# Always enforce the privacy scrubber as the final step; an optional custom filter runs before it.
217+
base_before_send = create_before_send_filter()
218+
custom_before_send = kwargs.pop("before_send", None)
219+
if custom_before_send is not None:
220+
if not callable(custom_before_send):
221+
raise TypeError("before_send must be callable")
222+
warnings.warn(
223+
"Custom before_send runs before OpenAdapt privacy filtering; final payload is always scrubbed.",
224+
stacklevel=2,
225+
)
226+
before_send = _compose_before_send(base_before_send, custom_before_send)
227+
else:
228+
before_send = base_before_send
229+
230+
if "send_default_pii" in kwargs:
231+
kwargs.pop("send_default_pii")
232+
warnings.warn(
233+
"Ignoring sentry init override for send_default_pii; OpenAdapt telemetry enforces send_default_pii=False.",
234+
stacklevel=2,
235+
)
211236

212237
# Initialize Sentry SDK
213238
sentry_kwargs = {
214239
"dsn": self._config.dsn,
215240
"environment": self._config.environment,
216241
"sample_rate": self._config.sample_rate,
217242
"traces_sample_rate": self._config.traces_sample_rate,
218-
"send_default_pii": self._config.send_default_pii,
243+
# Enforced for privacy safety across all callers/configs.
244+
"send_default_pii": False,
219245
"before_send": before_send,
220246
}
221247

222248
# Merge in any additional kwargs
223249
sentry_kwargs.update(kwargs)
224-
# Remove our internal kwargs
225-
sentry_kwargs.pop("force", None)
226250

227251
sentry_sdk.init(**sentry_kwargs)
228252

@@ -314,12 +338,13 @@ def set_user(
314338
Note: Only sets anonymous user ID. Never set email, name, or other PII.
315339
316340
Args:
317-
user_id: Anonymous user identifier.
318-
**kwargs: Additional user properties (id only recommended).
341+
user_id: User identifier to hash before sending.
342+
**kwargs: Ignored. Additional user fields are dropped.
319343
"""
320344
if not self._enabled or not self._initialized:
321345
return
322-
sentry_sdk.set_user({"id": user_id, **kwargs})
346+
_ = kwargs
347+
sentry_sdk.set_user({"id": anonymize_identifier(user_id)})
323348

324349
def set_tag(self, key: str, value: str) -> None:
325350
"""Set a custom tag for all subsequent events.

src/openadapt_telemetry/config.py

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
import json
1313
import os
14+
import secrets
15+
import warnings
1416
from dataclasses import dataclass, field
1517
from pathlib import Path
1618
from typing import Any, Optional
@@ -27,11 +29,13 @@
2729
"performance_tracking": True,
2830
"feature_usage": True,
2931
"send_default_pii": False,
32+
"anon_salt": None,
3033
}
3134

3235
# Config file location
3336
CONFIG_DIR = Path.home() / ".config" / "openadapt"
3437
CONFIG_FILE = CONFIG_DIR / "telemetry.json"
38+
_INVALID_ANON_SALT_WARNED = False
3539

3640

3741
@dataclass
@@ -48,6 +52,7 @@ class TelemetryConfig:
4852
performance_tracking: bool = True
4953
feature_usage: bool = True
5054
send_default_pii: bool = False
55+
anon_salt: Optional[str] = None
5156

5257
_loaded: bool = field(default=False, repr=False)
5358

@@ -73,7 +78,8 @@ def _load_config_file() -> dict[str, Any]:
7378

7479
try:
7580
with open(CONFIG_FILE) as f:
76-
return json.load(f)
81+
data = json.load(f)
82+
return data if isinstance(data, dict) else {}
7783
except (json.JSONDecodeError, OSError):
7884
return {}
7985

@@ -82,15 +88,15 @@ def _get_env_config() -> dict[str, Any]:
8288
"""Get configuration from environment variables."""
8389
config: dict[str, Any] = {}
8490

85-
# Universal opt-out (DO_NOT_TRACK standard)
86-
if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
87-
config["enabled"] = False
88-
89-
# Package-specific opt-out
91+
# Package-specific toggle
9092
enabled_env = os.getenv("OPENADAPT_TELEMETRY_ENABLED", "")
9193
if enabled_env:
9294
config["enabled"] = _parse_bool(enabled_env)
9395

96+
# Universal opt-out (DO_NOT_TRACK standard) always wins.
97+
if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
98+
config["enabled"] = False
99+
94100
# Internal/developer flags
95101
if os.getenv("OPENADAPT_INTERNAL", "").lower() in ("true", "1", "yes"):
96102
config["internal"] = True
@@ -122,9 +128,72 @@ def _get_env_config() -> dict[str, Any]:
122128
except ValueError:
123129
pass
124130

131+
# Optional override for deterministic anonymization in controlled environments.
132+
anon_salt = os.getenv("OPENADAPT_TELEMETRY_ANON_SALT")
133+
if anon_salt:
134+
if _is_valid_anon_salt(anon_salt):
135+
config["anon_salt"] = anon_salt.strip()
136+
else:
137+
_warn_invalid_anon_salt_once()
138+
125139
return config
126140

127141

142+
def _is_valid_anon_salt(value: Any) -> bool:
143+
"""Check whether a salt value is valid for HMAC anonymization."""
144+
return isinstance(value, str) and len(value.strip()) >= 32
145+
146+
147+
def _warn_invalid_anon_salt_once() -> None:
148+
"""Warn once per process when OPENADAPT_TELEMETRY_ANON_SALT is invalid."""
149+
global _INVALID_ANON_SALT_WARNED
150+
if _INVALID_ANON_SALT_WARNED:
151+
return
152+
warnings.warn(
153+
"Ignoring invalid OPENADAPT_TELEMETRY_ANON_SALT; must be >= 32 chars.",
154+
stacklevel=2,
155+
)
156+
_INVALID_ANON_SALT_WARNED = True
157+
158+
159+
def _generate_anon_salt() -> str:
160+
"""Generate a high-entropy random salt."""
161+
return secrets.token_hex(32)
162+
163+
164+
def get_or_create_anon_salt() -> str:
165+
"""Get anonymization salt from env/config, creating one if missing.
166+
167+
Priority:
168+
1. OPENADAPT_TELEMETRY_ANON_SALT (if valid)
169+
2. telemetry config file `anon_salt` (if valid)
170+
3. generated and persisted random salt
171+
"""
172+
env_salt = os.getenv("OPENADAPT_TELEMETRY_ANON_SALT")
173+
if env_salt:
174+
if _is_valid_anon_salt(env_salt):
175+
return env_salt.strip()
176+
_warn_invalid_anon_salt_once()
177+
178+
config_data = _load_config_file()
179+
file_salt = config_data.get("anon_salt")
180+
if _is_valid_anon_salt(file_salt):
181+
return str(file_salt).strip()
182+
183+
generated = _generate_anon_salt()
184+
config_data["anon_salt"] = generated
185+
try:
186+
CONFIG_DIR.mkdir(parents=True, exist_ok=True)
187+
with open(CONFIG_FILE, "w") as f:
188+
json.dump(config_data, f, indent=2)
189+
except OSError:
190+
warnings.warn(
191+
"Failed to persist telemetry anonymization salt; using ephemeral salt for this process.",
192+
stacklevel=2,
193+
)
194+
return generated
195+
196+
128197
def load_config() -> TelemetryConfig:
129198
"""Load telemetry configuration from all sources.
130199
@@ -148,7 +217,7 @@ def load_config() -> TelemetryConfig:
148217
merged.update(env_config)
149218

150219
# Remove None values for fields that should use defaults
151-
config_dict = {k: v for k, v in merged.items() if v is not None or k == "dsn"}
220+
config_dict = {k: v for k, v in merged.items() if v is not None or k in {"dsn", "anon_salt"}}
152221

153222
return TelemetryConfig(**config_dict, _loaded=True)
154223

@@ -172,6 +241,7 @@ def save_config(config: TelemetryConfig) -> None:
172241
"performance_tracking": config.performance_tracking,
173242
"feature_usage": config.feature_usage,
174243
"send_default_pii": config.send_default_pii,
244+
"anon_salt": config.anon_salt,
175245
}
176246

177247
with open(CONFIG_FILE, "w") as f:

0 commit comments

Comments
 (0)