Skip to content

Commit a865432

Browse files
authored
Merge pull request #4 from OpenAdaptAI/feat/telemetry-hardening-phase2
feat: harden telemetry privacy filters and anon ID policy
2 parents a8f2a57 + 216f6d0 commit a865432

9 files changed

Lines changed: 502 additions & 86 deletions

File tree

README.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ Unified telemetry and error tracking for OpenAdapt packages.
1515
- **Unified Error Tracking**: Consistent error reporting across all OpenAdapt packages
1616
- **Privacy-First Design**: Automatic PII scrubbing and path sanitization
1717
- **Configurable Opt-Out**: Respects `DO_NOT_TRACK` and custom environment variables
18-
- **CI/Dev Mode Detection**: Automatically tags internal usage for filtering
18+
- **Internal Usage Tagging**: Explicit flags + CI detection with optional git heuristic
1919
- **GlitchTip/Sentry Compatible**: Uses the Sentry SDK for maximum compatibility
2020

2121
## Installation
@@ -98,10 +98,12 @@ with TelemetrySpan("indexing", "build_faiss_index") as span:
9898
| `OPENADAPT_TELEMETRY_ENABLED` | `true` | Enable/disable telemetry |
9999
| `OPENADAPT_INTERNAL` | `false` | Tag as internal usage |
100100
| `OPENADAPT_DEV` | `false` | Development mode |
101+
| `OPENADAPT_INTERNAL_FROM_GIT` | `false` | Optional: tag as internal when `.git` exists in the current working directory or its parent |
101102
| `OPENADAPT_TELEMETRY_DSN` | - | GlitchTip/Sentry DSN |
102103
| `OPENADAPT_TELEMETRY_ENVIRONMENT` | `production` | Environment name |
103104
| `OPENADAPT_TELEMETRY_SAMPLE_RATE` | `1.0` | Error sampling rate (0.0-1.0) |
104105
| `OPENADAPT_TELEMETRY_TRACES_SAMPLE_RATE` | `0.01` | Performance sampling rate |
106+
| `OPENADAPT_TELEMETRY_ANON_SALT` | generated | Optional anonymization salt override; must be at least 32 characters, otherwise it is ignored with a warning (advanced use only) |
105107

106108
### Configuration File
107109

@@ -160,7 +162,9 @@ export OPENADAPT_TELEMETRY_ENABLED=false
160162
- File paths have usernames replaced with `<user>`
161163
- Sensitive fields (password, token, api_key, etc.) are redacted
162164
- Email addresses and phone numbers are scrubbed from messages
163-
- User IDs are hashed before upload (`anon:<hash>`)
165+
- Top-level event messages/logentry strings are scrubbed
166+
- Tag keys are validated, sensitive/invalid keys are dropped, and values are scrubbed before upload
167+
- User IDs are HMAC-anonymized before upload (`anon:v2:<hash>`)
164168
- `send_default_pii` is enforced to `false` by the client
165169

166170
## Internal Usage Tagging
@@ -169,8 +173,8 @@ Internal/developer usage is automatically detected via:
169173

170174
1. `OPENADAPT_INTERNAL=true` environment variable
171175
2. `OPENADAPT_DEV=true` environment variable
172-
3. Git repository present in working directory
173-
4. CI environment detected (GitHub Actions, GitLab CI, etc.)
176+
3. CI environment detected (GitHub Actions, GitLab CI, etc.)
177+
4. Optional git repository heuristic when `OPENADAPT_INTERNAL_FROM_GIT=true`
174178

175179
Filter in GlitchTip:
176180
```

src/openadapt_telemetry/client.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@ def is_ci_environment() -> bool:
6060
def is_internal_user() -> bool:
6161
"""Determine if current usage is from internal team.
6262
63-
Uses multiple heuristics to detect internal/developer usage:
63+
Uses multiple signals to detect internal/developer usage:
6464
1. Explicit OPENADAPT_INTERNAL environment variable
6565
2. OPENADAPT_DEV environment variable
66-
3. Git repository present in current directory
67-
4. CI environment detected
66+
3. CI environment detected
67+
4. Optional git repository heuristic when OPENADAPT_INTERNAL_FROM_GIT=true
6868
6969
Returns:
7070
True if this appears to be internal usage.
@@ -77,25 +77,26 @@ def is_internal_user() -> bool:
7777
if os.getenv("OPENADAPT_DEV", "").lower() in ("true", "1", "yes"):
7878
return True
7979

80-
# Method 3: Git repository present (development checkout)
81-
if Path(".git").exists() or Path("../.git").exists():
82-
return True
83-
84-
# Method 4: CI/CD environment
80+
# Method 3: CI/CD environment
8581
if is_ci_environment():
8682
return True
8783

84+
# Method 4: optional git heuristic
85+
if os.getenv("OPENADAPT_INTERNAL_FROM_GIT", "").lower() in ("true", "1", "yes"):
86+
if Path(".git").exists() or Path("../.git").exists():
87+
return True
88+
8889
return False
8990

9091

9192
def _compose_before_send(base: BeforeSendFn, extra: BeforeSendFn) -> BeforeSendFn:
    """Chain a custom ``before_send`` hook in front of the final privacy filter.

    Args:
        base: Hook applied last; its return value is what the composed hook
            returns whenever ``extra`` keeps the event.
        extra: Custom hook applied first to the incoming event.

    Returns:
        A hook that yields ``None`` as soon as ``extra`` drops the event, and
        otherwise the result of ``base`` applied to ``extra``'s output.
    """

    def composed(event: Event, hint: Hint) -> Optional[Event]:
        # The custom hook goes first so that anything it adds or rewrites
        # still passes through ``base`` before leaving the process.
        candidate = extra(event, hint)
        return None if candidate is None else base(candidate, hint)

    return composed
101102

@@ -219,7 +220,7 @@ def initialize(
219220
if not callable(custom_before_send):
220221
raise TypeError("before_send must be callable")
221222
warnings.warn(
222-
"Custom before_send is composed after OpenAdapt privacy filtering and cannot bypass scrubbing.",
223+
"Custom before_send runs before OpenAdapt privacy filtering; final payload is always scrubbed.",
223224
stacklevel=2,
224225
)
225226
before_send = _compose_before_send(base_before_send, custom_before_send)

src/openadapt_telemetry/config.py

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
import json
1313
import os
14+
import secrets
15+
import warnings
1416
from dataclasses import dataclass, field
1517
from pathlib import Path
1618
from typing import Any, Optional
@@ -27,11 +29,13 @@
2729
"performance_tracking": True,
2830
"feature_usage": True,
2931
"send_default_pii": False,
32+
"anon_salt": None,
3033
}
3134

3235
# Config file location
3336
CONFIG_DIR = Path.home() / ".config" / "openadapt"
3437
CONFIG_FILE = CONFIG_DIR / "telemetry.json"
38+
_INVALID_ANON_SALT_WARNED = False
3539

3640

3741
@dataclass
@@ -48,6 +52,7 @@ class TelemetryConfig:
4852
performance_tracking: bool = True
4953
feature_usage: bool = True
5054
send_default_pii: bool = False
55+
anon_salt: Optional[str] = None
5156

5257
_loaded: bool = field(default=False, repr=False)
5358

@@ -73,7 +78,8 @@ def _load_config_file() -> dict[str, Any]:
7378

7479
try:
7580
with open(CONFIG_FILE) as f:
76-
return json.load(f)
81+
data = json.load(f)
82+
return data if isinstance(data, dict) else {}
7783
except (json.JSONDecodeError, OSError):
7884
return {}
7985

@@ -122,9 +128,72 @@ def _get_env_config() -> dict[str, Any]:
122128
except ValueError:
123129
pass
124130

131+
# Optional override for deterministic anonymization in controlled environments.
132+
anon_salt = os.getenv("OPENADAPT_TELEMETRY_ANON_SALT")
133+
if anon_salt:
134+
if _is_valid_anon_salt(anon_salt):
135+
config["anon_salt"] = anon_salt.strip()
136+
else:
137+
_warn_invalid_anon_salt_once()
138+
125139
return config
126140

127141

142+
def _is_valid_anon_salt(value: Any) -> bool:
143+
"""Check whether a salt value is valid for HMAC anonymization."""
144+
return isinstance(value, str) and len(value.strip()) >= 32
145+
146+
147+
def _warn_invalid_anon_salt_once() -> None:
    """Emit the invalid-salt warning at most once per process.

    The module-level flag ``_INVALID_ANON_SALT_WARNED`` records that the
    warning has been issued, so later calls return without warning again.
    """
    global _INVALID_ANON_SALT_WARNED
    if not _INVALID_ANON_SALT_WARNED:
        warnings.warn(
            "Ignoring invalid OPENADAPT_TELEMETRY_ANON_SALT; must be >= 32 chars.",
            stacklevel=2,
        )
        # Set only after the warning was emitted successfully.
        _INVALID_ANON_SALT_WARNED = True
157+
158+
159+
def _generate_anon_salt() -> str:
160+
"""Generate a high-entropy random salt."""
161+
return secrets.token_hex(32)
162+
163+
164+
def get_or_create_anon_salt() -> str:
    """Get anonymization salt from env/config, creating one if missing.

    Priority:
    1. OPENADAPT_TELEMETRY_ANON_SALT (if valid)
    2. telemetry config file `anon_salt` (if valid)
    3. generated and persisted random salt

    A newly generated salt is merged into the existing config data and
    written to a temporary file in the config directory, which then replaces
    the config file in one step. The temporary file is created by
    ``tempfile.mkstemp`` and is therefore readable and writable only by the
    current user, which keeps the salt private on multi-user machines.

    Returns:
        The salt string with surrounding whitespace stripped. If the salt
        cannot be persisted, a warning is issued and the generated value is
        still returned for use in this process.
    """
    # Local import: tempfile is not among the module-level imports.
    import tempfile

    env_salt = os.getenv("OPENADAPT_TELEMETRY_ANON_SALT")
    if env_salt:
        if _is_valid_anon_salt(env_salt):
            return env_salt.strip()
        _warn_invalid_anon_salt_once()

    config_data = _load_config_file()
    file_salt = config_data.get("anon_salt")
    if _is_valid_anon_salt(file_salt):
        return str(file_salt).strip()

    generated = _generate_anon_salt()
    config_data["anon_salt"] = generated

    tmp_path = None
    try:
        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        # Same directory as the target so os.replace stays on one filesystem.
        fd, tmp_path = tempfile.mkstemp(
            dir=CONFIG_DIR, prefix=".telemetry-", suffix=".tmp"
        )
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(config_data, f, indent=2)
        # Swap in the finished file so readers never see a partial write.
        os.replace(tmp_path, CONFIG_FILE)
        tmp_path = None
    except OSError:
        warnings.warn(
            "Failed to persist telemetry anonymization salt; using ephemeral salt for this process.",
            stacklevel=2,
        )
    finally:
        # Best-effort cleanup of a temp file left behind by a failed write.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
    return generated
195+
196+
128197
def load_config() -> TelemetryConfig:
129198
"""Load telemetry configuration from all sources.
130199
@@ -148,7 +217,7 @@ def load_config() -> TelemetryConfig:
148217
merged.update(env_config)
149218

150219
# Remove None values for fields that should use defaults
151-
config_dict = {k: v for k, v in merged.items() if v is not None or k == "dsn"}
220+
config_dict = {k: v for k, v in merged.items() if v is not None or k in {"dsn", "anon_salt"}}
152221

153222
return TelemetryConfig(**config_dict, _loaded=True)
154223

@@ -172,6 +241,7 @@ def save_config(config: TelemetryConfig) -> None:
172241
"performance_tracking": config.performance_tracking,
173242
"feature_usage": config.feature_usage,
174243
"send_default_pii": config.send_default_pii,
244+
"anon_salt": config.anon_salt,
175245
}
176246

177247
with open(CONFIG_FILE, "w") as f:

src/openadapt_telemetry/decorators.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import functools
1212
import time
13-
from typing import Any, Callable, Optional, TypeVar, Union
13+
from typing import Any, Callable, Optional, TypeVar
1414

1515
import sentry_sdk
1616

@@ -57,7 +57,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
5757
result = func(*args, **kwargs)
5858
transaction.set_status("ok")
5959
return result
60-
except Exception as e:
60+
except Exception:
6161
transaction.set_status("internal_error")
6262
raise
6363
finally:

0 commit comments

Comments
 (0)