Skip to content

Commit 2d2063f

Browse files
fix(analytics): distinguish Signal from WhatsApp in platform detection
Signal deliberately uses a 'WhatsApp' User-Agent to bypass rate limits on sites like Twitter. This caused Signal link previews to be tracked as WhatsApp in Plausible analytics.

Detection logic:
- Real WhatsApp: WhatsApp/X.Y.Z (3+ part version), e.g., WhatsApp/2.23.18.78 i
- Signal: plain WhatsApp or WhatsApp/2 (no full version)

Reference: signalapp/Signal-Android#10060

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 79d9352 commit 2d2063f

File tree

2 files changed

+104
-10
lines changed

2 files changed

+104
-10
lines changed

api/analytics.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import asyncio
1010
import logging
11+
import re
1112

1213
import httpx
1314
from fastapi import Request
@@ -19,6 +20,7 @@
1920
DOMAIN = "pyplots.ai"
2021

2122
# All platforms from nginx.conf bot detection (27 total)
23+
# Order matters for some patterns - more specific patterns checked first via _detect_whatsapp_or_signal()
2224
PLATFORM_PATTERNS = {
2325
# Social Media
2426
"twitter": "twitterbot",
@@ -28,12 +30,10 @@
2830
"reddit": "redditbot",
2931
"tumblr": "tumblr",
3032
"mastodon": "mastodon",
31-
# Messaging Apps
33+
# Messaging Apps (whatsapp handled specially - see _detect_whatsapp_or_signal)
3234
"slack": "slackbot",
3335
"discord": "discordbot",
3436
"telegram": "telegrambot",
35-
"whatsapp": "whatsapp",
36-
"signal": "signal",
3737
"viber": "viber",
3838
"skype": "skypeuripreview",
3939
"teams": "microsoft teams",
@@ -53,6 +53,34 @@
5353
"showyoubot": "showyoubot",
5454
}
5555

56+
# Real WhatsApp User-Agent has version + platform suffix: "WhatsApp/2.23.18.78 i" or "WhatsApp/2.21.22.23 A"
57+
# Signal uses WhatsApp User-Agent to bypass rate limits but sends simpler format: "WhatsApp" or "WhatsApp/2"
58+
# Pattern matches: WhatsApp/X.Y.Z (at least 3-part version) followed by platform indicator (i/A/N/W or more text)
59+
# See: https://github.com/signalapp/Signal-Android/issues/10060
60+
REAL_WHATSAPP_PATTERN = re.compile(r"whatsapp/\d+\.\d+\.\d+", re.IGNORECASE)
61+
62+
63+
def _detect_whatsapp_or_signal(user_agent: str) -> str | None:
64+
"""Distinguish WhatsApp from Signal based on User-Agent format.
65+
66+
Signal deliberately uses 'WhatsApp' User-Agent to bypass rate limits on sites like Twitter.
67+
But real WhatsApp includes full version: 'WhatsApp/2.23.18.78 i' (iOS) or 'WhatsApp/2.21.22.23 A' (Android).
68+
Signal sends simpler format: 'WhatsApp' or 'WhatsApp/2'.
69+
70+
Returns:
71+
'whatsapp' for real WhatsApp, 'signal' for Signal-pretending-to-be-WhatsApp, None if neither.
72+
"""
73+
ua_lower = user_agent.lower()
74+
if "whatsapp" not in ua_lower:
75+
return None
76+
77+
# Real WhatsApp has 3+ part version (e.g., WhatsApp/2.23.18.78)
78+
if REAL_WHATSAPP_PATTERN.search(user_agent):
79+
return "whatsapp"
80+
81+
# Has "whatsapp" but no full version - likely Signal
82+
return "signal"
83+
5684

5785
def detect_platform(user_agent: str) -> str:
5886
"""Detect platform from User-Agent string.
@@ -61,8 +89,13 @@ def detect_platform(user_agent: str) -> str:
6189
user_agent: The User-Agent header value
6290
6391
Returns:
64-
Platform name (e.g., 'twitter', 'whatsapp') or 'unknown'
92+
Platform name (e.g., 'twitter', 'whatsapp', 'signal') or 'unknown'
6593
"""
94+
# Special handling for WhatsApp vs Signal (Signal uses WhatsApp User-Agent)
95+
whatsapp_or_signal = _detect_whatsapp_or_signal(user_agent)
96+
if whatsapp_or_signal:
97+
return whatsapp_or_signal
98+
6699
ua_lower = user_agent.lower()
67100
for platform, pattern in PLATFORM_PATTERNS.items():
68101
if pattern in ua_lower:

tests/unit/api/test_analytics.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import pytest
66

7-
from api.analytics import PLATFORM_PATTERNS, detect_platform, track_og_image
7+
from api.analytics import PLATFORM_PATTERNS, _detect_whatsapp_or_signal, detect_platform, track_og_image
88

99

1010
class TestDetectPlatform:
@@ -14,9 +14,29 @@ def test_detects_twitter(self) -> None:
1414
"""Should detect Twitter bot."""
1515
assert detect_platform("Twitterbot/1.0") == "twitter"
1616

17-
def test_detects_whatsapp(self) -> None:
18-
"""Should detect WhatsApp."""
19-
assert detect_platform("WhatsApp/2.21.4.22") == "whatsapp"
17+
def test_detects_whatsapp_ios(self) -> None:
18+
"""Should detect real WhatsApp iOS."""
19+
assert detect_platform("WhatsApp/2.23.18.78 i") == "whatsapp"
20+
21+
def test_detects_whatsapp_android(self) -> None:
22+
"""Should detect real WhatsApp Android."""
23+
assert detect_platform("WhatsApp/2.21.22.23 A") == "whatsapp"
24+
25+
def test_detects_whatsapp_desktop(self) -> None:
26+
"""Should detect real WhatsApp Desktop."""
27+
assert detect_platform("WhatsApp/2.2336.9 N") == "whatsapp"
28+
29+
def test_detects_signal_as_fake_whatsapp(self) -> None:
30+
"""Should detect Signal (which uses fake WhatsApp User-Agent).
31+
32+
Signal deliberately uses 'WhatsApp' User-Agent to bypass rate limits,
33+
but without full version number like real WhatsApp.
34+
See: https://github.com/signalapp/Signal-Android/issues/10060
35+
"""
36+
# Signal sends simple "WhatsApp" or "WhatsApp/2" without full version
37+
assert detect_platform("WhatsApp") == "signal"
38+
assert detect_platform("WhatsApp/2") == "signal"
39+
assert detect_platform("WhatsApp/2.1") == "signal" # Only 2-part version
2040

2141
def test_detects_facebook(self) -> None:
2242
"""Should detect Facebook."""
@@ -60,8 +80,49 @@ def test_case_insensitive(self) -> None:
6080
assert detect_platform("twitterbot/1.0") == "twitter"
6181

6282
def test_all_platforms_have_patterns(self) -> None:
63-
"""Should have 27 platform patterns defined."""
64-
assert len(PLATFORM_PATTERNS) == 27
83+
"""Should have 25 platform patterns in dict (whatsapp/signal handled separately)."""
84+
# 27 total platforms: 25 in PLATFORM_PATTERNS + whatsapp + signal (special handling)
85+
assert len(PLATFORM_PATTERNS) == 25
86+
87+
88+
class TestWhatsAppSignalDetection:
    """Checks for the helper that separates genuine WhatsApp from Signal."""

    def test_real_whatsapp_ios(self) -> None:
        """A full-version iOS User-Agent is classified as 'whatsapp'."""
        outcome = _detect_whatsapp_or_signal("WhatsApp/2.23.18.78 i")
        assert outcome == "whatsapp"

    def test_real_whatsapp_android(self) -> None:
        """A full-version Android User-Agent is classified as 'whatsapp'."""
        outcome = _detect_whatsapp_or_signal("WhatsApp/2.21.22.23 A")
        assert outcome == "whatsapp"

    def test_real_whatsapp_cfnetwork(self) -> None:
        """Extra CFNetwork/Darwin tokens after the version still yield 'whatsapp'."""
        outcome = _detect_whatsapp_or_signal("WhatsApp/2.18.31.32 CFNetwork/894 Darwin/17.4.0")
        assert outcome == "whatsapp"

    def test_signal_simple(self) -> None:
        """A bare 'WhatsApp' User-Agent is attributed to Signal."""
        outcome = _detect_whatsapp_or_signal("WhatsApp")
        assert outcome == "signal"

    def test_signal_with_major_version(self) -> None:
        """'WhatsApp/2' (major version only) is attributed to Signal."""
        outcome = _detect_whatsapp_or_signal("WhatsApp/2")
        assert outcome == "signal"

    def test_signal_with_two_part_version(self) -> None:
        """'WhatsApp/2.1' has only two version parts, so it is attributed to Signal."""
        outcome = _detect_whatsapp_or_signal("WhatsApp/2.1")
        assert outcome == "signal"

    def test_non_whatsapp_returns_none(self) -> None:
        """User-Agents that never mention WhatsApp produce None."""
        for candidate in ("Twitterbot/1.0", "Mozilla/5.0", ""):
            assert _detect_whatsapp_or_signal(candidate) is None

    def test_case_insensitive(self) -> None:
        """Letter case of the User-Agent does not affect the classification."""
        expectations = (
            ("WHATSAPP/2.23.18.78", "whatsapp"),
            ("whatsapp/2.23.18.78", "whatsapp"),
            ("WHATSAPP", "signal"),
        )
        for candidate, expected in expectations:
            assert _detect_whatsapp_or_signal(candidate) == expected
65126

66127

67128
class TestTrackOgImage:

0 commit comments

Comments
 (0)