Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,22 @@ DATABASE_URL=postgres://user:password@localhost:5432/boost_dashboard
# ==============================================================================
# Reddit (reddit_activity_tracker)
# ==============================================================================
# Register a "script" app at https://www.reddit.com/prefs/apps
# Register a "script" app at https://www.reddit.com/prefs/apps (preferred):
# REDDIT_CLIENT_ID=your_client_id
# REDDIT_CLIENT_SECRET=your_client_secret
# REDDIT_USER_AGENT=r_cpp_scraper/1.0 by u/yourusername
#
# Optional: minimum seconds between API requests (default 1.0, ~60 req/min)
# Alternative auth (when client credentials are unavailable):
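# (Notes are kept on their own lines so they are not read as part of the value.)
# REDDIT_BEARER_TOKEN=token_v2_cookie_value
#   ^ expires after ~24h
# REDDIT_SESSION_COOKIE=reddit_session_cookie
#   ^ valid for ~180d; used to auto-mint a bearer token
# REDDIT_CSRF_TOKEN=csrf_token
#   ^ optional; required if session mint fails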
#
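# Optional rate limiting and discovery (defaults shown):
# Minimum seconds between API requests (1.0 is ~60 req/min):
# REQUEST_INTERVAL=1.0
# Pause when the X-Ratelimit-Remaining response header drops below this value:
# RATE_LIMIT_LOW_WATERMARK=2.0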
#
# First run when DB is empty: scrape this many days back (default 30)
# REDDIT_DEFAULT_LOOKBACK_DAYS=30

# ==============================================================================
# YouTube (cppa_youtube_script_tracker)
Expand Down
6 changes: 6 additions & 0 deletions config/boost_collector_schedule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,9 @@ groups:
tasks:
- command: run_boost_mailing_list_tracker
schedule: daily

reddit:
default_time: "17:00"
tasks:
- command: run_reddit_activity_tracker
schedule: daily
6 changes: 6 additions & 0 deletions config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,8 +530,14 @@ def _slack_team_scope_from_env():
# Reddit app credentials. Each value defaults to "" when unset and is stripped
# of surrounding whitespace.
REDDIT_CLIENT_ID = (env("REDDIT_CLIENT_ID", default="") or "").strip()
REDDIT_CLIENT_SECRET = (env("REDDIT_CLIENT_SECRET", default="") or "").strip()
REDDIT_USER_AGENT = (env("REDDIT_USER_AGENT", default="") or "").strip()
# Alternative auth values, read the same way ("" when unset).
REDDIT_BEARER_TOKEN = (env("REDDIT_BEARER_TOKEN", default="") or "").strip()
REDDIT_SESSION_COOKIE = (env("REDDIT_SESSION_COOKIE", default="") or "").strip()
# Unlike the settings above, an unset/blank CSRF token is normalized to None
# rather than "".
REDDIT_CSRF_TOKEN = (env("REDDIT_CSRF_TOKEN", default="") or "").strip() or None
# Minimum seconds between API requests (default 1.0, ~60 req/min). Env: REQUEST_INTERVAL.
REDDIT_REQUEST_INTERVAL = env.float("REQUEST_INTERVAL", default=1.0)
# Pause when X-Ratelimit-Remaining drops below this value. Env: RATE_LIMIT_LOW_WATERMARK.
REDDIT_RATE_LIMIT_LOW_WATERMARK = env.float("RATE_LIMIT_LOW_WATERMARK", default=2.0)
# Number of days to look back (default 30). Env: REDDIT_DEFAULT_LOOKBACK_DAYS.
# NOTE(review): .env.example describes this as applying to the first run on an
# empty DB - confirm against the tracker command.
REDDIT_DEFAULT_LOOKBACK_DAYS = env.int("REDDIT_DEFAULT_LOOKBACK_DAYS", default=30)

# WG21 Paper Tracker Configuration
WG21_GITHUB_DISPATCH_ENABLED = env.bool("WG21_GITHUB_DISPATCH_ENABLED", default=False)
Expand Down
2 changes: 1 addition & 1 deletion core/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# file generated by setuptools-scm; do not edit
version = "0.1.1.dev579+g8b4cba29b.d20260609"
version = "0.1.1.dev584+g9efa67002.d20260612"
15 changes: 15 additions & 0 deletions cppa_user_tracker/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
GitHubAccount,
Identity,
MailingListProfile,
RedditUser,
SlackUser,
TempProfileIdentityRelation,
TmpIdentity,
Expand Down Expand Up @@ -99,3 +100,17 @@ class WG21PaperAuthorProfileAdmin(ModelAdmin):
list_display = ("id", "identity", "display_name", "updated_at")
search_fields = ("display_name",)
raw_id_fields = ("identity",)


@admin.register(RedditUser)
class RedditUserAdmin(ModelAdmin):
    """Admin configuration for RedditUser profiles.

    The changelist shows the identifying columns, is searchable by Reddit id,
    username and display name, and edits ``identity`` through a raw-id widget.
    """

    raw_id_fields = ("identity",)
    search_fields = ("reddit_user_id", "username", "display_name")
    list_display = (
        "id",
        "identity",
        "reddit_user_id",
        "username",
        "display_name",
        "updated_at",
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create the RedditUser profile model and add "reddit" to BaseProfile.type."""

    dependencies = [
        ("cppa_user_tracker", "0008_wg21paperauthorprofile_author_alias"),
    ]

    operations = [
        # RedditUser is a child of BaseProfile: its primary key is the
        # one-to-one parent link declared below.
        migrations.CreateModel(
            name="RedditUser",
            fields=[
                (
                    "baseprofile_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="cppa_user_tracker.baseprofile",
                    ),
                ),
                (
                    # Nullable but unique when present.
                    "reddit_user_id",
                    models.CharField(
                        blank=True,
                        db_index=True,
                        max_length=64,
                        null=True,
                        unique=True,
                    ),
                ),
                (
                    "username",
                    models.CharField(db_index=True, max_length=255, unique=True),
                ),
                (
                    "display_name",
                    models.CharField(blank=True, db_index=True, max_length=255),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
            ],
            bases=("cppa_user_tracker.baseprofile",),
        ),
        # Extend the allowed profile types with the new "reddit" choice.
        migrations.AlterField(
            model_name="baseprofile",
            name="type",
            field=models.CharField(
                choices=[
                    ("github", "GitHub"),
                    ("slack", "Slack"),
                    ("mailing_list", "Mailing list"),
                    ("wg21", "WG21"),
                    ("discord", "Discord"),
                    ("youtube", "YouTube"),
                    ("reddit", "Reddit"),
                ],
                db_index=True,
                max_length=20,
            ),
        ),
    ]
21 changes: 21 additions & 0 deletions cppa_user_tracker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class ProfileType(models.TextChoices):
WG21 = "wg21", "WG21" # pyright: ignore[reportCallIssue]
DISCORD = "discord", "Discord" # pyright: ignore[reportCallIssue]
YOUTUBE = "youtube", "YouTube" # pyright: ignore[reportCallIssue]
REDDIT = "reddit", "Reddit" # pyright: ignore[reportCallIssue]


class GitHubAccountType(models.TextChoices):
Expand Down Expand Up @@ -195,6 +196,26 @@ def save(self, *args, **kwargs):
updated_at = models.DateTimeField(auto_now=True)


class RedditUser(BaseProfile):
    """Reddit account profile, stored as a BaseProfile subclass.

    ``username`` is required and unique; ``reddit_user_id`` is optional but
    unique when set. Saving always stamps the profile type as Reddit.
    """

    reddit_user_id = models.CharField(
        max_length=64,
        unique=True,
        db_index=True,
        null=True,
        blank=True,
    )
    username = models.CharField(max_length=255, unique=True, db_index=True)
    display_name = models.CharField(max_length=255, db_index=True, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    def save(self, *args, **kwargs):
        """Set ``type`` to ``ProfileType.REDDIT`` and delegate to the parent save."""
        self.type = ProfileType.REDDIT
        super().save(*args, **kwargs)


class YoutubeSpeaker(BaseProfile):
"""YouTube speaker profile.

Expand Down
98 changes: 98 additions & 0 deletions cppa_user_tracker/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,13 @@
MailingListProfile,
SlackUser,
DiscordProfile,
RedditUser,
WG21PaperAuthorProfile,
YoutubeSpeaker,
)

# Author values that are never turned into a RedditUser: a blank author, the
# "[deleted]" placeholder, and the AutoModerator account (so the set covers
# more than just deleted accounts, despite its name).
_REDDIT_DELETED_AUTHORS = frozenset({"", "[deleted]", "AutoModerator"})


# --- Identity ---
def create_identity(
Expand Down Expand Up @@ -451,3 +454,98 @@ def get_or_create_youtube_speaker(
speaker.display_name = display_name_val
speaker.save(update_fields=["display_name", "updated_at"])
return speaker, created


class RedditClientProtocol(Protocol):
"""Protocol for a Reddit API client used by get_or_create_reddit_user."""

def fetch_user_about(self, username: str) -> dict[str, Any] | None: ...


def _normalize_reddit_username(author: str | None) -> str | None:
    """Return the trimmed author name, or ``None`` if it is not a usable account.

    ``None``, blank strings, and any name listed in
    ``_REDDIT_DELETED_AUTHORS`` all yield ``None``.
    """
    cleaned = author.strip() if author else ""
    if cleaned and cleaned not in _REDDIT_DELETED_AUTHORS:
        return cleaned
    return None


def _display_name_from_reddit_profile(
profile: dict[str, Any] | None, username: str
) -> str:
if not profile:
return username
subreddit = profile.get("subreddit") or {}
if isinstance(subreddit, dict):
title = (subreddit.get("title") or "").strip()
if title:
return title
return username


def _reddit_user_id_from_profile(
    profile_data: dict[str, Any] | None, fallback: str | None
) -> str | None:
    """Pick the Reddit user id to store, preferring the /user/about payload.

    Order of preference: the payload's ``id`` (prefixed with ``t2_``), then the
    payload's ``fullname``, then the caller-supplied ``fallback``. Blank values
    are never returned as ``""``: ``reddit_user_id`` is unique, so a missing id
    must be stored as NULL to avoid collisions between users.
    """
    resolved = (fallback or "").strip() or None
    if not profile_data:
        return resolved
    profile_id = str(profile_data.get("id") or "").strip()
    if profile_id:
        # Tolerate an id that already carries the "t2_" prefix.
        return profile_id if profile_id.startswith("t2_") else f"t2_{profile_id}"
    fullname = str(profile_data.get("fullname") or "").strip()
    return fullname or resolved


def get_or_create_reddit_user(
    username: str,
    *,
    reddit_user_id: str | None = None,
    display_name: str | None = None,
    client: RedditClientProtocol | None = None,
) -> RedditUser | None:
    """Get or create a RedditUser; call /user/about only when the user is new.

    Args:
        username: Reddit author name. Blank names and the placeholder authors
            rejected by ``_normalize_reddit_username`` are not stored.
        reddit_user_id: Id to store when the profile payload does not provide one.
        display_name: Explicit display name; when blank, it is derived from the
            profile payload or falls back to the username.
        client: Optional API client used to fetch the profile of a new user.

    Returns:
        The existing or newly created RedditUser, or ``None`` when ``username``
        does not normalize to a usable account name.
    """
    normalized = _normalize_reddit_username(username)
    if not normalized:
        return None

    existing = RedditUser.objects.filter(username=normalized).first()
    if existing is not None:
        return existing

    # The profile lookup is a network request, so it is done before opening the
    # transaction below rather than while one is held open by this function.
    profile_data: dict[str, Any] | None = None
    if client is not None:
        profile_data = client.fetch_user_about(normalized)

    resolved_reddit_user_id = _reddit_user_id_from_profile(
        profile_data, reddit_user_id
    )
    resolved_display_name = (display_name or "").strip()
    if not resolved_display_name:
        resolved_display_name = _display_name_from_reddit_profile(
            profile_data, normalized
        )

    with transaction.atomic():
        user, created = RedditUser.objects.get_or_create(
            username=normalized,
            defaults={
                "reddit_user_id": resolved_reddit_user_id,
                "display_name": resolved_display_name,
            },
        )
        if not created:
            # Only reachable when another writer inserted the row after the
            # existence check above; fold the freshly resolved values in.
            if resolved_reddit_user_id:
                user.reddit_user_id = resolved_reddit_user_id
            user.display_name = resolved_display_name or user.display_name
            user.save()
    return user


def resolve_reddit_user_from_author_data(
    data: dict[str, Any],
    *,
    client: RedditClientProtocol | None = None,
) -> RedditUser | None:
    """Resolve RedditUser from submission/comment author fields.

    Reads ``author`` and ``author_fullname`` from ``data``. Returns ``None``
    when the author does not normalize to a usable username; otherwise
    delegates to ``get_or_create_reddit_user`` with the trimmed fullname (or
    ``None`` when blank) as the Reddit user id.
    """
    author_name = _normalize_reddit_username(data.get("author"))
    if not author_name:
        return None

    fullname = data.get("author_fullname")
    fullname = fullname.strip() if fullname else ""
    return get_or_create_reddit_user(
        author_name,
        reddit_user_id=fullname or None,
        client=client,
    )
63 changes: 63 additions & 0 deletions cppa_user_tracker/tests/test_services.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Tests for cppa_user_tracker.services."""

from unittest.mock import MagicMock

import pytest

from cppa_user_tracker.models import (
Expand All @@ -8,6 +10,7 @@
GitHubAccount,
GitHubAccountType,
Identity,
RedditUser,
SlackUser,
TempProfileIdentityRelation,
WG21PaperAuthorProfile,
Expand Down Expand Up @@ -806,6 +809,66 @@ def test_get_or_create_discord_profile_updates_existing():
assert profile.is_bot is True


# --- get_or_create_reddit_user ---


@pytest.mark.django_db
def test_get_or_create_reddit_user_creates_and_updates():
    """First call creates the user from profile data; the second reuses the row."""
    about_payload = {
        "id": "abc123",
        "name": "Taladar",
        "subreddit": {"title": "Taladar"},
    }
    reddit_client = MagicMock()
    reddit_client.fetch_user_about.return_value = about_payload

    first = services.get_or_create_reddit_user(
        "Taladar",
        reddit_user_id="t2_old",
        client=reddit_client,
    )
    assert first is not None
    assert first.username == "Taladar"
    # The id from the profile payload wins over the caller-supplied one.
    assert first.reddit_user_id == "t2_abc123"
    assert first.display_name == "Taladar"

    second = services.get_or_create_reddit_user("Taladar", client=reddit_client)
    assert second.pk == first.pk
    reddit_client.fetch_user_about.assert_called_once()


@pytest.mark.django_db
def test_get_or_create_reddit_user_skips_about_for_existing_user():
    """An already stored user is returned without any profile lookup."""
    reddit_client = MagicMock()
    RedditUser.objects.create(
        username="Taladar",
        reddit_user_id="t2_abc123",
        display_name="Taladar",
    )

    found = services.get_or_create_reddit_user("Taladar", client=reddit_client)

    assert found is not None
    assert found.username == "Taladar"
    reddit_client.fetch_user_about.assert_not_called()


@pytest.mark.django_db
def test_get_or_create_reddit_user_deleted_author_returns_none():
    """The "[deleted]" placeholder author never produces a RedditUser."""
    result = services.get_or_create_reddit_user("[deleted]")
    assert result is None


@pytest.mark.django_db
def test_resolve_reddit_user_from_author_data():
    """Author fields from a submission/comment payload resolve to a stored user."""
    client = MagicMock()
    client.fetch_user_about.return_value = {
        "id": "abc123",
        "subreddit": {"title": "Taladar"},
    }
    user = services.resolve_reddit_user_from_author_data(
        {"author": "Taladar", "author_fullname": "t2_abc123"},
        client=client,
    )
    assert user is not None
    assert user.username == "Taladar"
    # Pin the id and display-name resolution as well, so a regression in how
    # the author fields or profile payload are used does not pass silently.
    assert user.reddit_user_id == "t2_abc123"
    assert user.display_name == "Taladar"

    # Placeholder authors are skipped without touching the API client.
    assert (
        services.resolve_reddit_user_from_author_data(
            {"author": "[deleted]"}, client=client
        )
        is None
    )
    client.fetch_user_about.assert_called_once_with("Taladar")


# --- get_or_create_youtube_speaker ---


Expand Down
2 changes: 2 additions & 0 deletions docs/service_api/cppa_user_tracker.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@
| `get_or_create_identity` | display_name: str = '', description: str = '', defaults: dict[str, Any] \| None = None | tuple[Identity, bool] | Get or create an Identity by display_name. If exists, updates description from defaults. |
| `get_or_create_mailing_list_profile` | display_name: str = '', email: str = '' | tuple[MailingListProfile, bool] | Get or create a MailingListProfile by display_name and email. Returns (profile, created). |
| `get_or_create_owner_account` | client: GitHubClientProtocol, owner: str | GitHubAccount | Get or create a GitHubAccount for an owner (org or user). For use by any app. |
| `get_or_create_reddit_user` | username: str, *, reddit_user_id: str \| None = None, display_name: str \| None = None, client: RedditClientProtocol \| None = None | RedditUser \| None | Get or create a RedditUser; call /user/about only when the user is new. |
| `get_or_create_slack_user` | user_data: SlackUserPayload \| dict[str, Any] | tuple[SlackUser, bool] | Get or create a SlackUser from Slack API user data. Returns (SlackUser, created). |
| `get_or_create_unknown_github_account` | name: str \| None = None, email: str = '' | tuple[GitHubAccount, bool] | Get or create a GitHubAccount for commits with no API author/committer. |
| `get_or_create_wg21_paper_author_profile` | display_name: str, email: str \| None = None | tuple[WG21PaperAuthorProfile, bool] | Get or create a WG21PaperAuthorProfile by display_name, with optional email disambiguation. |
| `get_or_create_youtube_speaker` | external_id: str, display_name: str = '', identity: Identity \| None = None | tuple[YoutubeSpeaker, bool] | Get or create a YoutubeSpeaker by external_id. Returns (speaker, created). |
| `remove_email` | email_obj: Email | None | Remove an email from a profile. |
| `remove_temp_profile_identity_relation` | base_profile: BaseProfile, target_identity: TmpIdentity | None | Remove the staging relation between base_profile and target_identity. |
| `resolve_reddit_user_from_author_data` | data: dict[str, Any], *, client: RedditClientProtocol \| None = None | RedditUser \| None | Resolve RedditUser from submission/comment author fields. |
| `update_email` | email_obj: Email, **kwargs: Any | Email | Update an Email instance. Allowed keys: email, is_primary, is_active. |

<!-- SERVICE_API:GENERATED:END -->
Expand Down
Loading
Loading