Add optional raw-event export (events.json) to GitHub snapshots.

Summary of what this patch does:
  * config/models.py: SnapshotConfig gains `include_raw_events: bool = False`.
  * engine/snapshots.py: `_collect_snapshot_data` gains a keyword-only
    `include_raw_events` flag, passed through from the snapshot config.
    When it is true and the storage object exposes a callable
    `list_contributions`, an "events.json" entry is added. It contains the
    events returned by `list_contributions(period_start)` whose `created_at`
    is <= `period_end`. `list_recent_notifications` is now called with a
    positional `1000` instead of `limit=1000`.
  * tests/test_snapshots.py: switches `timezone.utc` to `datetime.UTC`,
    extends MockStorage with `list_contributions`, and adds tests for the
    new flag.

Review notes (to confirm, not established by this patch alone):
  * NOTE(review): `from datetime import UTC` needs Python 3.11+; confirm the
    project's minimum supported version.
  * NOTE(review): with `include_raw_events=True` and a storage object lacking
    `list_contributions`, events.json is omitted without any log output;
    confirm that silent omission is intended.
  * NOTE(review): this text was rebuilt from a whitespace-collapsed copy.
    Hunk line counts were re-checked, but the width of the three removed
    whitespace-only lines in MockStorage is assumed to be four spaces;
    verify before applying.

diff --git a/src/ghdcbot/config/models.py b/src/ghdcbot/config/models.py
index b57a47b..ccf02f2 100644
--- a/src/ghdcbot/config/models.py
+++ b/src/ghdcbot/config/models.py
@@ -214,6 +214,8 @@ class SnapshotConfig(BaseModel):
     repo_path: str = ""  # Format: "owner/repo" (e.g., "org/gitcord-data")
     # Optional: branch to write to (default: main/master)
     branch: str | None = None
+    # Optional: export raw ContributionEvent records to events.json (can be large)
+    include_raw_events: bool = False
 
 
 class BotConfig(BaseModel):
diff --git a/src/ghdcbot/engine/snapshots.py b/src/ghdcbot/engine/snapshots.py
index f785b8c..9dae9f9 100644
--- a/src/ghdcbot/engine/snapshots.py
+++ b/src/ghdcbot/engine/snapshots.py
@@ -114,6 +114,7 @@ def _write_snapshots(
         contribution_summaries=contribution_summaries,
         run_id=run_id,
         generated_at=now,
+        include_raw_events=snapshot_config.include_raw_events,
     )
 
     # Write each snapshot file to GitHub
@@ -164,6 +165,8 @@ def _collect_snapshot_data(
     contribution_summaries: list[ContributionSummary] | None,
     run_id: str,
     generated_at: datetime,
+    *,
+    include_raw_events: bool = False,
 ) -> dict[str, dict[str, Any]]:
     """Collect all snapshot data into structured dictionaries."""
     org = config.github.org
@@ -281,7 +284,7 @@ def _collect_snapshot_data(
     notifications_data = []
     list_notifications = getattr(storage, "list_recent_notifications", None)
     if callable(list_notifications):
-        recent_notifications = list_notifications(limit=1000)  # Last 1000 notifications
+        recent_notifications = list_notifications(1000)  # Last 1000 notifications
         for notif in recent_notifications:
             notifications_data.append({
                 "dedupe_key": notif.get("dedupe_key"),
@@ -302,7 +305,7 @@ def _collect_snapshot_data(
         "data": notifications_data,
     }
 
-    return {
+    files: dict[str, dict[str, Any]] = {
         "meta.json": meta,
         "identities.json": identities,
         "scores.json": scores_snapshot,
@@ -312,6 +315,32 @@ def _collect_snapshot_data(
         "notifications.json": notifications,
     }
 
+    if include_raw_events:
+        list_contributions = getattr(storage, "list_contributions", None)
+        if callable(list_contributions):
+            events_data = [
+                {
+                    "github_user": event.github_user,
+                    "event_type": event.event_type,
+                    "repo": event.repo,
+                    "created_at": event.created_at.isoformat(),
+                    "payload": event.payload,
+                }
+                for event in list_contributions(period_start)
+                if event.created_at <= period_end
+            ]
+            files["events.json"] = {
+                "schema_version": SCHEMA_VERSION,
+                "generated_at": generated_at.isoformat(),
+                "org": org,
+                "run_id": run_id,
+                "period_start": period_start.isoformat(),
+                "period_end": period_end.isoformat(),
+                "data": events_data,
+            }
+
+    return files
+
 
 def _parse_repo_path(repo_path: str) -> tuple[str, str]:
     """Parse 'owner/repo' or 'owner/repo/path' into (owner, repo).
diff --git a/tests/test_snapshots.py b/tests/test_snapshots.py
index 03b128a..bd6d4a2 100644
--- a/tests/test_snapshots.py
+++ b/tests/test_snapshots.py
@@ -1,6 +1,6 @@
 """Tests for GitHub snapshot writing."""
 
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from unittest.mock import MagicMock
 
 import pytest
@@ -18,7 +18,7 @@
     SnapshotConfig,
 )
 from ghdcbot.core.modes import RunMode
-from ghdcbot.core.models import ContributionSummary, Score
+from ghdcbot.core.models import ContributionEvent, ContributionSummary, Score
 from ghdcbot.engine.snapshots import (
     SCHEMA_VERSION,
     _collect_snapshot_data,
@@ -29,16 +29,20 @@
 
 class MockStorage:
     """Mock storage for testing."""
-    
+
     def __init__(self) -> None:
-        self.notifications = []
-    
+        self.notifications: list[dict] = []
+        self.contributions: list[ContributionEvent] = []
+
     def list_recent_notifications(self, limit: int = 1000) -> list[dict]:
         return self.notifications[:limit]
-    
+
     def list_pending_issue_requests(self) -> list[dict]:
         return []
 
+    def list_contributions(self, since: datetime) -> list[ContributionEvent]:
+        return [e for e in self.contributions if e.created_at >= since]
+
 
 class MockGitHubWriter:
     """Mock GitHub writer for testing."""
@@ -95,8 +99,8 @@ def test_collect_snapshot_data() -> None:
     scores = [
         Score(
             github_user="alice",
-            period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
-            period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
+            period_start=datetime(2024, 1, 1, tzinfo=UTC),
+            period_end=datetime(2024, 1, 31, tzinfo=UTC),
             points=100,
         ),
     ]
@@ -106,10 +110,10 @@ def test_collect_snapshot_data() -> None:
         "456": ["Maintainer"],
     }
 
-    period_start = datetime(2024, 1, 1, tzinfo=timezone.utc)
-    period_end = datetime(2024, 1, 31, tzinfo=timezone.utc)
+    period_start = datetime(2024, 1, 1, tzinfo=UTC)
+    period_end = datetime(2024, 1, 31, tzinfo=UTC)
     run_id = "test-run-123"
-    generated_at = datetime(2024, 1, 31, 12, 0, 0, tzinfo=timezone.utc)
+    generated_at = datetime(2024, 1, 31, 12, 0, 0, tzinfo=UTC)
 
     snapshots = _collect_snapshot_data(
         storage=storage,
@@ -189,8 +193,8 @@ def test_collect_snapshot_data_with_contributors() -> None:
             prs_reviewed=2,
             comments=10,
             total_score=50,
-            period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
-            period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
+            period_start=datetime(2024, 1, 1, tzinfo=UTC),
+            period_end=datetime(2024, 1, 31, tzinfo=UTC),
         ),
     ]
 
@@ -200,11 +204,11 @@ def test_collect_snapshot_data_with_contributors() -> None:
         identity_mappings=[],
         scores=[],
         member_roles={},
-        period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
-        period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
         contribution_summaries=contribution_summaries,
         run_id="test-run",
-        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=timezone.utc),
+        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=UTC),
     )
 
     contributors = snapshots["contributors.json"]
@@ -242,8 +246,8 @@ def test_write_snapshots_disabled() -> None:
         identity_mappings=[],
         scores=[],
         member_roles={},
-        period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
-        period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
     )
 
     assert len(github_writer.files_written) == 0
@@ -279,8 +283,8 @@ def test_write_snapshots_enabled() -> None:
         ],
         scores=[],
         member_roles={},
-        period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
-        period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
     )
 
     # Should have written snapshot files
@@ -328,9 +332,230 @@ def test_write_snapshots_handles_errors() -> None:
         identity_mappings=[],
         scores=[],
         member_roles={},
-        period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
-        period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
     )
 
     # Should not have written files due to error
     assert len(github_writer.files_written) == 0
+
+
+def _make_config(*, snapshots: "SnapshotConfig | None" = None) -> "BotConfig":
+    return BotConfig(
+        runtime=RuntimeConfig(
+            mode=RunMode.DRY_RUN,
+            log_level="INFO",
+            data_dir="/tmp/test",
+            github_adapter="test",
+            discord_adapter="test",
+            storage_adapter="test",
+        ),
+        github=GitHubConfig(org="test-org", token="test", api_base="https://api.github.com", permissions=PermissionConfig()),
+        discord=DiscordConfig(guild_id="123", token="test", permissions=PermissionConfig()),
+        scoring=ScoringConfig(period_days=30, weights={}),
+        role_mappings=[RoleMappingConfig(discord_role="Contributor", min_score=10)],
+        assignments=AssignmentConfig(),
+        snapshots=snapshots,
+    )
+
+
+def test_raw_events_excluded_when_false() -> None:
+    """events.json is not written when include_raw_events is False."""
+    storage = MockStorage()
+    storage.contributions = [
+        ContributionEvent(
+            github_user="alice",
+            event_type="pr_merged",
+            repo="org/repo",
+            created_at=datetime(2024, 1, 15, tzinfo=UTC),
+            payload={"pr_number": 1},
+        )
+    ]
+    snapshots = _collect_snapshot_data(
+        storage=storage,
+        config=_make_config(),
+        identity_mappings=[],
+        scores=[],
+        member_roles={},
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
+        contribution_summaries=None,
+        run_id="test-run",
+        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=UTC),
+        include_raw_events=False,
+    )
+    assert "events.json" not in snapshots
+
+
+def test_raw_events_included_when_enabled() -> None:
+    """events.json is written with correct structure when include_raw_events=True."""
+    storage = MockStorage()
+    storage.contributions = [
+        ContributionEvent(
+            github_user="alice",
+            event_type="pr_merged",
+            repo="org/repo",
+            created_at=datetime(2024, 1, 15, tzinfo=UTC),
+            payload={"pr_number": 42},
+        ),
+        ContributionEvent(
+            github_user="bob",
+            event_type="issue_opened",
+            repo="org/repo",
+            created_at=datetime(2024, 1, 20, tzinfo=UTC),
+            payload={"issue_number": 7},
+        ),
+    ]
+    snapshots = _collect_snapshot_data(
+        storage=storage,
+        config=_make_config(),
+        identity_mappings=[],
+        scores=[],
+        member_roles={},
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
+        contribution_summaries=None,
+        run_id="test-run",
+        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=UTC),
+        include_raw_events=True,
+    )
+    assert "events.json" in snapshots
+    events_snapshot = snapshots["events.json"]
+    assert events_snapshot["schema_version"] == SCHEMA_VERSION
+    assert events_snapshot["org"] == "test-org"
+    assert len(events_snapshot["data"]) == 2
+    assert events_snapshot["data"][0]["github_user"] == "alice"
+    assert events_snapshot["data"][0]["event_type"] == "pr_merged"
+    assert events_snapshot["data"][0]["payload"] == {"pr_number": 42}
+    assert events_snapshot["data"][1]["github_user"] == "bob"
+
+
+def test_raw_events_respects_period_start() -> None:
+    """Only events at or after period_start are included in events.json."""
+    storage = MockStorage()
+    period_start = datetime(2024, 1, 10, tzinfo=UTC)
+    storage.contributions = [
+        ContributionEvent(
+            github_user="alice",
+            event_type="pr_merged",
+            repo="org/repo",
+            created_at=datetime(2024, 1, 5, tzinfo=UTC),  # before period_start
+            payload={},
+        ),
+        ContributionEvent(
+            github_user="bob",
+            event_type="pr_merged",
+            repo="org/repo",
+            created_at=datetime(2024, 1, 15, tzinfo=UTC),  # within period
+            payload={},
+        ),
+    ]
+    snapshots = _collect_snapshot_data(
+        storage=storage,
+        config=_make_config(),
+        identity_mappings=[],
+        scores=[],
+        member_roles={},
+        period_start=period_start,
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
+        contribution_summaries=None,
+        run_id="test-run",
+        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=UTC),
+        include_raw_events=True,
+    )
+    events_data = snapshots["events.json"]["data"]
+    assert len(events_data) == 1
+    assert events_data[0]["github_user"] == "bob"
+
+
+def test_raw_events_respects_period_end() -> None:
+    """Only events at or before period_end are included in events.json."""
+    storage = MockStorage()
+    period_end = datetime(2024, 1, 31, tzinfo=UTC)
+    storage.contributions = [
+        ContributionEvent(
+            github_user="alice",
+            event_type="pr_merged",
+            repo="org/repo",
+            created_at=datetime(2024, 1, 15, tzinfo=UTC),  # within period
+            payload={},
+        ),
+        ContributionEvent(
+            github_user="bob",
+            event_type="pr_merged",
+            repo="org/repo",
+            created_at=datetime(2024, 2, 5, tzinfo=UTC),  # after period_end
+            payload={},
+        ),
+    ]
+    snapshots = _collect_snapshot_data(
+        storage=storage,
+        config=_make_config(),
+        identity_mappings=[],
+        scores=[],
+        member_roles={},
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=period_end,
+        contribution_summaries=None,
+        run_id="test-run",
+        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=UTC),
+        include_raw_events=True,
+    )
+    events_data = snapshots["events.json"]["data"]
+    assert len(events_data) == 1
+    assert events_data[0]["github_user"] == "alice"
+
+
+def test_write_snapshots_raw_events_via_config() -> None:
+    """include_raw_events=True in SnapshotConfig results in events.json being written."""
+    storage = MockStorage()
+    storage.contributions = [
+        ContributionEvent(
+            github_user="alice",
+            event_type="pr_merged",
+            repo="org/repo",
+            created_at=datetime(2024, 1, 15, tzinfo=UTC),
+            payload={"pr_number": 1},
+        )
+    ]
+    config = _make_config(snapshots=SnapshotConfig(enabled=True, repo_path="org/repo", include_raw_events=True))
+    github_writer = MockGitHubWriter()
+
+    write_snapshots_to_github(
+        storage=storage,
+        config=config,
+        github_writer=github_writer,
+        identity_mappings=[],
+        scores=[],
+        member_roles={},
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
+    )
+
+    written_paths = [path for _, _, path, _ in github_writer.files_written]
+    assert any("events.json" in p for p in written_paths)
+
+
+def test_raw_events_graceful_when_storage_missing_method() -> None:
+    """events.json is omitted gracefully if storage lacks list_contributions."""
+    class MinimalStorage:
+        def list_recent_notifications(self, _limit: int = 1000) -> list[dict]:
+            return []
+        def list_pending_issue_requests(self) -> list[dict]:
+            return []
+
+    storage = MinimalStorage()
+    snapshots = _collect_snapshot_data(
+        storage=storage,
+        config=_make_config(),
+        identity_mappings=[],
+        scores=[],
+        member_roles={},
+        period_start=datetime(2024, 1, 1, tzinfo=UTC),
+        period_end=datetime(2024, 1, 31, tzinfo=UTC),
+        contribution_summaries=None,
+        run_id="test-run",
+        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=UTC),
+        include_raw_events=True,
+    )
+    assert "events.json" not in snapshots