Skip to content

Commit 64e1138

Browse files
authored
Correct slack activity miscount from fetch_slack_activity (#2056)
1 parent d24e46a commit 64e1138

7 files changed

Lines changed: 134 additions & 9 deletions

File tree

docs/commands.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
- [`update_library_version_dependencies`](#update_library_version_dependencies)
1717
- [`release_tasks`](#release_tasks)
1818
- [`refresh_users_github_photos`](#refresh_users_github_photos)
19+
- [`clear_slack_activity`](#clear_slack_activity)
1920

2021
## `boost_setup`
2122

@@ -356,3 +357,36 @@ Preview which users would be updated:
356357

357358
- Calls the `refresh_users_github_photos()` Celery task which queues photo updates for all users with GitHub usernames
358359
- With `--dry-run`, displays information about which users would be updated without making any changes
360+
361+
## `clear_slack_activity`
362+
363+
**Purpose**: Delete all Slack activity tracking data from the database. This command removes all records from the `SlackActivityBucket` and `ChannelUpdateGap` tables and resets the `last_update_ts` field to `"0"` for every channel. This is useful for returning the Slack activity tracking system to its initial state.
364+
365+
**Example**
366+
367+
```bash
368+
./manage.py clear_slack_activity --confirm
369+
```
370+
371+
**Options**
372+
373+
| Options | Format | Description |
374+
|--------------|--------|----------------------------------------------------------------------------------------------|
375+
| `--confirm` | bool | Required flag to confirm deletion. Without it, the command logs an error and exits without changing any data. |
376+
377+
**Usage Examples**
378+
379+
Execute the deletion:
380+
```bash
381+
./manage.py clear_slack_activity --confirm
382+
```
383+
384+
**Process**
385+
386+
- Deletes all `SlackActivityBucket` records (message counts per user per channel per day)
387+
- Deletes all `ChannelUpdateGap` records (tracking of message fetch progress)
388+
- Resets `last_update_ts` to "0" for all `Channel` records
389+
- All operations are performed within a database transaction to ensure atomicity
390+
- Logs the number of records affected in each table
391+
392+
**Warning**: This command permanently deletes all Slack activity data. Use with caution.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Generated by Django 5.2.8 on 2026-01-06 01:11
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Register the unmanaged ``ListPosting`` model in the migration state.

    The model is declared with ``managed = False``, so Django records it in
    the migration state but does not create, alter, or drop the underlying
    ``hyperkitty_email`` table.
    """

    # Applied after migration 0005 of the ``mailing_list`` app.
    dependencies = [
        ("mailing_list", "0005_postingdata_subscriptiondata"),
    ]

    operations = [
        migrations.CreateModel(
            name="ListPosting",
            fields=[
                # Plain integer primary key (not auto-incrementing).
                ("id", models.IntegerField(primary_key=True, serialize=False)),
                ("date", models.DateTimeField()),
                # NOTE(review): no max_length is given; Django only accepts an
                # unbounded CharField on some database backends - confirm the
                # target database supports it.
                ("sender_id", models.CharField()),
            ],
            options={
                # Presumably the e-mail table owned by HyperKitty - TODO confirm.
                "db_table": "hyperkitty_email",
                # Unmanaged: Django performs no schema operations on this table.
                "managed": False,
            },
        ),
    ]

pycharm_debugger.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# Default Docker bridge gateway; used when no default route can be discovered.
_FALLBACK_GATEWAY_IP = "172.17.0.1"


def _gateway_from_route_lines(lines):
    """Return the default-gateway IPv4 address from ``/proc/net/route`` rows.

    ``lines`` is any iterable of text lines in the ``/proc/net/route`` layout
    (a header row followed by whitespace-separated columns: interface,
    destination, gateway, ...). Returns the dotted-quad gateway of the first
    row whose destination is ``00000000`` (the default route), or ``None``
    when there is no such row.
    """
    import socket
    import struct

    rows = iter(lines)
    next(rows, None)  # skip the column-header row
    for row in rows:
        fields = row.split()
        # Blank or truncated rows cannot describe a route; ignore them.
        if len(fields) >= 3 and fields[1] == "00000000":
            # The gateway column is a hex-encoded 32-bit value; packing it
            # little-endian yields the address bytes in network order.
            return socket.inet_ntoa(struct.pack("<L", int(fields[2], 16)))
    return None


def set_trace():
    """Attach this process to a PyCharm debug server listening on the host.

    The host is taken to be the container's default gateway, read from
    ``/proc/net/route``. If that file cannot be read or lists no default
    route, the conventional Docker bridge gateway (``172.17.0.1``) is used
    instead, so the call never fails merely because route discovery did.
    """
    import pydevd_pycharm

    try:
        with open("/proc/net/route") as route_file:
            gateway_ip = _gateway_from_route_lines(route_file)
    except OSError:
        # No readable route table (e.g. not running on Linux).
        gateway_ip = None

    # Use the same port number configured in PyCharm.
    pydevd_pycharm.settrace(
        host=gateway_ip or _FALLBACK_GATEWAY_IP, port=12345, suspend=False
    )

requirements-dev.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
-c requirements.txt
22
django-debug-toolbar
3-
pydevd-pycharm==252.27397.106 # pinned to appropriate version for current pycharm
3+
pydevd-pycharm==253.29346.142 # pinned to appropriate version for current pycharm

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ django==5.2.8
1010
# django-debug-toolbar
1111
django-debug-toolbar==6.1.0
1212
# via -r ./requirements-dev.in
13-
pydevd-pycharm==252.27397.106
13+
pydevd-pycharm==253.29346.142
1414
# via -r ./requirements-dev.in
1515
sqlparse==0.5.3
1616
# via
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import logging
2+
3+
import djclick as click
4+
from django.db import transaction
5+
6+
from slack.models import (
7+
SlackActivityBucket,
8+
Channel,
9+
ChannelUpdateGap,
10+
)
11+
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
@click.command()
@click.option(
    "--confirm",
    is_flag=True,
    help="Confirm deletion of all slack activity data.",
)
def command(confirm):
    """
    Delete all records in SlackActivityBucket and ChannelUpdateGap tables,
    and set last_update_ts to "0" for all Channels.

    WARNING: This will delete all slack activity tracking data and reset
    all channels to their initial state. Use with caution.
    """
    # NOTE: the docstring above doubles as the command's --help text.

    # Guard clause: nothing destructive happens unless the caller opted in.
    if not confirm:
        logger.error(
            "This command will delete ALL slack activity data. "
            "Use --confirm flag to proceed."
        )
        return

    buckets = SlackActivityBucket.objects
    gaps = ChannelUpdateGap.objects
    channels = Channel.objects

    # A single transaction, so a failure part-way through rolls everything back.
    with transaction.atomic():
        # Row counts are gathered first; they are only used in the log output.
        bucket_total = buckets.count()
        gap_total = gaps.count()
        channel_total = channels.count()

        logger.info(f"Deleting {bucket_total:,} SlackActivityBucket records...")
        buckets.all().delete()

        logger.info(f"Deleting {gap_total:,} ChannelUpdateGap records...")
        gaps.all().delete()

        logger.info(f"Resetting last_update_ts for {channel_total:,} Channels...")
        channels.update(last_update_ts="0")

    logger.info("Successfully cleared all slack activity data.")

slack/management/commands/fetch_slack_activity.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def channel_messages_in_range(channel, oldest, latest):
5151
inclusive=False,
5252
)
5353
for page in pages:
54+
# rate-limit to prevent 429 responses
55+
time.sleep(1)
5456
yield page["messages"]
5557

5658

@@ -108,22 +110,31 @@ def fill_channel_gap(gap: ChannelUpdateGap, debug: bool):
108110
logger.info(
109111
f"Fetching channel history for {gap.channel.name} ({gap.channel.id}) "
110112
f"in range ({gap.oldest_message_ts}, {gap.newest_message_ts})"
113+
f"({parse_ts(gap.oldest_message_ts)}Z to {parse_ts(gap.oldest_message_ts)}Z)"
111114
)
112115
pages = channel_messages_in_range(
113116
channel=gap.channel.id,
114117
latest=gap.newest_message_ts,
115118
oldest=gap.oldest_message_ts,
116119
)
117-
first = True
120+
121+
# pages contain a grouping of 100 messages, oldest 100 returned first
118122
for page in pages:
119123
# use a separate transaction per page to allow restoring from an
120124
# interrupted run.
121125
with transaction.atomic():
126+
# messages within a page of 100 however are newest first, so we need to update the channel on the first
127+
# message to have the future ranges retrieved without overlap
128+
first = True
122129
for message in page:
123-
if first and gap.newest_message_ts is None:
130+
readable_dt = parse_ts(message["ts"])
131+
if first:
124132
gap.channel.last_update_ts = message["ts"]
133+
msg = f"saving {readable_dt}Z as last_update_ts for channel"
134+
logger.debug(msg)
125135
gap.channel.save()
126136
first = False
137+
logger.debug(f"next message ts {readable_dt}Z")
127138
# Shrink the gap, but no need to save until we've finished this
128139
# page (transactionally).
129140
gap.newest_message_ts = message["ts"]

0 commit comments

Comments
 (0)