Skip to content

Commit 5fa5783

Browse files
authored
Celery workflow changes for release reports (#2043)
1 parent e6120b8 commit 5fa5783

6 files changed

Lines changed: 255 additions & 48 deletions

File tree

libraries/admin.py

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import structlog
2+
from datetime import date
23
from django.conf import settings
34
from django.contrib import admin, messages
45
from django.core.exceptions import ValidationError
@@ -12,9 +13,11 @@
1213
from django.shortcuts import redirect
1314
from django.views.generic import TemplateView
1415
from django import forms
16+
from celery import chain, group
1517

1618
from core.admin_filters import StaffUserCreatedByFilter
1719
from libraries.forms import CreateReportForm, CreateReportFullForm
20+
from reports.generation import determine_versions
1821
from versions.models import Version
1922
from versions.tasks import import_all_library_versions
2023
from .filters import ReportConfigurationFilter
@@ -31,15 +34,22 @@
3134
WordcloudMergeWord,
3235
)
3336
from .tasks import (
37+
count_mailinglist_contributors,
38+
count_commit_contributors_totals,
3439
generate_library_report,
40+
generate_mailinglist_cloud,
41+
generate_release_report_with_stats,
42+
generate_search_cloud,
43+
get_mailing_list_stats,
44+
get_new_contributors_count,
45+
get_new_subscribers_stats,
46+
synchronize_commit_author_user_data,
3547
update_authors_and_maintainers,
3648
update_commit_author_github_data,
3749
update_commits,
3850
update_issues,
3951
update_libraries,
4052
update_library_version_documentation_urls_all_versions,
41-
generate_release_report,
42-
synchronize_commit_author_user_data,
4353
)
4454
from .utils import generate_release_report_filename
4555

@@ -189,11 +199,52 @@ def get_context_data(self, **kwargs):
189199

190200
def generate_report(self):
191201
uri = f"{settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL}://{self.request.get_host()}"
192-
generate_release_report.delay(
193-
user_id=self.request.user.id,
194-
params=self.request.GET,
195-
base_uri=uri,
202+
logger.info("Queuing release report workflow")
203+
204+
# Get the report configuration to determine version parameters
205+
form = self.get_form()
206+
if not form.is_valid():
207+
return
208+
209+
report_configuration = form.cleaned_data["report_configuration"]
210+
211+
# NOTE TO FUTURE DEVS: remember to account for the fact that a report
212+
# configuration may not match with a real version in frequent cases where
213+
# reports are generated before the release version has been created.
214+
(report_before_release, prior_version, version) = determine_versions(
215+
report_configuration.version
216+
)
217+
218+
# trigger stats tasks first to run in parallel using group, then chain the final
219+
# report generation task
220+
stats_tasks = group(
221+
[
222+
count_mailinglist_contributors.s(prior_version.pk, version.pk),
223+
get_mailing_list_stats.s(prior_version.pk, version.pk),
224+
count_commit_contributors_totals.s(version.pk, prior_version.pk),
225+
get_new_subscribers_stats.s(
226+
prior_version.release_date, version.release_date or date.today()
227+
),
228+
generate_mailinglist_cloud.s(prior_version.pk, version.pk),
229+
# if the report is based on a live version, look for stats for that
230+
# version, otherwise use the stats for the prior (live) version
231+
generate_search_cloud.s(
232+
prior_version.pk if report_before_release else version.pk
233+
),
234+
get_new_contributors_count.s(version.pk),
235+
]
236+
)
237+
238+
# chain stats collection with final report generation
239+
workflow = chain(
240+
stats_tasks,
241+
generate_release_report_with_stats.s(
242+
self.request.user.id,
243+
self.request.GET,
244+
uri,
245+
),
196246
)
247+
workflow.apply_async()
197248

198249
def locked_publish_check(self):
199250
form = self.get_form()
@@ -245,6 +296,8 @@ class LibraryReportView(ReleaseReportView):
245296
report_type = "library report"
246297

247298
def generate_report(self):
299+
# For library reports, we don't need a complex stats workflow since
300+
# CreateReportFullForm doesn't use the same async stats pattern
248301
generate_library_report.delay(self.request.GET)
249302

250303

libraries/forms.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,120 @@ def get_stats(self):
411411
"slack": slack_stats,
412412
}
413413

414+
def generate_context(
415+
self, report_configuration: ReportConfiguration, stats_results: dict
416+
):
417+
committee_members = report_configuration.financial_committee_members.all()
418+
419+
# NOTE TO FUTURE DEVS: remember to account for the fact that a report
420+
# configuration may not match with a real version in frequent cases where
421+
# reports are generated before the release version has been created.
422+
(report_before_release, prior_version, version) = determine_versions(
423+
report_configuration.version
424+
)
425+
426+
# Unpack stats_results in the same order as tasks were defined in the workflow
427+
(
428+
(mailinglist_contributor_release_count, mailinglist_contributor_new_count),
429+
(mailinglist_post_stats, total_mailinglist_count),
430+
(commit_contributors_release_count, commit_contributors_new_count),
431+
mailinglist_new_subscribers_stats,
432+
(
433+
mailinglist_words,
434+
mailinglist_wordcloud_base64,
435+
mailinglist_wordcloud_top_words,
436+
),
437+
(search_wordcloud_base64, search_wordcloud_top_words, search_stats),
438+
global_contributors_new_count,
439+
) = stats_results
440+
441+
# Compute the synchronous stats that don't require async tasks
442+
commit_count, version_commit_count = get_commit_counts(version)
443+
top_libraries_for_version = get_top_libraries_for_version(version)
444+
top_libraries_by_name = get_libraries_by_name(version)
445+
library_order = self._get_library_order(top_libraries_by_name)
446+
# TODO: we may in future need to find a way to show the removed libraries, for
447+
# now it's not needed. In that case the distinction between running this on a
448+
# ReportConfiguration with a real 'version' entry vs one that instead uses 'master'
449+
# will need to be considered
450+
libraries = get_libraries(library_order)
451+
new_libraries = libraries.exclude(
452+
library_version__version__release_date__lte=prior_version.release_date
453+
).prefetch_related("authors")
454+
top_contributors = get_top_contributors_for_version(version)
455+
mailinglist_counts = get_mailinglist_counts(version)
456+
lines_added, lines_removed = lines_changes_count(version)
457+
opened_issues_count, closed_issues_count = get_issues_counts(
458+
prior_version, version
459+
)
460+
# TODO: connected to above todo, add removed_libraries.count()
461+
removed_library_count = 0
462+
463+
library_data = get_library_data(library_order, prior_version.pk, version.pk)
464+
slack_stats = get_slack_stats(prior_version, version)
465+
466+
library_index_library_data = get_libraries_for_index(library_data, version)
467+
batched_library_data = conditional_batched(
468+
library_data,
469+
2,
470+
lambda x: x.get("top_contributors_release").count()
471+
<= RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD,
472+
)
473+
git_graph_data = get_git_graph_data(prior_version, version)
474+
download = get_download_links(version)
475+
476+
return {
477+
"committee_members": committee_members,
478+
"lines_added": lines_added,
479+
"lines_removed": lines_removed,
480+
"version": version,
481+
"report_configuration": report_configuration,
482+
"prior_version": prior_version,
483+
"opened_issues_count": opened_issues_count,
484+
"closed_issues_count": closed_issues_count,
485+
"mailinglist_wordcloud_base64": mailinglist_wordcloud_base64,
486+
"mailinglist_wordcloud_frequencies": mailinglist_wordcloud_top_words,
487+
"mailinglist_counts": mailinglist_counts,
488+
"mailinglist_total": total_mailinglist_count or 0,
489+
"mailinglist_contributor_release_count": mailinglist_contributor_release_count, # noqa: E501
490+
"mailinglist_contributor_new_count": mailinglist_contributor_new_count,
491+
"mailinglist_post_stats": mailinglist_post_stats,
492+
"mailinglist_new_subscribers_stats": mailinglist_new_subscribers_stats,
493+
"mailinglist_charts_start_year": prior_version.release_date.year,
494+
"search_wordcloud_base64": search_wordcloud_base64,
495+
"search_wordcloud_frequencies": search_wordcloud_top_words,
496+
"search_stats": search_stats,
497+
"commit_contributors_release_count": commit_contributors_release_count,
498+
"commit_contributors_new_count": commit_contributors_new_count,
499+
"global_contributors_new_count": global_contributors_new_count,
500+
"commit_count": commit_count,
501+
"version_commit_count": version_commit_count,
502+
"top_contributors_release_overall": top_contributors,
503+
"library_data": library_data,
504+
"new_libraries": new_libraries,
505+
"batched_library_data": batched_library_data,
506+
"top_libraries_for_version": top_libraries_for_version,
507+
"library_count": libraries.count(),
508+
"library_index_libraries": library_index_library_data,
509+
"added_library_count": new_libraries.count(),
510+
"removed_library_count": removed_library_count,
511+
"downloads": download,
512+
"contribution_box_graph": git_graph_data,
513+
"slack_channels": get_slack_channels(),
514+
"slack": slack_stats,
515+
}
516+
517+
def render_with_stats(self, stats_results, base_uri=None):
518+
"""Render HTML with pre-computed stats results"""
519+
context = self.generate_context(
520+
self.cleaned_data["report_configuration"], stats_results
521+
)
522+
if base_uri:
523+
context["base_uri"] = base_uri
524+
html = render_to_string(self.html_template_name, context)
525+
self.cache_set(html)
526+
return html
527+
414528

415529
class CommitAuthorEmailForm(Form):
416530
"""

libraries/management/commands/release_tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def set_tasks(self):
5959
Action("Updating slack activity buckets", ["fetch_slack_activity"]),
6060
Action("Updating website statistics", self.update_website_statistics),
6161
Action("Importing mailing list counts", self.import_ml_counts),
62-
Action("Generating report", self.generate_report),
62+
# Action("Generating report", self.generate_report),
6363
]
6464

6565
def import_versions(self):

libraries/tasks.py

Lines changed: 74 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
CommitAuthor,
2020
ReleaseReport,
2121
)
22-
from mailing_list.models import EmailData, PostingData, SubscriptionData
22+
from mailing_list.models import EmailData, PostingData
2323
from reports.generation import (
2424
generate_algolia_words,
2525
generate_wordcloud,
@@ -251,12 +251,35 @@ def update_issues(clean=False):
251251

252252

253253
@app.task
254-
def generate_release_report(user_id: int, params: dict, base_uri: str = None):
255-
"""Generate a release report asynchronously and save it in RenderedContent."""
254+
def generate_release_report_with_stats(stats_results, user_id, params, base_uri=None):
255+
"""Wrapper task that reorders arguments for workflow mode."""
256+
return generate_release_report(user_id, params, base_uri, stats_results)
257+
258+
259+
@app.task
260+
def generate_release_report(user_id, params, base_uri=None, stats_results=None):
261+
"""Generate a release report asynchronously and save to RenderedContent/PDF
262+
263+
Args:
264+
user_id: ID of the user creating the report
265+
params: Form parameters for report configuration
266+
base_uri: Base URI for the report (optional)
267+
stats_results: Pre-collected stats from workflow (optional)
268+
"""
269+
logger.info(f"Starting generate_release_report {settings.LOCAL_DEVELOPMENT=}")
270+
256271
from libraries.forms import CreateReportForm
257272

258273
form = CreateReportForm(params)
259-
html = form.cache_html(base_uri=base_uri)
274+
if not form.is_valid():
275+
logger.error(f"Form validation failed, {form.errors}")
276+
return None
277+
278+
if stats_results:
279+
html = form.render_with_stats(stats_results, base_uri=base_uri)
280+
else:
281+
html = form.cache_html(base_uri=base_uri)
282+
260283
# override the base uri to reference the internal container for local dev
261284
if settings.LOCAL_DEVELOPMENT:
262285
html = update_base_tag(html, DOCKER_CONTAINER_URL_WEB)
@@ -265,7 +288,9 @@ def generate_release_report(user_id: int, params: dict, base_uri: str = None):
265288
created_by_id=user_id,
266289
report_configuration_id=params.get("report_configuration"),
267290
)
291+
logger.info(f"Saving release_report {params.get('report_configuration')=}")
268292
release_report.save()
293+
logger.info(f"generate release report pdf {release_report.pk=}")
269294
generate_release_report_pdf.delay(
270295
release_report.pk, html=html, publish=params.get("publish")
271296
)
@@ -330,7 +355,7 @@ def generate_library_report(params):
330355
from libraries.forms import CreateReportFullForm
331356

332357
form = CreateReportFullForm(params)
333-
form.cache_html()
358+
return form.cache_html()
334359

335360

336361
@app.task
@@ -588,40 +613,52 @@ def get_mailing_list_stats(prior_version_id: int, version_id: int):
588613

589614
@shared_task
590615
def get_new_subscribers_stats(start_date: date, end_date: date):
591-
data = (
592-
SubscriptionData.objects.filter(
593-
subscription_dt__gte=start_date,
594-
subscription_dt__lte=end_date,
595-
list="boost",
596-
)
597-
.annotate(
598-
week=ExtractWeek("subscription_dt"),
599-
iso_year=ExtractIsoYear("subscription_dt"),
600-
)
601-
.values("iso_year", "week")
602-
.annotate(count=Count("id"))
603-
.order_by("iso_year", "week")
604-
)
616+
"""Get new subscribers statistics for HyperKitty mailing list using raw SQL."""
617+
import psycopg2
618+
from django.conf import settings
605619

606-
# Convert data into a dict for easy lookup
607-
counts_by_week = {(row["iso_year"], row["week"]): row["count"] for row in data}
620+
conn = psycopg2.connect(settings.HYPERKITTY_DATABASE_URL)
608621

609-
# Iterate through every ISO week in the date range
610-
current = start_date
611-
seen = set()
612-
chart_data = []
613-
while current <= end_date:
614-
iso_year, iso_week, _ = current.isocalendar()
615-
key = (iso_year, iso_week)
616-
if key not in seen: # skip duplicate weeks in the same loop
617-
seen.add(key)
618-
year_suffix = str(iso_year)[2:]
619-
label = f"{iso_week} ({year_suffix})"
620-
count = counts_by_week.get(key, 0)
621-
chart_data.append({"x": label, "y": count})
622-
current += timedelta(days=7) # hop by weeks
623-
624-
return chart_data
622+
try:
623+
with conn.cursor() as cursor:
624+
cursor.execute(
625+
"""
626+
SELECT
627+
EXTRACT(ISOYEAR FROM date_joined) as iso_year,
628+
EXTRACT(WEEK FROM date_joined) as iso_week,
629+
COUNT(*) as count
630+
FROM auth_user
631+
WHERE date_joined::date >= %s
632+
AND date_joined::date <= %s
633+
GROUP BY iso_year, iso_week
634+
ORDER BY iso_year, iso_week
635+
""",
636+
[start_date, end_date],
637+
)
638+
639+
data = cursor.fetchall()
640+
641+
# Convert data into a dict for easy lookup
642+
counts_by_week = {(int(row[0]), int(row[1])): row[2] for row in data}
643+
644+
# Iterate through every ISO week in the date range
645+
current = start_date
646+
seen = set()
647+
chart_data = []
648+
while current <= end_date:
649+
iso_year, iso_week, _ = current.isocalendar()
650+
key = (iso_year, iso_week)
651+
if key not in seen: # skip duplicate weeks in the same loop
652+
seen.add(key)
653+
year_suffix = str(iso_year)[2:]
654+
label = f"{iso_week} ({year_suffix})"
655+
count = counts_by_week.get(key, 0)
656+
chart_data.append({"x": label, "y": count})
657+
current += timedelta(days=7) # hop by weeks
658+
659+
return chart_data
660+
finally:
661+
conn.close()
625662

626663

627664
@shared_task

0 commit comments

Comments
 (0)