Skip to content

Commit c104c36

Browse files
authored
feat: add leaderboards for stars, forks and package downloads (IN-894) (#3743)
Signed-off-by: Gašper Grom <gasper.grom@gmail.com>
1 parent 97f09cb commit c104c36

4 files changed

Lines changed: 184 additions & 1 deletion

File tree

services/libs/tinybird/pipes/leaderboards_copy.pipe

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
DESCRIPTION >
22
Aggregates all individual leaderboard pipes into a single unified datasource for querying.
3-
Consolidates 10 different leaderboard types with their respective rankings and values,
3+
Consolidates 13 different leaderboard types with their respective rankings and values,
44
and copies the results to a datasource on a daily schedule at 1 AM.
55

66
NODE leaderboards_copy_union
@@ -17,6 +17,15 @@ SQL >
1717
SELECT *, 'commit-activity' as leaderboardType
1818
FROM leaderboards_commits
1919
UNION ALL
20+
SELECT *, 'stars' as leaderboardType
21+
FROM leaderboards_stars
22+
UNION ALL
23+
SELECT *, 'forks' as leaderboardType
24+
FROM leaderboards_forks
25+
UNION ALL
26+
SELECT *, 'package-downloads' as leaderboardType
27+
FROM leaderboards_package_downloads
28+
UNION ALL
2029
SELECT *, 0.0 as previousPeriodValue, 'focused-teams' as leaderboardType
2130
FROM leaderboards_avg_commits_per_author
2231
UNION ALL
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
DESCRIPTION >
2+
Leaderboard ranking projects by forks counted over the last 365 days. Compares the current period (last 12 months)
3+
with the previous period (12-24 months ago). Higher fork counts rank higher, showing the most
4+
forked projects.
5+
6+
NODE leaderboards_forks_projects
DESCRIPTION >
    Project metadata read from insights_projects_populated_ds, collapsed to one row per distinct
    combination of id, name, slug, segmentId, logoUrl, collectionsSlugs and isLF

SQL >
    SELECT DISTINCT
        id,
        name,
        slug,
        segmentId,
        logoUrl,
        collectionsSlugs,
        isLF
    FROM insights_projects_populated_ds
14+
15+
NODE leaderboards_forks_metrics
DESCRIPTION >
    Reads per-project fork counts for the last 365 days (currentForks) and the previous 365 days
    (previousForks) from project_insights_copy_ds, keeping only rows where forksLast365Days is
    greater than zero

SQL >
    SELECT
        id,
        forksLast365Days AS currentForks,
        forksPrevious365Days AS previousForks
    FROM project_insights_copy_ds
    WHERE forksLast365Days > 0
23+
24+
NODE leaderboards_copy_github_forks
DESCRIPTION >
    Joins project metadata with fork counts (inner join, so only projects present in the metrics
    node appear) and ranks projects by current-period forks, highest first.
    Projects with equal fork counts are ordered by project id so that the assigned rank is
    deterministic instead of depending on row arrival order.
    Output columns: rank, id, segmentId, name, slug, logoUrl, collectionsSlugs, isLF,
    value (current forks as Float64), previousPeriodValue (previous forks as Float64, 0 when missing).

SQL >
    SELECT
        row_number() OVER (ORDER BY coalesce(m.currentForks, 0) DESC, p.id ASC) AS rank,
        p.id AS id,
        p.segmentId AS segmentId,
        p.name AS name,
        p.slug AS slug,
        p.logoUrl AS logoUrl,
        p.collectionsSlugs AS collectionsSlugs,
        p.isLF AS isLF,
        cast(coalesce(m.currentForks, 0) AS Float64) AS value,
        cast(coalesce(m.previousForks, 0) AS Float64) AS previousPeriodValue
    FROM leaderboards_forks_projects p
    INNER JOIN leaderboards_forks_metrics m ON p.id = m.id
    WHERE m.currentForks > 0
    ORDER BY rank ASC
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
DESCRIPTION >
2+
Leaderboard ranking projects by package download counts in the last 30 days.
3+
Compares the current period (last 30 days) with the previous period (30-60 days ago).
4+
Higher download counts rank higher, showing the most downloaded packages.
5+
6+
NODE leaderboards_package_downloads_projects
DESCRIPTION >
    Project metadata read from insights_projects_populated_ds, collapsed to one row per distinct
    combination of id, name, slug, segmentId, logoUrl, collectionsSlugs and isLF

SQL >
    SELECT DISTINCT
        id,
        name,
        slug,
        segmentId,
        logoUrl,
        collectionsSlugs,
        isLF
    FROM insights_projects_populated_ds
14+
15+
NODE leaderboards_package_downloads_current_period
DESCRIPTION >
    Downloads attributed to the last 30 days, derived from downloadsCount, which is treated as a
    cumulative counter. Per project, the value at the latest date within the last 60 days is the
    current total and the value at the latest date 30-60 days ago is the baseline; the result is
    current total minus baseline. The current-total lookup reaches back 30 days further than the
    period itself so that projects with missing recent dates still resolve to a value.
    NOTE(review): when a project has no baseline row the baseline falls back to 0, so the whole
    latest cumulative value is reported as this period's downloads. Confirm this is intended.

SQL >
    WITH
        recent_downloads AS (
            SELECT
                insightsProjectId,
                argMax(downloadsCount, date) AS recentDownloads
            FROM packageDownloads
            WHERE
                date >= now() - INTERVAL 60 DAY
                AND date <= now()
                AND insightsProjectId != ''
            GROUP BY insightsProjectId
        ),
        baseline_downloads AS (
            SELECT
                insightsProjectId,
                argMax(downloadsCount, date) AS baselineDownloads
            FROM packageDownloads
            WHERE
                date >= now() - INTERVAL 60 DAY
                AND date < now() - INTERVAL 30 DAY
                AND insightsProjectId != ''
            GROUP BY insightsProjectId
        )
    SELECT
        r.insightsProjectId,
        r.recentDownloads - coalesce(b.baselineDownloads, 0) AS downloads
    FROM recent_downloads r
    LEFT JOIN baseline_downloads b ON r.insightsProjectId = b.insightsProjectId
39+
40+
NODE leaderboards_package_downloads_previous_period
DESCRIPTION >
    Downloads attributed to the previous 30-day period (30-60 days ago), derived from
    downloadsCount, which is treated as a cumulative counter. Per project, the value at the latest
    date between 90 and 30 days ago is the period-end total and the value at the latest date
    between 90 and 60 days ago is the older total; the result is period-end total minus older total.
    The period-end lookup reaches back 30 days further than the period itself so that projects
    with missing dates still resolve to a value.
    NOTE(review): when a project has no older row the older total falls back to 0, so the whole
    period-end cumulative value is reported as the previous period's downloads. Confirm this is intended.

SQL >
    WITH
        baseline_downloads AS (
            SELECT
                insightsProjectId,
                argMax(downloadsCount, date) AS baselineDownloads
            FROM packageDownloads
            WHERE
                date >= now() - INTERVAL 90 DAY
                AND date < now() - INTERVAL 30 DAY
                AND insightsProjectId != ''
            GROUP BY insightsProjectId
        ),
        older_downloads AS (
            SELECT
                insightsProjectId,
                argMax(downloadsCount, date) AS olderDownloads
            FROM packageDownloads
            WHERE
                date >= now() - INTERVAL 90 DAY
                AND date < now() - INTERVAL 60 DAY
                AND insightsProjectId != ''
            GROUP BY insightsProjectId
        )
    SELECT
        b.insightsProjectId,
        b.baselineDownloads - coalesce(o.olderDownloads, 0) AS downloads
    FROM baseline_downloads b
    LEFT JOIN older_downloads o ON b.insightsProjectId = o.insightsProjectId
67+
68+
NODE leaderboards_copy_package_downloads
DESCRIPTION >
    Joins project metadata with current-period downloads (inner join, so only projects with a
    current-period row appear) and previous-period downloads (left join), keeps rows whose
    current downloads are greater than zero and ranks them by current downloads, highest first.
    Projects with equal download counts are ordered by project id so that the assigned rank is
    deterministic instead of depending on row arrival order.
    Output columns: rank, id, segmentId, name, slug, logoUrl, collectionsSlugs, isLF,
    value (current downloads as Float64), previousPeriodValue (previous downloads as Float64,
    0 when the project has no previous-period row).

SQL >
    SELECT
        row_number() OVER (ORDER BY coalesce(c.downloads, 0) DESC, p.id ASC) AS rank,
        p.id AS id,
        p.segmentId AS segmentId,
        p.name AS name,
        p.slug AS slug,
        p.logoUrl AS logoUrl,
        p.collectionsSlugs AS collectionsSlugs,
        p.isLF AS isLF,
        cast(coalesce(c.downloads, 0) AS Float64) AS value,
        cast(coalesce(pp.downloads, 0) AS Float64) AS previousPeriodValue
    FROM leaderboards_package_downloads_projects p
    INNER JOIN leaderboards_package_downloads_current_period c ON p.id = c.insightsProjectId
    LEFT JOIN leaderboards_package_downloads_previous_period pp ON p.id = pp.insightsProjectId
    WHERE c.downloads > 0
    ORDER BY rank ASC
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
DESCRIPTION >
2+
Leaderboard ranking projects by stars counted over the last 365 days. Compares the current period (last 12 months)
3+
with the previous period (12-24 months ago). Higher star counts rank higher, showing the most
4+
popular projects.
5+
6+
NODE leaderboards_stars_projects
DESCRIPTION >
    Project metadata read from insights_projects_populated_ds, collapsed to one row per distinct
    combination of id, name, slug, segmentId, logoUrl, collectionsSlugs and isLF

SQL >
    SELECT DISTINCT
        id,
        name,
        slug,
        segmentId,
        logoUrl,
        collectionsSlugs,
        isLF
    FROM insights_projects_populated_ds
14+
15+
NODE leaderboards_stars_metrics
DESCRIPTION >
    Reads per-project star counts for the last 365 days (currentStars) and the previous 365 days
    (previousStars) from project_insights_copy_ds, keeping only rows where starsLast365Days is
    greater than zero

SQL >
    SELECT
        id,
        starsLast365Days AS currentStars,
        starsPrevious365Days AS previousStars
    FROM project_insights_copy_ds
    WHERE starsLast365Days > 0
23+
24+
NODE leaderboards_copy_github_stars
DESCRIPTION >
    Joins project metadata with star counts (inner join, so only projects present in the metrics
    node appear) and ranks projects by current-period stars, highest first.
    Projects with equal star counts are ordered by project id so that the assigned rank is
    deterministic instead of depending on row arrival order.
    Output columns: rank, id, segmentId, name, slug, logoUrl, collectionsSlugs, isLF,
    value (current stars as Float64), previousPeriodValue (previous stars as Float64, 0 when missing).

SQL >
    SELECT
        row_number() OVER (ORDER BY coalesce(m.currentStars, 0) DESC, p.id ASC) AS rank,
        p.id AS id,
        p.segmentId AS segmentId,
        p.name AS name,
        p.slug AS slug,
        p.logoUrl AS logoUrl,
        p.collectionsSlugs AS collectionsSlugs,
        p.isLF AS isLF,
        cast(coalesce(m.currentStars, 0) AS Float64) AS value,
        cast(coalesce(m.previousStars, 0) AS Float64) AS previousPeriodValue
    FROM leaderboards_stars_projects p
    INNER JOIN leaderboards_stars_metrics m ON p.id = m.id
    WHERE m.currentStars > 0
    ORDER BY rank ASC

0 commit comments

Comments
 (0)