Skip to content

Commit 3568355

Browse files
gaspergromepipav
authored andcommitted
feat(tinybird): oss index (#3065)
Signed-off-by: Gašper Grom <gasper.grom@gmail.com> Co-authored-by: anilb <epipav@gmail.com>
1 parent 772d0cb commit 3568355

5 files changed

Lines changed: 354 additions & 0 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
DESCRIPTION >
    Contributor and organization counts per segment, carrying the project, collection, category and category group ids attached to each row.

SCHEMA >
    `segmentId` String,
    `contributorCount` UInt64,
    `organizationCount` UInt64,
    `projectId` String,
    `collectionId` String,
    `categoryId` String,
    `categoryGroupId` String

ENGINE "MergeTree"
ENGINE_SORTING_KEY "segmentId, projectId, collectionId, categoryId, categoryGroupId"
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
TOKEN "categories_oss_index_endpoint_read_8383" READ

NODE categories_oss_index_categoryGroups
DESCRIPTION >
    Ids of the category groups in scope: only the group whose slug equals `categoryGroupSlug` when that parameter is supplied, otherwise every category group.

SQL >
    %
    SELECT categoryGroups.id
    FROM categoryGroups
    {% if defined(categoryGroupSlug) %}
        -- The filter is optional (guarded by `defined`), so the parameter is declared as not required.
        WHERE
            categoryGroups.slug
            = {{ String(categoryGroupSlug, description="Category group slug", required=False) }}
    {% end %}
13+
14+
NODE categories_oss_index_agregates
DESCRIPTION >
    Summed contributor count per category, limited to categories whose group id is returned by `categories_oss_index_categoryGroups`; largest totals first.

SQL >
    SELECT
        categoryId,
        coalesce(sum(contributorCount), 0) AS totalContributors
    FROM segments_aggregates_with_ids_datasource
    WHERE
        categoryGroupId IN (SELECT id FROM categories_oss_index_categoryGroups)
        AND categoryId != ''
    GROUP BY categoryId
    ORDER BY totalContributors DESC
21+
22+
NODE categories_oss_index_projects_deduplicated
DESCRIPTION >
    Id, name and logo URL of every insights project, read with FINAL.

SQL >
    SELECT
        id,
        name,
        logoUrl
    FROM insightsProjects FINAL
26+
27+
NODE categories_oss_index_collections_deduplicated
DESCRIPTION >
    Id and name of every collection, read with FINAL.

SQL >
    SELECT
        id,
        name
    FROM collections FINAL
30+
31+
NODE categories_oss_index_projects_ranked_by_category
DESCRIPTION >
    Summed contributor count per (category, project) pair, with `rn` ranking the projects inside each category from most to fewest contributors.

SQL >
    SELECT
        categoryId,
        projectId,
        sum(contributorCount) AS totalContributors,
        -- projectId is a tiebreaker so that equal totals always rank in the same order
        -- and the top-N cut applied downstream is stable between requests.
        ROW_NUMBER() OVER (
            PARTITION BY categoryId
            ORDER BY sum(contributorCount) DESC, projectId ASC
        ) AS rn
    FROM segments_aggregates_with_ids_datasource
    WHERE categoryId != '' AND projectId != ''
    GROUP BY categoryId, projectId
41+
42+
NODE categories_oss_index_projects
DESCRIPTION >
    The five best-ranked projects of each category (rn <= 5), enriched with the project name and logo. Ranked projects with no matching row in the deduplicated projects node are dropped by the inner join.

SQL >
    SELECT
        ranked.categoryId,
        ranked.projectId,
        ranked.totalContributors,
        pd.name AS projectName,
        pd.logoUrl AS projectLogo
    FROM categories_oss_index_projects_ranked_by_category AS ranked
    INNER JOIN categories_oss_index_projects_deduplicated AS pd
        ON pd.id = ranked.projectId
    WHERE ranked.rn <= 5
53+
54+
NODE categories_oss_index_collections_ranked_by_category
DESCRIPTION >
    Summed contributor count per (category, collection) pair, with `rn` ranking the collections inside each category from most to fewest contributors.

SQL >
    SELECT
        categoryId,
        collectionId,
        sum(contributorCount) AS totalContributors,
        -- collectionId is a tiebreaker so that equal totals always rank in the same order
        -- and the top-N cut applied downstream is stable between requests.
        ROW_NUMBER() OVER (
            PARTITION BY categoryId
            ORDER BY sum(contributorCount) DESC, collectionId ASC
        ) AS rn
    FROM segments_aggregates_with_ids_datasource
    WHERE categoryId != '' AND collectionId != ''
    GROUP BY categoryId, collectionId
64+
65+
NODE categories_oss_index_collections
DESCRIPTION >
    The five best-ranked collections of each category (rn <= 5), enriched with the collection name. Ranked collections with no matching row in the deduplicated collections node are dropped by the inner join.

SQL >
    SELECT
        ranked.categoryId,
        ranked.collectionId,
        ranked.totalContributors,
        cd.name AS collectionName
    FROM categories_oss_index_collections_ranked_by_category AS ranked
    INNER JOIN categories_oss_index_collections_deduplicated AS cd
        ON cd.id = ranked.collectionId
    WHERE ranked.rn <= 5
75+
76+
NODE categories_oss_index_categories_deduplicated
DESCRIPTION >
    Id, name and slug of the categories (read with FINAL) whose category group id is returned by `categories_oss_index_categoryGroups`.

SQL >
    SELECT
        id,
        name,
        slug
    FROM categories FINAL
    WHERE categoryGroupId IN (SELECT id FROM categories_oss_index_categoryGroups)
81+
82+
NODE categories_oss_index_top_collections
DESCRIPTION >
    One row per category with `topCollections`, an array of (collectionId, totalContributors, collectionName) tuples sorted by totalContributors, highest first.

SQL >
    SELECT
        categoryId,
        -- groupArray keeps no particular element order, so sort explicitly on the
        -- second tuple element (totalContributors) to return a real "top" list.
        arrayReverseSort(
            entry -> tupleElement(entry, 2),
            groupArray((collectionId, totalContributors, collectionName))
        ) AS topCollections
    FROM categories_oss_index_collections
    GROUP BY categoryId
87+
88+
NODE categories_oss_index_top_projects
DESCRIPTION >
    One row per category with `topProjects`, an array of (projectId, totalContributors, projectName, projectLogo) tuples sorted by totalContributors, highest first.

SQL >
    SELECT
        categoryId,
        -- groupArray keeps no particular element order, so sort explicitly on the
        -- second tuple element (totalContributors) to return a real "top" list.
        arrayReverseSort(
            entry -> tupleElement(entry, 2),
            groupArray((projectId, totalContributors, projectName, projectLogo))
        ) AS topProjects
    FROM categories_oss_index_projects
    GROUP BY categoryId
94+
95+
NODE categories_oss_index_results
DESCRIPTION >
    Endpoint output: one row per aggregated category with its id, name, slug, total contributors, top collections and top projects, ordered by total contributors (highest first).

SQL >
    SELECT
        a.categoryId AS id,
        cgd.name AS name,
        cgd.slug AS slug,
        a.totalContributors,
        c.topCollections,
        p.topProjects
    FROM categories_oss_index_agregates AS a
    LEFT JOIN categories_oss_index_top_collections AS c USING (categoryId)
    LEFT JOIN categories_oss_index_top_projects AS p USING (categoryId)
    -- Join through the declared alias rather than the node name.
    LEFT JOIN categories_oss_index_categories_deduplicated AS cgd
        ON cgd.id = a.categoryId
    -- The ordering of the aggregates node is not guaranteed to survive the joins,
    -- so it is restated on the final result; categoryId makes ties deterministic.
    ORDER BY a.totalContributors DESC, a.categoryId ASC
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
TOKEN "category_groups_oss_index_endpoint_read_7394" READ

NODE category_groups_oss_index_agregates
DESCRIPTION >
    Summed contributor count per category group, skipping rows with an empty category group id; largest totals first.

SQL >
    SELECT
        categoryGroupId,
        coalesce(sum(contributorCount), 0) AS totalContributors
    FROM segments_aggregates_with_ids_datasource
    WHERE categoryGroupId != ''
    GROUP BY categoryGroupId
    ORDER BY totalContributors DESC
10+
11+
NODE category_groups_oss_index_projects_deduplicated
DESCRIPTION >
    Id, name and logo URL of every insights project, read with FINAL.

SQL >
    SELECT
        id,
        name,
        logoUrl
    FROM insightsProjects FINAL
15+
16+
NODE category_groups_oss_index_collections_deduplicated
DESCRIPTION >
    Id and name of every collection, read with FINAL.

SQL >
    SELECT
        id,
        name
    FROM collections FINAL
19+
20+
NODE category_groups_oss_index_projects_ranked_by_category_group
DESCRIPTION >
    Summed contributor count per (category group, project) pair, with `rn` ranking the projects inside each category group from most to fewest contributors.

SQL >
    SELECT
        categoryGroupId,
        projectId,
        sum(contributorCount) AS totalContributors,
        -- projectId is a tiebreaker so that equal totals always rank in the same order
        -- and the top-N cut applied downstream is stable between requests.
        ROW_NUMBER() OVER (
            PARTITION BY categoryGroupId
            ORDER BY sum(contributorCount) DESC, projectId ASC
        ) AS rn
    FROM segments_aggregates_with_ids_datasource
    WHERE categoryGroupId != '' AND projectId != ''
    GROUP BY categoryGroupId, projectId
30+
31+
NODE category_groups_oss_index_projects
DESCRIPTION >
    The five best-ranked projects of each category group (rn <= 5), enriched with the project name and logo. Ranked projects with no matching row in the deduplicated projects node are dropped by the inner join.

SQL >
    SELECT
        ranked.categoryGroupId,
        ranked.projectId,
        ranked.totalContributors,
        pd.name AS projectName,
        pd.logoUrl AS projectLogo
    FROM category_groups_oss_index_projects_ranked_by_category_group AS ranked
    INNER JOIN category_groups_oss_index_projects_deduplicated AS pd
        ON pd.id = ranked.projectId
    WHERE ranked.rn <= 5
42+
43+
NODE category_groups_oss_index_collections_ranked_by_category_group
DESCRIPTION >
    Summed contributor count per (category group, collection) pair, with `rn` ranking the collections inside each category group from most to fewest contributors.

SQL >
    SELECT
        categoryGroupId,
        collectionId,
        sum(contributorCount) AS totalContributors,
        -- collectionId is a tiebreaker so that equal totals always rank in the same order
        -- and the top-N cut applied downstream is stable between requests.
        ROW_NUMBER() OVER (
            PARTITION BY categoryGroupId
            ORDER BY sum(contributorCount) DESC, collectionId ASC
        ) AS rn
    FROM segments_aggregates_with_ids_datasource
    WHERE categoryGroupId != '' AND collectionId != ''
    GROUP BY categoryGroupId, collectionId
53+
54+
NODE category_groups_oss_index_collections
DESCRIPTION >
    The five best-ranked collections of each category group (rn <= 5), enriched with the collection name. Ranked collections with no matching row in the deduplicated collections node are dropped by the inner join.

SQL >
    SELECT
        ranked.categoryGroupId,
        ranked.collectionId,
        ranked.totalContributors,
        cd.name AS collectionName
    FROM category_groups_oss_index_collections_ranked_by_category_group AS ranked
    INNER JOIN category_groups_oss_index_collections_deduplicated AS cd
        ON cd.id = ranked.collectionId
    WHERE ranked.rn <= 5
64+
65+
NODE category_groups_oss_index_category_groups_deduplicated
DESCRIPTION >
    Id, name, type and slug of the category groups (read with FINAL), optionally restricted to the groups whose type equals the `type` parameter.

SQL >
    %
    SELECT
        categoryGroups.id,
        categoryGroups.name,
        categoryGroups.type,
        categoryGroups.slug
    FROM categoryGroups FINAL
    {% if defined(type) %}
        WHERE
            categoryGroups.type
            = {{ String(type, description="Filter category group type", required=False) }}
    {% end %}
75+
76+
NODE category_groups_oss_index_top_collections
DESCRIPTION >
    One row per category group with `topCollections`, an array of (collectionId, totalContributors, collectionName) tuples sorted by totalContributors, highest first.

SQL >
    SELECT
        categoryGroupId,
        -- groupArray keeps no particular element order, so sort explicitly on the
        -- second tuple element (totalContributors) to return a real "top" list.
        arrayReverseSort(
            entry -> tupleElement(entry, 2),
            groupArray((collectionId, totalContributors, collectionName))
        ) AS topCollections
    FROM category_groups_oss_index_collections
    GROUP BY categoryGroupId
82+
83+
NODE category_groups_oss_index_top_projects
DESCRIPTION >
    One row per category group with `topProjects`, an array of (projectId, totalContributors, projectName, projectLogo) tuples sorted by totalContributors, highest first.

SQL >
    SELECT
        categoryGroupId,
        -- groupArray keeps no particular element order, so sort explicitly on the
        -- second tuple element (totalContributors) to return a real "top" list.
        arrayReverseSort(
            entry -> tupleElement(entry, 2),
            groupArray((projectId, totalContributors, projectName, projectLogo))
        ) AS topProjects
    FROM category_groups_oss_index_projects
    GROUP BY categoryGroupId
90+
91+
NODE category_groups_oss_index_results
DESCRIPTION >
    Endpoint output: one row per category group that passes the optional type filter, with its id, name, type, slug, total contributors, top collections and top projects, ordered by total contributors (highest first).

SQL >
    SELECT
        a.categoryGroupId AS id,
        cgd.name AS name,
        cgd.type AS type,
        cgd.slug AS slug,
        a.totalContributors,
        c.topCollections,
        p.topProjects
    FROM category_groups_oss_index_agregates AS a
    LEFT JOIN category_groups_oss_index_top_collections AS c USING (categoryGroupId)
    LEFT JOIN category_groups_oss_index_top_projects AS p USING (categoryGroupId)
    -- Inner join on purpose: groups removed by the type filter must not appear.
    -- The condition goes through the declared alias rather than the node name.
    INNER JOIN category_groups_oss_index_category_groups_deduplicated AS cgd
        ON cgd.id = a.categoryGroupId
    -- The ordering of the aggregates node is not guaranteed to survive the joins,
    -- so it is restated on the final result; categoryGroupId makes ties deterministic.
    ORDER BY a.totalContributors DESC, a.categoryGroupId ASC
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
TOKEN "collections_oss_index_endpoint_read_9807" READ

NODE collections_oss_index_category_ids
DESCRIPTION >
    Ids of the categories in scope: only the category whose slug equals `categorySlug` when that parameter is supplied, otherwise every category.

SQL >
    %
    SELECT categories.id
    FROM categories
    {% if defined(categorySlug) %}
        -- The filter is optional (guarded by `defined`), so the parameter is declared as not required.
        WHERE categories.slug = {{ String(categorySlug, description="Category slug", required=False) }}
    {% end %}
11+
12+
NODE collections_oss_index_agregates
DESCRIPTION >
    Summed contributor count per collection, skipping rows with an empty collection id; largest totals first.

SQL >
    SELECT
        collectionId,
        coalesce(sum(contributorCount), 0) AS totalContributors
    FROM segments_aggregates_with_ids_datasource
    WHERE collectionId != ''
    GROUP BY collectionId
    ORDER BY totalContributors DESC
19+
20+
NODE collections_oss_index_projects_deduplicated
DESCRIPTION >
    Id, name and logo URL of every insights project, read with FINAL.

SQL >
    SELECT
        id,
        name,
        logoUrl
    FROM insightsProjects FINAL
24+
25+
NODE collections_oss_index_projects_ranked_by_collection
DESCRIPTION >
    Summed contributor count per (collection, project) pair, with `rn` ranking the projects inside each collection from most to fewest contributors.

SQL >
    SELECT
        collectionId,
        projectId,
        sum(contributorCount) AS totalContributors,
        -- projectId is a tiebreaker so that equal totals always rank in the same order
        -- and the top-N cut applied downstream is stable between requests.
        ROW_NUMBER() OVER (
            PARTITION BY collectionId
            ORDER BY sum(contributorCount) DESC, projectId ASC
        ) AS rn
    FROM segments_aggregates_with_ids_datasource
    WHERE collectionId != '' AND projectId != ''
    GROUP BY collectionId, projectId
35+
36+
NODE collections_oss_index_projects
DESCRIPTION >
    The five best-ranked projects of each collection (rn <= 5), enriched with the project name and logo. This node was previously named `categories_oss_index_projects`, a leftover from the categories pipe; it now follows this pipe's `collections_oss_index_` prefix.

SQL >
    SELECT
        topProjects.collectionId,
        topProjects.projectId,
        topProjects.totalContributors,
        pd.name AS projectName,
        pd.logoUrl AS projectLogo
    FROM collections_oss_index_projects_ranked_by_collection AS topProjects
    INNER JOIN collections_oss_index_projects_deduplicated AS pd
        ON pd.id = topProjects.projectId
    WHERE topProjects.rn <= 5

NODE collections_oss_index_collections_deduplicated
DESCRIPTION >
    Id, slug and name of the collections (read with FINAL) whose category id is returned by `collections_oss_index_category_ids`.

SQL >
    SELECT
        collections.id,
        collections.slug,
        collections.name
    FROM collections FINAL
    WHERE collections.categoryId IN (SELECT id FROM collections_oss_index_category_ids)

NODE collections_oss_index_top_projects
DESCRIPTION >
    One row per collection with `topProjects`, an array of (projectId, totalContributors, projectName, projectLogo) tuples sorted by totalContributors, highest first.

SQL >
    SELECT
        collectionId,
        -- groupArray keeps no particular element order, so sort explicitly on the
        -- second tuple element (totalContributors) to return a real "top" list.
        arrayReverseSort(
            entry -> tupleElement(entry, 2),
            groupArray((projectId, totalContributors, projectName, projectLogo))
        ) AS topProjects
    FROM collections_oss_index_projects
    GROUP BY collectionId
61+
62+
NODE collections_oss_index_results
DESCRIPTION >
    Endpoint output: one row per collection that passes the optional category filter, with its id, name, slug, total contributors and top projects, ordered by total contributors (highest first).

SQL >
    SELECT
        a.collectionId AS id,
        cd.name AS name,
        cd.slug AS slug,
        a.totalContributors,
        p.topProjects
    FROM collections_oss_index_agregates AS a
    LEFT JOIN collections_oss_index_top_projects AS p USING (collectionId)
    -- Inner join on purpose: collections outside the selected categories must not appear.
    INNER JOIN collections_oss_index_collections_deduplicated AS cd
        ON cd.id = a.collectionId
    -- The ordering of the aggregates node is not guaranteed to survive the joins,
    -- so it is restated on the final result; collectionId makes ties deterministic.
    ORDER BY a.totalContributors DESC, a.collectionId ASC
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
NODE segments_aggregates_with_ids_projects
DESCRIPTION >
    Id and segment id of every insights project (read with FINAL) that has a non-empty segment id.

SQL >
    SELECT
        id,
        segmentId
    FROM insightsProjects FINAL
    WHERE segmentId != ''
6+
7+
NODE segments_aggregates_with_ids_collections_projects
DESCRIPTION >
    Collection-to-project links (collectionId, insightsProjectId), read with FINAL.

SQL >
    SELECT
        collectionId,
        insightsProjectId
    FROM collectionsInsightsProjects FINAL
11+
12+
NODE segments_aggregates_with_ids_collections
DESCRIPTION >
    Id and category id of every collection, read with FINAL.

SQL >
    SELECT
        id,
        categoryId
    FROM collections FINAL
15+
16+
NODE segments_aggregates_with_ids_categories
DESCRIPTION >
    Id and category group id of every category, read with FINAL.

SQL >
    SELECT
        id,
        categoryGroupId
    FROM categories FINAL
19+
20+
NODE segments_aggregates_with_ids_aggregates
DESCRIPTION >
    Finalizes the distinct-count aggregate states stored in `segmentsAggregatedMV`, yielding one contributor count and one organization count per segment.

SQL >
    SELECT
        segmentId,
        countDistinctMerge(contributorCount) AS contributorCount,
        countDistinctMerge(organizationCount) AS organizationCount
    FROM segmentsAggregatedMV
    GROUP BY segmentId
28+
29+
NODE segments_aggregates_with_ids_results
DESCRIPTION >
    Attaches project, collection, category and category group ids to the per-segment counts. Projects are inner-joined to their collection links and to the segment aggregates, so a row is produced per project/collection link that has aggregates; collection and category details are left-joined. The result replaces the contents of `segments_aggregates_with_ids_datasource` on the cron schedule below.
    NOTE(review): a project linked to several collections yields several rows carrying the same counts, so downstream sums over these rows may count it more than once. Confirm this is intended.

SQL >
    SELECT
        agg.segmentId AS segmentId,
        agg.contributorCount,
        agg.organizationCount,
        proj.id AS projectId,
        col.id AS collectionId,
        cat.id AS categoryId,
        cat.categoryGroupId AS categoryGroupId
    FROM segments_aggregates_with_ids_projects AS proj
    INNER JOIN segments_aggregates_with_ids_collections_projects AS link
        ON proj.id = link.insightsProjectId
    INNER JOIN segments_aggregates_with_ids_aggregates AS agg
        ON agg.segmentId = proj.segmentId
    LEFT JOIN segments_aggregates_with_ids_collections AS col
        ON col.id = link.collectionId
    LEFT JOIN segments_aggregates_with_ids_categories AS cat
        ON cat.id = col.categoryId

TYPE COPY
TARGET_DATASOURCE segments_aggregates_with_ids_datasource
COPY_MODE replace
COPY_SCHEDULE 0 * * * *

0 commit comments

Comments
 (0)