Skip to content

Commit ca7032a

Browse files
authored
feat: agentic ai momentum tb endpoints (#4051)
Signed-off-by: anilb <epipav@gmail.com>
1 parent eabfd30 commit ca7032a

11 files changed

Lines changed: 654 additions & 1 deletion

backend/src/database/migrations/U1776939912__collectionRepositoriesReplicaIdentityUpdates.sql

Whitespace-only changes.
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
-- Make "collectionsRepositories" consumable by the `sequin` role:
-- log full row images on change, grant read access, and publish the table.
ALTER TABLE "collectionsRepositories" REPLICA IDENTITY FULL;
GRANT SELECT ON "collectionsRepositories" TO sequin;

-- ALTER PUBLICATION ... ADD TABLE fails when the table is already a member,
-- so membership is looked up first to keep this migration re-runnable.
DO $$
BEGIN
    PERFORM 1
    FROM pg_publication_tables
    WHERE pubname = 'sequin_pub'
      AND tablename = 'collectionsRepositories';

    IF NOT FOUND THEN
        ALTER PUBLICATION sequin_pub ADD TABLE "collectionsRepositories";
    END IF;
END $$;

-- Composite index on ("updatedAt", id).
-- NOTE(review): presumably supports paging rows in update order during sync — confirm.
CREATE INDEX IF NOT EXISTS "ix_collectionsRepositories_updatedAt_id"
    ON "collectionsRepositories" ("updatedAt", id);
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
DESCRIPTION >
    - Holds the pre-computed at-a-glance metrics written by the copy pipe whose `TARGET_DATASOURCE` is `agentic_ai_momentum_glance_ds` (`COPY_MODE replace`).
    - `collectionId` is the collection the metrics were computed for.
    - `totalCount` is the number of insights projects matched for the collection's repositories; `totalSoftwareValue` is the sum of their `softwareValue`.
    - `totalContributorCount` is the distinct member count over code-contribution or collaboration activities; `totalContributorCount30d` restricts that to the last 30 days.
    - `commitsCount30d` counts `authored-commit` and `committed-commit` activities in the last 30 days.
    - `mostActiveProjects` is a JSON array string of up to three objects (`name`, `slug`, `newContributors`) ordered by new contributors, descending.
    - `medianIssueCloseTimeSeconds` / `medianPrResolutionTimeSeconds` are medians in seconds; the `30d` variants only consider items closed or resolved in the last 30 days.
    - `projectsWithGithubPrActivity` and `projectsWithGithubIssueActivity` count distinct segments with GitHub pull request / issue rows.

SCHEMA >
    `collectionId` String,
    `totalCount` UInt64,
    `totalSoftwareValue` UInt64,
    `totalContributorCount` UInt64,
    `totalContributorCount30d` UInt64,
    `commitsCount30d` UInt64,
    `mostActiveProjects` String,
    `medianIssueCloseTimeSeconds` Float64,
    `medianIssueCloseTimeSeconds30d` Float64,
    `medianPrResolutionTimeSeconds` Float64,
    `medianPrResolutionTimeSeconds30d` Float64,
    `projectsWithGithubPrActivity` UInt64,
    `projectsWithGithubIssueActivity` UInt64

ENGINE MergeTree
ENGINE_SORTING_KEY collectionId
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
DESCRIPTION >
    - One row of metrics per project, keyed and sorted by `projectId`; read by the `ai_projects_list` endpoint node from `agentic_ai_projects_list_ds`.
    - `name`, `slug` and `githubRepoLink` identify the project.
    - Columns come in pairs: a base metric (e.g. `stars`) and a `30d` companion (e.g. `stars30d`).
    - NOTE(review): the `30d` columns presumably cover a trailing 30-day window, and `downloads30d` / `dockerPulls30d` are signed presumably because they can be negative deltas — confirm against the pipe that populates this datasource.
    - Nullable columns allow a metric to be absent for a project.

SCHEMA >
    `projectId` String,
    `name` String,
    `slug` String,
    `githubRepoLink` String,
    `stars` UInt64,
    `stars30d` UInt64,
    `forks` UInt64,
    `forks30d` UInt64,
    `downloads` Nullable(UInt64),
    `downloads30d` Nullable(Int64),
    `dockerPulls` Nullable(UInt64),
    `dockerPulls30d` Nullable(Int64),
    `dependentRepos` Nullable(UInt64),
    `dependentPackages` Nullable(UInt64),
    `commits` UInt64,
    `commits30d` UInt64,
    `contributors` UInt64,
    `contributors30d` UInt64,
    `newContributors30d` UInt64,
    `mergeRate` Nullable(Float64),
    `mergeRate30d` Nullable(Float64),
    `prResolveTimeSeconds` Nullable(Float64),
    `prResolveTimeSeconds30d` Nullable(Float64),
    `issueCloseTimeSeconds` Nullable(Float64),
    `issueCloseTimeSeconds30d` Nullable(Float64),
    `issueResponseTimeSeconds` Nullable(Float64),
    `issueResponseTimeSeconds30d` Nullable(Float64),
    `noResponseIssues` Nullable(UInt64),
    `noResponseIssues30d` Nullable(UInt64),
    `vulnerabilities` Nullable(UInt64),
    `vulnerabilities30d` Nullable(UInt64),
    `cocomoValue` UInt64

ENGINE MergeTree
ENGINE_SORTING_KEY projectId
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
DESCRIPTION >
    - `collectionsRepositories` is the junction table that links collections to repositories.
    - `id` is the primary key of a collection-repository link.
    - `collectionId` references the collection side of the link.
    - `repoId` references the repository side of the link.
    - `createdAt` and `updatedAt` are the record lifecycle timestamps.
    - `deletedAt` implements soft deletion: null means the link is active, a timestamp means it was logically deleted.

SCHEMA >
    `id` String `json:$.record.id`,
    `collectionId` String `json:$.record.collectionId`,
    `repoId` String `json:$.record.repoId`,
    `createdAt` DateTime64(3) `json:$.record.createdAt`,
    `updatedAt` DateTime64(3) `json:$.record.updatedAt`,
    `deletedAt` Nullable(DateTime64(3)) `json:$.record.deletedAt`

ENGINE ReplacingMergeTree
ENGINE_PARTITION_KEY toYear(createdAt)
ENGINE_SORTING_KEY repoId, collectionId
ENGINE_VER updatedAt
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
NODE at_a_glance_metrics
DESCRIPTION >
    Reads the pre-computed metrics from `agentic_ai_momentum_glance_ds` and exposes the camelCase columns under snake_case names.

SQL >
    SELECT
        totalCount                       AS total_count,
        totalSoftwareValue               AS total_software_value,
        totalContributorCount            AS total_contributor_count,
        totalContributorCount30d         AS total_contributor_count_30d,
        commitsCount30d                  AS commits_count_30d,
        mostActiveProjects               AS most_active_projects,
        medianIssueCloseTimeSeconds      AS median_issue_close_time_seconds,
        medianIssueCloseTimeSeconds30d   AS median_issue_close_time_seconds_30d,
        medianPrResolutionTimeSeconds    AS median_pr_resolution_time_seconds,
        medianPrResolutionTimeSeconds30d AS median_pr_resolution_time_seconds_30d,
        projectsWithGithubPrActivity     AS projects_with_github_pr_activity,
        projectsWithGithubIssueActivity  AS projects_with_github_issue_activity
    FROM agentic_ai_momentum_glance_ds
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
DESCRIPTION >
2+
Pre-computes at-a-glance metrics for the Agentic AI collection.
3+
Filters by specific repositories in the collection via collectionsRepositories.
4+
Runs daily and stores the result in agentic_ai_momentum_glance_ds.
5+
6+
NODE ai_repos
DESCRIPTION >
    Enabled, non-deleted repositories linked through non-deleted `collectionsRepositories` rows to collection 3cf46a5d-abd0-440e-95f9-4ae9458903a8.
    Returns each repository's `url`, `segmentId` and `insightsProjectId`.

SQL >
    SELECT
        r.url,
        r.segmentId,
        r.insightsProjectId
    FROM collectionsRepositories AS cr FINAL
    INNER JOIN repositories AS r FINAL
        ON cr.repoId = r.id
    WHERE
        cr.collectionId = '3cf46a5d-abd0-440e-95f9-4ae9458903a8'
        AND cr.deletedAt IS NULL
        AND r.deletedAt IS NULL
        AND r.enabled = true
16+
17+
NODE ai_projects
DESCRIPTION >
    Rows of `insights_projects_populated_ds` whose `id` is referenced as `insightsProjectId` by a repository in `ai_repos`.

SQL >
    SELECT
        id,
        name,
        slug,
        softwareValue
    FROM insights_projects_populated_ds
    WHERE id IN (
        SELECT DISTINCT insightsProjectId
        FROM ai_repos
    )
22+
23+
NODE total_contributor_count
DESCRIPTION >
    Distinct members with at least one activity in the collection's segments and repository channels whose (type, platform) is flagged in `activityTypes` as a code contribution or a collaboration.

SQL >
    SELECT
        countDistinct(memberId) AS totalContributorCount
    FROM activityRelations_deduplicated_cleaned_bucket_union
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND (type, platform) IN (
            SELECT
                activityType,
                platform
            FROM activityTypes
            WHERE isCodeContribution = 1 OR isCollaboration = 1
        )
35+
36+
NODE total_contributor_count_30d
DESCRIPTION >
    Distinct members with a code-contribution or collaboration activity (per `activityTypes`) in the collection's segments and repository channels during the last 30 days.

SQL >
    SELECT
        countDistinct(memberId) AS totalContributorCount30d
    FROM activityRelations_deduplicated_cleaned_bucket_union
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND timestamp >= now() - INTERVAL 30 DAY
        AND (type, platform) IN (
            SELECT
                activityType,
                platform
            FROM activityTypes
            WHERE isCodeContribution = 1 OR isCollaboration = 1
        )
49+
50+
NODE commits_count_30d
DESCRIPTION >
    Number of `authored-commit` and `committed-commit` activities in the collection's segments and repository channels during the last 30 days.

SQL >
    SELECT
        count() AS commitsCount30d
    FROM activityRelations_deduplicated_cleaned_bucket_union
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND type IN ('authored-commit', 'committed-commit')
        AND timestamp >= now() - INTERVAL 30 DAY
59+
60+
NODE new_contributors_per_project
DESCRIPTION >
    Per segment, the number of members whose earliest code-contribution or collaboration activity in that segment falls within the last 30 days.
    The inner query finds each member's first qualifying activity per segment; the outer query counts the recent ones.

SQL >
    SELECT
        segmentId,
        countIf(firstSeenAt >= now() - INTERVAL 30 DAY) AS newContributors
    FROM (
        SELECT
            segmentId,
            memberId,
            min(timestamp) AS firstSeenAt
        FROM activityRelations_deduplicated_cleaned_bucket_union
        WHERE
            segmentId IN (
                SELECT DISTINCT segmentId
                FROM ai_repos
            )
            AND channel IN (
                SELECT url
                FROM ai_repos
            )
            AND (type, platform) IN (
                SELECT
                    activityType,
                    platform
                FROM activityTypes
                WHERE isCodeContribution = 1 OR isCollaboration = 1
            )
        GROUP BY
            segmentId,
            memberId
    )
    GROUP BY segmentId
78+
79+
NODE most_active_projects
DESCRIPTION >
    Builds a comma-separated string of up to three JSON objects (`name`, `slug`, `newContributors`), ordered by new contributors descending.
    Each segment from `new_contributors_per_project` is mapped to one `insightsProjectId` (picked with `any` from `ai_repos`) and then to its project in `ai_projects`.
    `newContributors` is serialised as a string because the map passed to `toJSONString` holds string values.

SQL >
    SELECT
        coalesce(
            arrayStringConcat(
                arrayMap(
                    entry -> toJSONString(
                        map('name', entry.1, 'slug', entry.2, 'newContributors', toString(entry.3))
                    ),
                    arraySlice(
                        arrayReverseSort(
                            entry -> entry.3,
                            groupArray((p.name, p.slug, ncp.newContributors))
                        ),
                        1,
                        3
                    )
                ),
                ','
            ),
            ''
        ) AS mostActiveProjects
    FROM new_contributors_per_project AS ncp
    LEFT JOIN (
        SELECT
            segmentId,
            any(insightsProjectId) AS insightsProjectId
        FROM ai_repos
        GROUP BY segmentId
    ) AS ar
        ON ar.segmentId = ncp.segmentId
    LEFT JOIN ai_projects AS p
        ON p.id = ar.insightsProjectId
106+
107+
NODE median_issue_close_time
DESCRIPTION >
    Median `closedInSeconds` over closed issues (`closedAt` not null, `closedInSeconds > 0`) in the collection's segments and repository channels.
    Falls back to 0 when no issue qualifies. `coalesce` covers a NULL median; `ifNotFinite` covers the NaN that `median` yields for an empty non-Nullable numeric input.
    NOTE(review): the type of `closedInSeconds` is not visible here; the extra guard is a no-op if the column is Nullable.

SQL >
    SELECT
        ifNotFinite(coalesce(median(closedInSeconds), 0), 0) AS medianIssueCloseTimeSeconds
    FROM issues_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND isNotNull(closedAt)
        AND closedInSeconds > 0
116+
117+
NODE median_issue_close_time_30d
DESCRIPTION >
    Median `closedInSeconds` over issues closed in the last 30 days (`closedAt` not null, `closedInSeconds > 0`) in the collection's segments and repository channels.
    Falls back to 0 when no issue qualifies. `coalesce` covers a NULL median; `ifNotFinite` covers the NaN that `median` yields for an empty non-Nullable numeric input.
    NOTE(review): the type of `closedInSeconds` is not visible here; the extra guard is a no-op if the column is Nullable.

SQL >
    SELECT
        ifNotFinite(coalesce(median(closedInSeconds), 0), 0) AS medianIssueCloseTimeSeconds30d
    FROM issues_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND isNotNull(closedAt)
        AND closedInSeconds > 0
        AND closedAt >= now() - INTERVAL 30 DAY
127+
128+
NODE median_pr_resolution_time
DESCRIPTION >
    Median `resolvedInSeconds` over resolved pull requests (`resolvedAt` not null, `resolvedInSeconds > 0`) in the collection's segments and repository channels.
    Falls back to 0 when no pull request qualifies. `coalesce` covers a NULL median; `ifNotFinite` covers the NaN that `median` yields for an empty non-Nullable numeric input.
    NOTE(review): the type of `resolvedInSeconds` is not visible here; the extra guard is a no-op if the column is Nullable.

SQL >
    SELECT
        ifNotFinite(coalesce(median(resolvedInSeconds), 0), 0) AS medianPrResolutionTimeSeconds
    FROM pull_requests_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND isNotNull(resolvedAt)
        AND resolvedInSeconds > 0
137+
138+
NODE median_pr_resolution_time_30d
DESCRIPTION >
    Median `resolvedInSeconds` over pull requests resolved in the last 30 days (`resolvedAt` not null, `resolvedInSeconds > 0`) in the collection's segments and repository channels.
    Falls back to 0 when no pull request qualifies. `coalesce` covers a NULL median; `ifNotFinite` covers the NaN that `median` yields for an empty non-Nullable numeric input.
    NOTE(review): the type of `resolvedInSeconds` is not visible here; the extra guard is a no-op if the column is Nullable.

SQL >
    SELECT
        ifNotFinite(coalesce(median(resolvedInSeconds), 0), 0) AS medianPrResolutionTimeSeconds30d
    FROM pull_requests_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND isNotNull(resolvedAt)
        AND resolvedInSeconds > 0
        AND resolvedAt >= now() - INTERVAL 30 DAY
148+
149+
NODE projects_with_github_pr
DESCRIPTION >
    Number of distinct segments that have at least one `pull_requests_analyzed` row on platform `github` within the collection's segments and repository channels.

SQL >
    SELECT
        countDistinct(segmentId) AS projectsWithGithubPrActivity
    FROM pull_requests_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND platform = 'github'
157+
158+
NODE projects_with_github_issue
DESCRIPTION >
    Number of distinct segments that have at least one `issues_analyzed` row on platform `github` within the collection's segments and repository channels.

SQL >
    SELECT
        countDistinct(segmentId) AS projectsWithGithubIssueActivity
    FROM issues_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND platform = 'github'
166+
167+
NODE at_a_glance_metrics
DESCRIPTION >
    Collapses `ai_projects` and the metric nodes into a single row for collection 3cf46a5d-abd0-440e-95f9-4ae9458903a8.
    `totalCount` and `totalSoftwareValue` aggregate over `ai_projects`. Every other metric comes from a node that returns one row (an aggregate without GROUP BY), so it is cross-joined and read with `any`.
    `mostActiveProjects` is wrapped in square brackets so the comma-separated JSON objects form a JSON array string.
    The result replaces the contents of `agentic_ai_momentum_glance_ds` on the schedule `0 2 * * *`.

SQL >
    SELECT
        '3cf46a5d-abd0-440e-95f9-4ae9458903a8' AS collectionId,
        count(*) AS totalCount,
        sum(softwareValue) AS totalSoftwareValue,
        any(total_contributor_count.totalContributorCount) AS totalContributorCount,
        any(total_contributor_count_30d.totalContributorCount30d) AS totalContributorCount30d,
        any(commits_count_30d.commitsCount30d) AS commitsCount30d,
        concat('[', any(most_active_projects.mostActiveProjects), ']') AS mostActiveProjects,
        any(median_issue_close_time.medianIssueCloseTimeSeconds) AS medianIssueCloseTimeSeconds,
        any(median_issue_close_time_30d.medianIssueCloseTimeSeconds30d) AS medianIssueCloseTimeSeconds30d,
        any(median_pr_resolution_time.medianPrResolutionTimeSeconds) AS medianPrResolutionTimeSeconds,
        any(median_pr_resolution_time_30d.medianPrResolutionTimeSeconds30d) AS medianPrResolutionTimeSeconds30d,
        any(projects_with_github_pr.projectsWithGithubPrActivity) AS projectsWithGithubPrActivity,
        any(projects_with_github_issue.projectsWithGithubIssueActivity) AS projectsWithGithubIssueActivity
    FROM ai_projects
    CROSS JOIN total_contributor_count
    CROSS JOIN total_contributor_count_30d
    CROSS JOIN commits_count_30d
    CROSS JOIN most_active_projects
    CROSS JOIN median_issue_close_time
    CROSS JOIN median_issue_close_time_30d
    CROSS JOIN median_pr_resolution_time
    CROSS JOIN median_pr_resolution_time_30d
    CROSS JOIN projects_with_github_pr
    CROSS JOIN projects_with_github_issue

TYPE COPY
TARGET_DATASOURCE agentic_ai_momentum_glance_ds
COPY_MODE replace
COPY_SCHEDULE 0 2 * * *
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
NODE ai_projects_list
DESCRIPTION >
    Returns every row of `agentic_ai_projects_list_ds`, exposing the camelCase columns under snake_case names.

SQL >
    SELECT
        projectId                   AS project_id,
        name,
        slug,
        githubRepoLink              AS github_repo_link,
        stars,
        stars30d                    AS stars_30d,
        forks,
        forks30d                    AS forks_30d,
        downloads,
        downloads30d                AS downloads_30d,
        dockerPulls                 AS docker_pulls,
        dockerPulls30d              AS docker_pulls_30d,
        dependentRepos              AS dependent_repos,
        dependentPackages           AS dependent_packages,
        commits,
        commits30d                  AS commits_30d,
        contributors,
        contributors30d             AS contributors_30d,
        newContributors30d          AS new_contributors_30d,
        mergeRate                   AS merge_rate,
        mergeRate30d                AS merge_rate_30d,
        prResolveTimeSeconds        AS pr_resolve_time_seconds,
        prResolveTimeSeconds30d     AS pr_resolve_time_seconds_30d,
        issueCloseTimeSeconds       AS issue_close_time_seconds,
        issueCloseTimeSeconds30d    AS issue_close_time_seconds_30d,
        issueResponseTimeSeconds    AS issue_response_time_seconds,
        issueResponseTimeSeconds30d AS issue_response_time_seconds_30d,
        noResponseIssues            AS no_response_issues,
        noResponseIssues30d         AS no_response_issues_30d,
        vulnerabilities,
        vulnerabilities30d          AS vulnerabilities_30d,
        cocomoValue                 AS cocomo_value
    FROM agentic_ai_projects_list_ds

0 commit comments

Comments
 (0)