@@ -7,16 +7,34 @@ import { IDatasetDescriptor, IDiscoverySource, IDiscoverySourceRow } from '../ty
77
88const log = getServiceLogger ( )
99
10+ const CATEGORY_SLUG = 'project-onboardings'
1011const GITHUB_GRAPHQL_URL = 'https://api.github.com/graphql'
12+ const GITHUB_NON_REPO_OWNERS = new Set ( [ 'user-attachments' , 'orgs' , 'apps' , 'marketplace' ] )
1113const OWNER = 'linuxfoundation'
1214const REPO = 'insights'
13- const CATEGORY_SLUG = 'project-onboardings'
1415
1516interface GraphQLResponse < T > {
1617 data ?: T
1718 errors ?: Array < { message : string } >
1819}
1920
21+ interface DiscussionNode {
22+ number : number
23+ body : string
24+ closed : boolean
25+ }
26+
27+ interface DiscussionsPage {
28+ pageInfo : { hasNextPage : boolean ; endCursor : string | null }
29+ nodes : DiscussionNode [ ]
30+ }
31+
32+ interface DiscussionsData {
33+ repository : {
34+ discussions : DiscussionsPage
35+ }
36+ }
37+
2038async function graphqlRequest < T > ( query : string , variables : Record < string , unknown > ) : Promise < T > {
2139 const raw = process . env . CROWD_GITHUB_PERSONAL_ACCESS_TOKENS
2240 if ( ! raw ) {
@@ -75,17 +93,16 @@ async function graphqlRequest<T>(query: string, variables: Record<string, unknow
7593 } )
7694}
7795
78- const GITHUB_NON_REPO_OWNERS = new Set ( [ 'user-attachments' , 'orgs' , 'apps' , 'marketplace' ] )
7996
8097// Extracts github.com/{owner}/{repo} URLs from markdown text, normalised to the repo root.
8198function extractRepoUrls ( text : string ) : string [ ] {
8299 const urls = new Set < string > ( )
83100 const regex = / h t t p s ? : \/ \/ g i t h u b \. c o m \/ ( [ a - z A - Z 0 - 9 _ . - ] + ) \/ ( [ a - z A - Z 0 - 9 _ . - ] + ) / gi
84101 let match : RegExpExecArray | null
85102 while ( ( match = regex . exec ( text ) ) !== null ) {
86- const owner = match [ 1 ]
87- const repo = match [ 2 ] . replace ( / \. g i t $ / , '' )
88- if ( owner && repo && ! GITHUB_NON_REPO_OWNERS . has ( owner . toLowerCase ( ) ) ) {
103+ const owner = match [ 1 ] . toLowerCase ( )
104+ const repo = match [ 2 ] . replace ( / \. g i t $ / , '' ) . replace ( / [ . , ; : ! ? ] + $ / , '' ) . toLowerCase ( )
105+ if ( owner && repo && ! GITHUB_NON_REPO_OWNERS . has ( owner ) ) {
89106 urls . add ( `https://github.com/${ owner } /${ repo } ` )
90107 }
91108 }
@@ -129,16 +146,6 @@ async function getDiscussionCategoryId(): Promise<string> {
129146 return category . id
130147}
131148
132- interface DiscussionNode {
133- number : number
134- body : string
135- closed : boolean
136- }
137-
138- interface DiscussionsPage {
139- pageInfo : { hasNextPage : boolean ; endCursor : string | null }
140- nodes : DiscussionNode [ ]
141- }
142149
143150async function fetchDiscussionsPage (
144151 categoryId : string ,
@@ -162,12 +169,6 @@ async function fetchDiscussionsPage(
162169 }
163170 `
164171
165- interface DiscussionsData {
166- repository : {
167- discussions : DiscussionsPage
168- }
169- }
170-
171172 const data = await graphqlRequest < DiscussionsData > ( query , { categoryId, cursor } )
172173 return data . repository . discussions
173174}
@@ -186,7 +187,6 @@ async function fetchAllDiscussionRepoUrls(): Promise<string[]> {
186187 const page = await fetchDiscussionsPage ( categoryId , cursor )
187188
188189 for ( const discussion of page . nodes ) {
189- if ( discussion . closed ) continue
190190 for ( const url of extractRepoUrls ( discussion . body ) ) {
191191 allUrls . add ( url )
192192 }
0 commit comments