Skip to content

Commit 13b4a80

Browse files
committed
add github source
1 parent cf4eede commit 13b4a80

File tree

4 files changed

+501
-0
lines changed

4 files changed

+501
-0
lines changed

src/components/App/App.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { useMemo } from 'react'
22
import { Config, ConfigProvider } from '../../hooks/useConfig.js'
3+
import { getGitHubSource } from '../../lib/sources/gitHubSource.js'
34
import { getHttpSource } from '../../lib/sources/httpSource.js'
45
import { getHuggingFaceSource } from '../../lib/sources/huggingFaceSource.js'
56
import { getHyperparamSource } from '../../lib/sources/hyperparamSource.js'
@@ -12,6 +13,7 @@ export default function App() {
1213
const col = search.get('col') === null ? undefined : Number(search.get('col'))
1314

1415
const source = getHuggingFaceSource(sourceId) ??
16+
getGitHubSource(sourceId) ??
1517
getHttpSource(sourceId) ??
1618
getHyperparamSource(sourceId, { endpoint: location.origin })
1719

src/lib/sources/gitHubSource.ts

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
import type { DirSource, FileMetadata, FileSource, SourcePart } from './types.js'
2+
import { getFileName } from './utils.js'
3+
4+
/** Fields shared by every parsed GitHub URL. */
interface BaseUrl {
  /** URL of the resource: the input URL for a bare repository, otherwise rebuilt from the parsed parts. */
  source: string
  /** Origin (scheme + host) of the URL that was parsed. */
  origin: string
  /** Repository in `owner/name` form. */
  repo: string
  /** Ref segment taken from the URL (`main` for a bare repository URL). */
  branch: string
  /** Path inside the repository: empty for the root, otherwise starting with `/`. */
  path: string
}

/** A repository root or a `/tree/` URL. */
interface DirectoryUrl extends BaseUrl {
  kind: 'directory'
  action: 'tree'
}

/** A file addressed through github.com. */
interface FileUrl extends BaseUrl {
  kind: 'file'
  action?: 'blob' | 'raw/refs/heads'
  /** URL handed to the file source as its `resolveUrl`. */
  resolveUrl: string
}

/** A file addressed directly on the raw-content host, which has no action segment. */
interface RawFileUrl extends BaseUrl {
  kind: 'file'
  action: undefined
  /** URL handed to the file source as its `resolveUrl`. */
  resolveUrl: string
}

/** Any URL shape that `parseGitHubUrl` can return. */
type GHUrl =
  | DirectoryUrl
  | FileUrl
  | RawFileUrl
30+
31+
// Web origin used when building breadcrumb and prefix URLs (see getSourceParts and getPrefix).
const baseUrl = 'https://github.com'
32+
// Origin of the raw-content host; parseGitHubUrl builds the resolveUrl of github.com file URLs on it.
const baseRawUrl = 'https://raw.githubusercontent.com'
33+
34+
/**
 * Build the breadcrumb trail for a parsed GitHub URL.
 *
 * The first entry is always the branch root (`…/tree/{branch}/`). When the
 * path is non-empty, one entry follows per intermediate folder, and a final
 * entry describes the last path segment itself.
 *
 * @param url - parsed GitHub URL
 * @returns breadcrumb entries, ordered from the branch root down to the target
 */
function getSourceParts(url: GHUrl): SourcePart[] {
  const branchRoot = `${baseUrl}/${url.repo}/tree/${url.branch}/`
  const root: SourcePart = { sourceId: branchRoot, text: branchRoot }

  const segments = url.path.split('/').filter(segment => segment.length > 0)
  const leaf = segments.at(-1)
  if (!leaf) {
    // Empty path: the branch root is the only breadcrumb.
    return [root]
  }

  // Every segment but the last is a folder and links to its /tree/ URL.
  const folders: SourcePart[] = segments.slice(0, -1).map((segment, index) => ({
    sourceId: `${branchRoot}${segments.slice(0, index + 1).join('/')}`,
    text: segment + '/',
  }))

  // The last segment links to /tree/ for directories and to /blob/ otherwise.
  const leafAction = url.action === 'tree' ? 'tree/' : 'blob/'
  const target: SourcePart = {
    sourceId: `${baseUrl}/${url.repo}/${leafAction}${url.branch}${url.path}`,
    text: leaf,
  }

  return [root, ...folders, target]
}
56+
/**
 * Compute the `/tree/` URL of a directory, without a trailing slash.
 *
 * @param url - parsed directory URL
 * @returns `{baseUrl}/{repo}/tree/{branch}{path}` with at most one trailing `/` removed
 */
function getPrefix(url: DirectoryUrl): string {
  const { repo, branch, path } = url
  const treeUrl = `${baseUrl}/${repo}/tree/${branch}${path}`
  return treeUrl.endsWith('/') ? treeUrl.slice(0, -1) : treeUrl
}
59+
/**
 * List the entries of a GitHub directory through the REST
 * "repository contents" endpoint (`/repos/{repo}/contents/{path}?ref={branch}`).
 *
 * @param url - parsed directory URL to list
 * @param options - optional settings
 * @param options.requestInit - extra `fetch` options; its headers are merged over the defaults
 * @param options.accessToken - token sent as `Authorization: token …` when provided
 * @returns one metadata entry per file or sub-directory
 * @throws Error if the HTTP response is not OK ("GitHub API error: …")
 * @throws Error if the payload is not a well-formed directory listing
 *   ("Failed to parse GitHub API response: …")
 */
async function fetchFilesList(url: DirectoryUrl, options?: { requestInit?: RequestInit, accessToken?: string }): Promise<FileMetadata[]> {
  // url.path is either '' or starts with '/'; strip the leading slash so the
  // request never contains an empty 'contents//…' segment.
  const contentsPath = url.path.replace(/^\/+/, '')
  const apiURL = `https://api.github.com/repos/${url.repo}/contents/${contentsPath}?ref=${url.branch}`

  // Start from the defaults, then layer the caller's headers on top. Spreading
  // requestInit over a plain `headers` object would replace the whole set and
  // silently drop Accept/Authorization as soon as the caller passes any header.
  const headers = new Headers({ Accept: 'application/vnd.github+json' })
  if (options?.accessToken) {
    headers.set('Authorization', `token ${options.accessToken}`)
  }
  new Headers(options?.requestInit?.headers).forEach((value, name) => {
    headers.set(name, value)
  })

  const response = await fetch(apiURL, {
    method: 'GET',
    ...options?.requestInit,
    headers,
  })
  if (!response.ok) {
    throw new Error(`GitHub API error: ${response.status} ${response.statusText} - ${await response.text()}`)
  }

  try {
    const data: unknown = await response.json()
    // The endpoint returns an array for a directory and a single object otherwise.
    if (!Array.isArray(data)) {
      throw new Error('Not a directory')
    }
    const entries: unknown[] = data
    const files: FileMetadata[] = []
    for (const file of entries) {
      if (typeof file !== 'object' || file === null || !('name' in file) || !('path' in file) || !('type' in file) || !('size' in file)) {
        throw new Error('Invalid file metadata')
      }
      // NOTE(review): any entry that is neither 'file' nor 'dir' aborts the
      // whole listing — confirm this is preferred over skipping such entries.
      if (file.type !== 'file' && file.type !== 'dir') {
        throw new Error('Unsupported file type')
      }
      if (typeof file.name !== 'string' || typeof file.path !== 'string' || typeof file.size !== 'number') {
        throw new Error('Invalid file metadata types')
      }
      const isFile = file.type === 'file'
      files.push({
        name: getFileName(file.path),
        fileSize: file.size,
        sourceId: `${url.origin}/${url.repo}/${isFile ? 'blob' : 'tree'}/${url.branch}/${file.path}`.replace(/\/$/, ''),
        kind: isFile ? 'file' : 'directory', // only 'file' and 'dir' reach this point
      })
    }
    return files
  } catch (error) {
    throw new Error(`Failed to parse GitHub API response: ${error instanceof Error ? error.message : String(error)}`)
  }
}
104+
/**
 * Build a file or directory source from a GitHub URL.
 *
 * @param sourceId - URL identifying the file or directory
 * @param options - optional settings
 * @param options.requestInit - passed on to the file source, and to `fetch` when listing a directory
 * @param options.accessToken - GitHub token used when listing a directory
 * @returns the source, or `undefined` when `sourceId` cannot be handled
 *   (any error raised while building the source is swallowed)
 */
export function getGitHubSource(sourceId: string, options?: {requestInit?: RequestInit, accessToken?: string}): FileSource | DirSource | undefined {
  try {
    const url = parseGitHubUrl(sourceId)
    const sourceParts = getSourceParts(url)

    // Draft of version (branch/tag) listing, kept for later:
    // async function fetchVersions() {
    //   const refsList = await fetchRefsList(url, options)
    //   return {
    //     label: 'Branches',
    //     versions: refsList.map(({ refType, name, ref }) => {
    //       const label = refType === 'branches' ? name :
    //         refType === 'converts' ? `[convert] ${name}` :
    //           refType === 'tags' ? `[tag] ${name}` :
    //             `[pr] ${name}`
    //       // remove refs/heads/ from the ref name
    //       // e.g. refs/heads/main -> main
    //       const fixedRef = refType === 'branches' ? ref.replace(/refs\/heads\//, '') : ref
    //       const branchSourceId = `${url.origin}/${getFullName(url)}/${url.kind === 'file' ? 'blob' : 'tree'}/${fixedRef}${url.path}`
    //       return {
    //         label,
    //         sourceId: branchSourceId,
    //       }
    //     }),
    //   }
    // }

    if (url.kind !== 'file') {
      const directory: DirSource = {
        kind: 'directory',
        sourceId,
        sourceParts,
        prefix: getPrefix(url),
        listFiles: () => fetchFilesList(url, options),
        // fetchVersions,
      }
      return directory
    }

    const file: FileSource = {
      kind: 'file',
      sourceId,
      sourceParts,
      fileName: getFileName(url.path),
      resolveUrl: url.resolveUrl,
      requestInit: options?.requestInit,
      // fetchVersions,
    }
    return file
  } catch {
    // Not a GitHub URL (or not a supported one): let the caller try another source.
    return undefined
  }
}
151+
152+
/**
 * Parse a GitHub URL into a structured description.
 *
 * Supported shapes (HTTPS only):
 * - `https://github.com/{owner}/{repo}` — repository root, returned as a
 *   directory on the hard-coded `main` branch
 * - `https://github.com/{owner}/{repo}/tree/{ref}[/{path}]` — directory
 * - `https://github.com/{owner}/{repo}/blob/{ref}/{path}` — file
 * - `https://github.com/{owner}/{repo}/raw/refs/heads/{branch}/{path}` — file
 *   (the `raw/` prefix is optional)
 * - `https://raw.githubusercontent.com/{owner}/{repo}/[refs/heads/]{ref}/{path}` — file
 *
 * The ref is always captured as a single path segment, so refs that contain
 * a `/` are not recognised.
 *
 * @param url - URL to parse
 * @returns the parsed directory or file description
 * @throws TypeError if `url` is not a valid URL (thrown by the `URL` constructor)
 * @throws Error 'Not a GitHub URL' if the protocol or host is not accepted
 * @throws Error 'Unsupported GitHub URL' if the path matches none of the shapes above
 */
export function parseGitHubUrl(url: string): GHUrl {
  // The URL constructor throws a TypeError when `url` is not a valid URL.
  const urlObject = new URL(url)
  const { host, origin, pathname, protocol } = urlObject

  // NOTE(review): 'github.co' is accepted alongside the github.com hosts — confirm this is intentional.
  const acceptedHosts = ['github.co', 'github.com', 'www.github.com', 'raw.githubusercontent.com']
  if (protocol !== 'https:' || !acceptedHosts.includes(host)) {
    throw new Error('Not a GitHub URL')
  }

  if (host === 'raw.githubusercontent.com') {
    // https://raw.githubusercontent.com/apache/parquet-testing/refs/heads/master/variant/README.md
    const raw =
      /^\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/(?<action>(refs\/heads\/)?)(?<branch>[^/]+)(?<path>(\/[^/]+)+)$/.exec(
        pathname
      )?.groups
    if (
      raw?.owner === undefined ||
      raw.repo === undefined ||
      raw.branch === undefined ||
      raw.path === undefined
    ) {
      throw new Error('Unsupported GitHub URL')
    }
    // The optional 'refs/heads/' prefix is dropped: the short form addresses the same file.
    const source = `${origin}/${raw.owner}/${raw.repo}/${raw.branch}${raw.path}`
    return {
      kind: 'file',
      source,
      origin,
      repo: `${raw.owner}/${raw.repo}`,
      branch: raw.branch,
      path: raw.path,
      resolveUrl: source,
    }
  }

  // https://github.com/apache/parquet-testing
  const root = /^\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/?$/.exec(pathname)?.groups
  if (root?.owner !== undefined && root.repo !== undefined) {
    return {
      kind: 'directory',
      source: url,
      origin,
      repo: `${root.owner}/${root.repo}`,
      action: 'tree',
      branch: 'main', // hardcode the default branch
      path: '',
    }
  }

  // https://github.com/apache/parquet-testing/tree/master/variant
  const folder =
    /^\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/(?<action>tree)\/(?<branch>[^/]+)(?<path>(\/[^/]+)*)\/?$/.exec(
      pathname
    )?.groups
  if (
    folder?.owner !== undefined &&
    folder.repo !== undefined &&
    folder.action !== undefined &&
    folder.branch !== undefined &&
    folder.path !== undefined
  ) {
    return {
      kind: 'directory',
      source: `${origin}/${folder.owner}/${folder.repo}/${folder.action}/${folder.branch}${folder.path}`,
      origin,
      repo: `${folder.owner}/${folder.repo}`,
      action: 'tree',
      branch: folder.branch,
      path: folder.path,
    }
  }

  // https://github.com/apache/parquet-testing/blob/master/variant/README.md
  // https://github.com/apache/parquet-testing/raw/refs/heads/master/variant/README.md
  const file =
    /^\/(?<owner>[^/]+)\/(?<repo>[^/]+)\/(?<action>blob|refs\/heads|raw\/refs\/heads)\/(?<branch>[^/]+)(?<path>(\/[^/]+)+)$/.exec(
      pathname
    )?.groups
  if (
    file?.owner !== undefined &&
    file.repo !== undefined &&
    file.action !== undefined &&
    file.branch !== undefined &&
    file.path !== undefined
  ) {
    const isBlob = file.action === 'blob'
    // A /blob/ URL may point at a branch, a tag or a commit SHA, so its raw URL
    // must use the plain '/{ref}/' form: forcing 'refs/heads/' would only ever
    // resolve branches. URLs that spell out 'refs/heads' keep it.
    const rawRef = isBlob ? file.branch : `refs/heads/${file.branch}`
    return {
      kind: 'file',
      source: `${origin}/${file.owner}/${file.repo}/${file.action}/${file.branch}${file.path}`,
      origin,
      repo: `${file.owner}/${file.repo}`,
      action: isBlob ? 'blob' : 'raw/refs/heads',
      branch: file.branch,
      path: file.path,
      resolveUrl: `${baseRawUrl}/${file.owner}/${file.repo}/${rawRef}${file.path}`,
    }
  }

  throw new Error('Unsupported GitHub URL')
}

src/lib/sources/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
export { getHttpSource } from './httpSource.js'
22
export { getHyperparamSource } from './hyperparamSource.js'
33
export { getHuggingFaceSource } from './huggingFaceSource.js'
4+
export { getGitHubSource } from './gitHubSource.js'
45
export type { HyperparamFileMetadata } from './hyperparamSource.js'
56
export type { DirSource, FileKind, FileMetadata, FileSource, Source, SourcePart } from './types.js'
67
export { getFileName } from './utils.js'

0 commit comments

Comments
 (0)