Skip to content

Commit 059dd4c

Browse files
fix(security): harden repository URL checks
1 parent 103ce58 commit 059dd4c

6 files changed

Lines changed: 128 additions & 46 deletions

File tree

pipeline/workers/process-module.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { ensureDirectory, fileExists } from "../../scripts/shared/fs-utils.ts";
22
import { ensureRepository, getCommitDate } from "../../scripts/shared/git.ts";
3+
import { isRepositoryType } from "../../scripts/updateRepositoryApiData/helpers.ts";
34
import { rename, rm } from "node:fs/promises";
45
import { execFile } from "node:child_process";
56
import { buildModuleAnalysisCacheKey } from "../../scripts/shared/module-analysis-cache.ts";
@@ -912,7 +913,7 @@ async function enrichModule(module: ModuleInput, config: ProcessModuleConfig): P
912913
}
913914

914915
// Check GitHub issues
915-
if (module.url.includes("github.com") && module.hasGithubIssues === false) {
916+
if (isRepositoryType(module.url, "github") && module.hasGithubIssues === false) {
916917
enrichIssues.push("Issues are not enabled in the GitHub repository. So users cannot report bugs. Please enable issues in your repo.");
917918
}
918919

scripts/check-modules/remote-sha.ts

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import { buildAuthHeadersFromEnv, createHttpClient } from "../shared/http-client.ts";
99
import { createLogger } from "../shared/logger.ts";
1010
import { createRateLimiter } from "../shared/rate-limiter.ts";
11+
import { getRepositoryType } from "../updateRepositoryApiData/helpers.ts";
1112

1213
interface RepoCoordinates {
1314
owner: string;
@@ -263,24 +264,17 @@ export async function getRemoteCommitSha(url: string, branch = "master"): Promis
263264
return null;
264265
}
265266

266-
// Try each platform
267-
if (url.includes("github.com")) {
268-
return await getGitHubCommitSha(url, branch);
269-
}
270-
271-
if (url.includes("gitlab.com")) {
272-
return await getGitLabCommitSha(url, branch);
273-
}
274-
275-
if (url.includes("bitbucket.org")) {
276-
return await getBitbucketCommitSha(url, branch);
277-
}
278-
279-
if (url.includes("codeberg.org")) {
280-
return await getCodebergCommitSha(url, branch);
267+
switch (getRepositoryType(url)) {
268+
case "github":
269+
return await getGitHubCommitSha(url, branch);
270+
case "gitlab":
271+
return await getGitLabCommitSha(url, branch);
272+
case "bitbucket":
273+
return await getBitbucketCommitSha(url, branch);
274+
case "codeberg":
275+
return await getCodebergCommitSha(url, branch);
276+
default:
277+
logger.debug(`Unsupported platform for URL: ${url}`);
278+
return null;
281279
}
282-
283-
// Unsupported platform - caller must clone
284-
logger.debug(`Unsupported platform for URL: ${url}`);
285-
return null;
286280
}

scripts/collect-metadata/__tests__/parser.test.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,18 @@ describe("collect-metadata/parser", () => {
4646
assert.strictEqual(modules.length, 0);
4747
assert.strictEqual(issues.length, 0); // Google.com doesn't match repo patterns, so line is ignored
4848
});
49+
50+
it("should parse markdown links without regex backtracking", () => {
51+
const markdown = `
52+
### Utilities
53+
| [MMM-Title](https://github.com/example/MMM-Title "Repo Title") | [Example User](https://github.com/example) | Description |
54+
`;
55+
const { modules, issues } = parseModuleList(markdown);
56+
57+
assert.strictEqual(issues.length, 0);
58+
assert.strictEqual(modules.length, 1);
59+
assert.strictEqual(modules[0].name, "MMM-Title");
60+
assert.strictEqual(modules[0].url, "https://github.com/example/MMM-Title");
61+
assert.strictEqual(modules[0].maintainer, "Example User");
62+
});
4963
});

scripts/collect-metadata/parser.ts

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,25 @@ function stripUrlTitle(url: string): string {
5555
return url.substring(0, urlTitleStart);
5656
}
5757

58+
function parseMarkdownLink(cell: string): { text: string; url: string } | null {
59+
const textStart = cell.indexOf("[");
60+
const textEnd = cell.indexOf("]", textStart + 1);
61+
const urlStart = cell.indexOf("(", textEnd + 1);
62+
const urlEnd = cell.indexOf(")", urlStart + 1);
63+
64+
if (textStart === -1 || textEnd === -1 || urlStart === -1 || urlEnd === -1) {
65+
return null;
66+
}
67+
68+
const text = cell.slice(textStart + 1, textEnd).trim();
69+
const url = stripUrlTitle(cell.slice(urlStart + 1, urlEnd).trim());
70+
if (!text || !url) {
71+
return null;
72+
}
73+
74+
return { text, url };
75+
}
76+
5877
function parseModuleRow(line: string, category: string, issues: string[]): ParsedModuleEntry | null {
5978
if (!isRepositoryRow(line)) {
6079
return null;
@@ -66,21 +85,21 @@ function parseModuleRow(line: string, category: string, issues: string[]): Parse
6685
}
6786

6887
const repoCell = parts[1];
69-
const repoMatch = repoCell.match(/\[(.*?)\]\((.*?)\)/u);
70-
if (!repoMatch) {
88+
const repoLink = parseMarkdownLink(repoCell);
89+
if (!repoLink) {
7190
return null;
7291
}
7392

74-
const name = repoMatch[1].trim();
75-
const url = stripUrlTitle(repoMatch[2].trim());
93+
const name = repoLink.text;
94+
const url = repoLink.url;
7695
const maintainerCell = parts[2] || "";
7796
const description = parts[3] || "";
7897
const outdatedInfo = parts[4] ? parts[4].trim() : "";
7998

8099
let maintainer = maintainerCell;
81-
const maintainerMatch = maintainerCell.match(/\[(.*?)\]\((.*?)\)/u);
82-
if (maintainerMatch) {
83-
maintainer = maintainerMatch[1].trim();
100+
const maintainerLink = parseMarkdownLink(maintainerCell);
101+
if (maintainerLink) {
102+
maintainer = maintainerLink.text;
84103
}
85104

86105
const repoType = getRepositoryType(url);
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import { describe, it } from "node:test";
2+
import { getRepositoryId, getRepositoryType, isRepositoryType } from "../helpers.ts";
3+
import assert from "node:assert";
4+
5+
describe("updateRepositoryApiData/helpers", () => {
6+
it("detects supported repository hosts by exact hostname", () => {
7+
assert.strictEqual(getRepositoryType("https://github.com/user/MMM-Test"), "github");
8+
assert.strictEqual(getRepositoryType("https://gitlab.com/group/MMM-Test"), "gitlab");
9+
assert.strictEqual(getRepositoryType("https://bitbucket.org/team/MMM-Test"), "bitbucket");
10+
assert.strictEqual(getRepositoryType("https://codeberg.org/user/MMM-Test"), "codeberg");
11+
});
12+
13+
it("rejects deceptive hostnames that only contain the trusted domain as a substring", () => {
14+
assert.strictEqual(getRepositoryType("https://github.com.evil.example/user/MMM-Test"), "unknown");
15+
assert.strictEqual(getRepositoryType("https://evil.example/github.com/user/MMM-Test"), "unknown");
16+
assert.strictEqual(isRepositoryType("https://github.com.evil.example/user/MMM-Test", "github"), false);
17+
});
18+
19+
it("extracts repository ids from normalized repository urls", () => {
20+
assert.strictEqual(getRepositoryId("git+https://github.com/user/MMM-Test.git"), "user/MMM-Test");
21+
assert.strictEqual(getRepositoryId("https://github.com/user/MMM-Test/"), "user/MMM-Test");
22+
});
23+
});

scripts/updateRepositoryApiData/helpers.ts

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -59,36 +59,67 @@ export interface PartitionModulesResult<TModule extends RepositoryModuleLike> {
5959
processedCount: number;
6060
}
6161

62-
export function getRepositoryType(url: string): RepositoryType {
63-
if (url.includes("github.com")) {
64-
return "github";
62+
const REPOSITORY_HOSTS: ReadonlyMap<string, Exclude<RepositoryType, "unknown">> = new Map([
63+
["github.com", "github"],
64+
["www.github.com", "github"],
65+
["gitlab.com", "gitlab"],
66+
["www.gitlab.com", "gitlab"],
67+
["bitbucket.org", "bitbucket"],
68+
["www.bitbucket.org", "bitbucket"],
69+
["codeberg.org", "codeberg"],
70+
["www.codeberg.org", "codeberg"]
71+
]);
72+
73+
function normalizeRepositoryUrl(url: string): string {
74+
return url.startsWith("git+https://") || url.startsWith("git+http://")
75+
? url.slice(4)
76+
: url;
77+
}
78+
79+
function parseRepositoryUrl(url: string): URL | null {
80+
try {
81+
return new URL(normalizeRepositoryUrl(url));
6582
}
66-
if (url.includes("gitlab.com")) {
67-
return "gitlab";
83+
catch {
84+
return null;
6885
}
69-
if (url.includes("bitbucket.org")) {
70-
return "bitbucket";
86+
}
87+
88+
function getRepositoryUrlParts(url: string): { pathParts: string[]; type: Exclude<RepositoryType, "unknown"> } | null {
89+
const parsedUrl = parseRepositoryUrl(url);
90+
if (!parsedUrl) {
91+
return null;
7192
}
72-
if (url.includes("codeberg.org")) {
73-
return "codeberg";
93+
94+
const type = REPOSITORY_HOSTS.get(parsedUrl.hostname.toLowerCase());
95+
if (!type) {
96+
return null;
7497
}
75-
return "unknown";
98+
99+
const rawPathParts = parsedUrl.pathname.split("/").filter(Boolean);
100+
const pathParts = rawPathParts.map((part, index) =>
101+
index === rawPathParts.length - 1 ? part.replace(/\.git$/u, "") : part
102+
);
103+
104+
return { type, pathParts };
105+
}
106+
107+
export function getRepositoryType(url: string): RepositoryType {
108+
return getRepositoryUrlParts(url)?.type ?? "unknown";
76109
}
77110

78111
export function getRepositoryId(url: string): string | null {
79-
const urlParts = url.split("/");
80-
const hostIndex = urlParts.findIndex(part =>
81-
part.includes("github.com")
82-
|| part.includes("gitlab.com")
83-
|| part.includes("bitbucket.org")
84-
|| part.includes("codeberg.org"));
85-
86-
if (hostIndex !== -1 && urlParts.length > hostIndex + 2) {
87-
return `${urlParts[hostIndex + 1]}/${urlParts[hostIndex + 2]}`;
112+
const repositoryUrlParts = getRepositoryUrlParts(url);
113+
if (repositoryUrlParts && repositoryUrlParts.pathParts.length >= 2) {
114+
return `${repositoryUrlParts.pathParts[0]}/${repositoryUrlParts.pathParts[1]}`;
88115
}
89116
return null;
90117
}
91118

119+
export function isRepositoryType(url: string, repositoryType: Exclude<RepositoryType, "unknown">): boolean {
120+
return getRepositoryUrlParts(url)?.type === repositoryType;
121+
}
122+
92123
export function sortModuleListByLastUpdate<TModule extends RepositoryModuleLike>(previousData: PreviousRepositoryData, moduleList: TModule[]): void {
93124
moduleList.sort((a, b) => {
94125
const lastUpdateA = previousData.repositories?.find(repo => repo.id === a.id)?.gitHubDataLastUpdate;

0 commit comments

Comments
 (0)