Skip to content

Commit 26074f9

Browse files
committed
feat: improve URL accessibility checks and download handling
- Enhanced the URL accessibility check to handle multiple redirect types (301, 302, 307, 308) and to report detailed accessibility status, including content-type validation.
- Updated the download function to follow redirects correctly and to handle errors properly, making downloads of source archives from GitHub more reliable.
- Adjusted the main function to download from the final URLs returned after redirects, ensuring the correct resources are retrieved.
1 parent 0ad2de9 commit 26074f9

1 file changed

Lines changed: 152 additions & 50 deletions

File tree

.github/scripts/upload-to-r2.js

Lines changed: 152 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -68,58 +68,131 @@ function findArtifacts(dir, pattern) {
6868
async function checkUrlAccessible(url, maxRetries = 10, initialDelay = 1000) {
6969
for (let attempt = 0; attempt < maxRetries; attempt++) {
7070
try {
71-
const isAccessible = await new Promise((resolve, reject) => {
71+
const result = await new Promise((resolve, reject) => {
7272
const request = https.get(url, { timeout: 10000 }, (response) => {
73-
// Follow redirects immediately (no retry needed for redirects)
74-
if (response.statusCode === 302 || response.statusCode === 301) {
75-
response.destroy();
73+
const statusCode = response.statusCode;
74+
75+
// Follow redirects
76+
if (
77+
statusCode === 302 ||
78+
statusCode === 301 ||
79+
statusCode === 307 ||
80+
statusCode === 308
81+
) {
7682
const redirectUrl = response.headers.location;
77-
// Recursively check the redirect URL, but only once (no retries)
83+
response.destroy();
84+
if (!redirectUrl) {
85+
resolve({
86+
accessible: false,
87+
statusCode,
88+
error: "Redirect without location header",
89+
});
90+
return;
91+
}
92+
// Follow the redirect URL
7893
return https
7994
.get(redirectUrl, { timeout: 10000 }, (redirectResponse) => {
95+
const redirectStatus = redirectResponse.statusCode;
96+
const contentType =
97+
redirectResponse.headers["content-type"] || "";
98+
// Check if it's actually a file (zip/tar.gz) and not HTML
99+
const isFile =
100+
contentType.includes("application/zip") ||
101+
contentType.includes("application/gzip") ||
102+
contentType.includes("application/x-gzip") ||
103+
contentType.includes("application/x-tar") ||
104+
redirectUrl.includes(".zip") ||
105+
redirectUrl.includes(".tar.gz");
80106
const isGood =
81-
redirectResponse.statusCode >= 200 &&
82-
redirectResponse.statusCode < 300;
107+
redirectStatus >= 200 && redirectStatus < 300 && isFile;
83108
redirectResponse.destroy();
84-
resolve(isGood);
109+
resolve({
110+
accessible: isGood,
111+
statusCode: redirectStatus,
112+
finalUrl: redirectUrl,
113+
contentType,
114+
});
115+
})
116+
.on("error", (error) => {
117+
resolve({
118+
accessible: false,
119+
statusCode,
120+
error: error.message,
121+
});
85122
})
86-
.on("error", () => resolve(false))
87123
.on("timeout", function () {
88124
this.destroy();
89-
resolve(false);
125+
resolve({
126+
accessible: false,
127+
statusCode,
128+
error: "Timeout following redirect",
129+
});
90130
});
91131
}
92-
// Check if status is good (200-299 range)
93-
const isGood =
94-
response.statusCode >= 200 && response.statusCode < 300;
132+
133+
// Check if status is good (200-299 range) and it's actually a file
134+
const contentType = response.headers["content-type"] || "";
135+
const isFile =
136+
contentType.includes("application/zip") ||
137+
contentType.includes("application/gzip") ||
138+
contentType.includes("application/x-gzip") ||
139+
contentType.includes("application/x-tar") ||
140+
url.includes(".zip") ||
141+
url.includes(".tar.gz");
142+
const isGood = statusCode >= 200 && statusCode < 300 && isFile;
95143
response.destroy();
96-
resolve(isGood);
144+
resolve({ accessible: isGood, statusCode, contentType });
97145
});
146+
98147
request.on("error", (error) => {
99-
resolve(false);
148+
resolve({
149+
accessible: false,
150+
statusCode: null,
151+
error: error.message,
152+
});
100153
});
154+
101155
request.on("timeout", () => {
102156
request.destroy();
103-
resolve(false);
157+
resolve({
158+
accessible: false,
159+
statusCode: null,
160+
error: "Request timeout",
161+
});
104162
});
105163
});
106164

107-
if (isAccessible) {
165+
if (result.accessible) {
108166
if (attempt > 0) {
109-
console.log(`URL ${url} is now accessible after ${attempt} retries`);
167+
console.log(
168+
`✓ URL ${url} is now accessible after ${attempt} retries (status: ${result.statusCode})`
169+
);
170+
} else {
171+
console.log(
172+
`✓ URL ${url} is accessible (status: ${result.statusCode})`
173+
);
110174
}
111-
return true;
175+
return result.finalUrl || url; // Return the final URL (after redirects) if available
176+
} else {
177+
const errorMsg = result.error ? ` - ${result.error}` : "";
178+
const statusMsg = result.statusCode
179+
? ` (status: ${result.statusCode})`
180+
: "";
181+
const contentTypeMsg = result.contentType
182+
? ` [content-type: ${result.contentType}]`
183+
: "";
184+
console.log(
185+
`✗ URL ${url} not accessible${statusMsg}${contentTypeMsg}${errorMsg}`
186+
);
112187
}
113188
} catch (error) {
114-
// Continue to retry
189+
console.log(`✗ URL ${url} check failed: ${error.message}`);
115190
}
116191

117192
if (attempt < maxRetries - 1) {
118193
const delay = initialDelay * Math.pow(2, attempt);
119194
console.log(
120-
`URL ${url} not accessible yet (attempt ${
121-
attempt + 1
122-
}/${maxRetries}), retrying in ${delay}ms...`
195+
` Retrying in ${delay}ms... (attempt ${attempt + 1}/${maxRetries})`
123196
);
124197
await new Promise((resolve) => setTimeout(resolve, delay));
125198
}
@@ -130,31 +203,59 @@ async function checkUrlAccessible(url, maxRetries = 10, initialDelay = 1000) {
130203

131204
async function downloadFromGitHub(url, outputPath) {
132205
return new Promise((resolve, reject) => {
133-
https
134-
.get(url, (response) => {
135-
if (response.statusCode === 302 || response.statusCode === 301) {
136-
// Follow redirect
137-
return downloadFromGitHub(response.headers.location, outputPath)
138-
.then(resolve)
139-
.catch(reject);
140-
}
141-
if (response.statusCode !== 200) {
142-
reject(
143-
new Error(
144-
`Failed to download ${url}: ${response.statusCode} ${response.statusMessage}`
145-
)
146-
);
206+
const request = https.get(url, { timeout: 30000 }, (response) => {
207+
const statusCode = response.statusCode;
208+
209+
// Follow redirects (all redirect types)
210+
if (
211+
statusCode === 301 ||
212+
statusCode === 302 ||
213+
statusCode === 307 ||
214+
statusCode === 308
215+
) {
216+
const redirectUrl = response.headers.location;
217+
response.destroy();
218+
if (!redirectUrl) {
219+
reject(new Error(`Redirect without location header for ${url}`));
147220
return;
148221
}
149-
const fileStream = fs.createWriteStream(outputPath);
150-
response.pipe(fileStream);
151-
fileStream.on("finish", () => {
152-
fileStream.close();
153-
resolve();
154-
});
155-
fileStream.on("error", reject);
156-
})
157-
.on("error", reject);
222+
// Resolve relative redirects
223+
const finalRedirectUrl = redirectUrl.startsWith("http")
224+
? redirectUrl
225+
: new URL(redirectUrl, url).href;
226+
console.log(` Following redirect: ${finalRedirectUrl}`);
227+
return downloadFromGitHub(finalRedirectUrl, outputPath)
228+
.then(resolve)
229+
.catch(reject);
230+
}
231+
232+
if (statusCode !== 200) {
233+
response.destroy();
234+
reject(
235+
new Error(
236+
`Failed to download ${url}: ${statusCode} ${response.statusMessage}`
237+
)
238+
);
239+
return;
240+
}
241+
242+
const fileStream = fs.createWriteStream(outputPath);
243+
response.pipe(fileStream);
244+
fileStream.on("finish", () => {
245+
fileStream.close();
246+
resolve();
247+
});
248+
fileStream.on("error", (error) => {
249+
response.destroy();
250+
reject(error);
251+
});
252+
});
253+
254+
request.on("error", reject);
255+
request.on("timeout", () => {
256+
request.destroy();
257+
reject(new Error(`Request timeout for ${url}`));
258+
});
158259
});
159260
}
160261

@@ -179,12 +280,13 @@ async function main() {
179280
console.log(` TAR.GZ: ${githubTarGzUrl}`);
180281

181282
// Wait for archives to be accessible with exponential backoff
182-
await checkUrlAccessible(githubZipUrl);
183-
await checkUrlAccessible(githubTarGzUrl);
283+
// This returns the final URL after following redirects
284+
const finalZipUrl = await checkUrlAccessible(githubZipUrl);
285+
const finalTarGzUrl = await checkUrlAccessible(githubTarGzUrl);
184286

185287
console.log(`Downloading source archives from GitHub...`);
186-
await downloadFromGitHub(githubZipUrl, sourceZipPath);
187-
await downloadFromGitHub(githubTarGzUrl, sourceTarGzPath);
288+
await downloadFromGitHub(finalZipUrl, sourceZipPath);
289+
await downloadFromGitHub(finalTarGzUrl, sourceTarGzPath);
188290

189291
console.log(`Downloaded source archives successfully`);
190292

0 commit comments

Comments
 (0)