Skip to content

Commit d96897d

Browse files
Use worker-served HTML to populate Gumroad products (#19)
- Update the Gumroad Cloudflare worker to return cached profile HTML for a username.
- Adjust the Gumroad link builder to fetch that HTML and extract product links client-side.

------

[Codex Task](https://chatgpt.com/codex/tasks/task_e_6916ee4d36e88325a6b7ac48e21b03a9)
1 parent 2f1a62e commit d96897d

2 files changed

Lines changed: 34 additions & 63 deletions

File tree

cloudflare-workers/gumroad-products/worker.js

Lines changed: 6 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,6 @@ export default {
8080
return respondJSON(origin, allowed, { error: "Invalid or missing Gumroad username." }, 400);
8181
}
8282

83-
const profileUrl = `https://${u}.gumroad.com/`;
8483
const cache = caches.default;
8584
const dataKey = new Request(`https://gumroad-products.internal/cache?u=${encodeURIComponent(u)}`);
8685
const metaKey = new Request(`https://gumroad-products.internal/meta?u=${encodeURIComponent(u)}`);
@@ -96,7 +95,7 @@ export default {
9695
if (cachedBody && age <= S_MAX_AGE) {
9796
return new Response(cachedBody, {
9897
headers: {
99-
"Content-Type": "application/json; charset=utf-8",
98+
"Content-Type": "text/html; charset=utf-8",
10099
"Cache-Control": `public, max-age=${S_MAX_AGE}`,
101100
"X-Cache": "HIT",
102101
"X-Upstream-Status": "none",
@@ -136,7 +135,7 @@ export default {
136135
if (cachedBody && age <= (S_MAX_AGE + STALE_WINDOW)) {
137136
return new Response(cachedBody, {
138137
headers: {
139-
"Content-Type": "application/json; charset=utf-8",
138+
"Content-Type": "text/html; charset=utf-8",
140139
"Cache-Control": `public, max-age=0, stale-while-revalidate=${STALE_WINDOW}`,
141140
"X-Cache": "STALE",
142141
"X-Upstream-Status": String(upstream.status),
@@ -153,22 +152,11 @@ export default {
153152

154153
const html = await upstream.text();
155154

156-
const products = extractProducts(html);
157-
158-
const payload = {
159-
username: u,
160-
profile_url: profileUrl,
161-
count: products.length,
162-
products,
163-
fetched_at: new Date().toISOString(),
164-
};
165-
const body = JSON.stringify(payload);
166-
167155
// Update cache
168156
const now = Math.floor(Date.now() / 1000);
169-
const dataResp = new Response(body, {
157+
const dataResp = new Response(html, {
170158
headers: {
171-
"Content-Type": "application/json; charset=utf-8",
159+
"Content-Type": "text/html; charset=utf-8",
172160
"Cache-Control": `public, max-age=${S_MAX_AGE}`,
173161
},
174162
});
@@ -178,9 +166,9 @@ export default {
178166
ctx.waitUntil(cache.put(dataKey, dataResp.clone()));
179167
ctx.waitUntil(cache.put(metaKey, metaResp.clone()));
180168

181-
return new Response(body, {
169+
return new Response(html, {
182170
headers: {
183-
"Content-Type": "application/json; charset=utf-8",
171+
"Content-Type": "text/html; charset=utf-8",
184172
"Cache-Control": `public, max-age=${S_MAX_AGE}`,
185173
"X-Cache": cachedBody ? "MISS-REVAL" : "MISS",
186174
"X-Upstream-Status": "200",
@@ -189,31 +177,3 @@ export default {
189177
});
190178
},
191179
};
192-
193-
function extractProducts(html) {
194-
// Lightweight HTML parsing without DOM: heuristic regex over links.
195-
// For more robustness, you could use an HTML parser lib with Workers Bundler.
196-
const linkRe = /<a\b[^>]*href=["']([^"']*\/l\/[^"']*)["'][^>]*>([\s\S]*?)<\/a>/gi;
197-
const tagRe = /<\/?[^>]+>/g;
198-
const nbspRe = /&nbsp;/g;
199-
200-
const seen = new Set();
201-
const items = [];
202-
let m;
203-
while ((m = linkRe.exec(html)) !== null) {
204-
const href = m[1];
205-
let title = m[2].replace(tagRe, '').replace(nbspRe, ' ').trim();
206-
if (!title) continue;
207-
208-
// Normalize absolute vs relative
209-
const url = href.startsWith('http') ? href : new URL(href, 'https://example.com').href;
210-
const slug = url.split('/').filter(Boolean).pop();
211-
212-
const key = url + '|' + title;
213-
if (seen.has(key)) continue;
214-
seen.add(key);
215-
216-
items.push({ title, url, slug });
217-
}
218-
return items;
219-
}

gumroad-links.html

Lines changed: 28 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -653,34 +653,49 @@ <h3 style="margin: 0; font-size: 1.1rem;">Included parameters</h3>
653653
}
654654
}
655655

656-
function parseProductsFromProfile(html, profileUrl, username) {
656+
function parseProductsFromHTML(html, username) {
657657
const parser = new DOMParser();
658658
const doc = parser.parseFromString(html, 'text/html');
659-
const productCards = Array.from(doc.querySelectorAll('article.product-card'));
659+
const anchors = Array.from(doc.querySelectorAll('a[href]'));
660660
const products = new Map();
661+
const baseUrl = `https://${username}.gumroad.com/`;
661662

662-
for (const card of productCards) {
663-
const link = card.querySelector('a[href]');
664-
if (!link) {
663+
for (const anchor of anchors) {
664+
const rawHref = anchor.getAttribute('href');
665+
if (!rawHref) {
665666
continue;
666667
}
668+
667669
let resolvedUrl;
668670
try {
669-
resolvedUrl = new URL(link.getAttribute('href'), profileUrl);
671+
resolvedUrl = new URL(rawHref, baseUrl);
670672
} catch (error) {
671673
continue;
672674
}
675+
676+
if (!resolvedUrl.hostname.endsWith('gumroad.com')) {
677+
continue;
678+
}
679+
680+
const segments = resolvedUrl.pathname.split('/').filter(Boolean);
681+
if (segments.length < 2 || segments[0].toLowerCase() !== 'l') {
682+
continue;
683+
}
684+
673685
const slug = sanitiseSlug(resolvedUrl.pathname);
674686
if (!slug || products.has(slug)) {
675687
continue;
676688
}
677-
const titleElement = card.querySelector('[itemprop="name"], h4, h3') || link;
678-
const name = (titleElement.textContent || '').trim();
679-
const productUrl = composeProductUrl(username, slug) || resolvedUrl.href;
689+
690+
const textContent = (anchor.textContent || '').replace(/\s+/g, ' ').trim();
691+
const titleAttr = (anchor.getAttribute('title') || '').trim();
692+
const label = textContent || titleAttr || slug;
693+
const canonicalUrl = composeProductUrl(username, slug) || resolvedUrl.href;
694+
680695
products.set(slug, {
681696
slug,
682-
name: name || slug,
683-
url: productUrl,
697+
name: label,
698+
url: canonicalUrl,
684699
});
685700
}
686701

@@ -780,12 +795,8 @@ <h3 style="margin: 0; font-size: 1.1rem;">Included parameters</h3>
780795
throw new Error(`Request failed: ${response.status} (X-Cache=${xCache}, Upstream=${xUp})`);
781796
}
782797

783-
const data = await response.json();
784-
const products = (data.products || []).map((p) => ({
785-
name: p.name || p.title || p.slug || '',
786-
slug: p.slug,
787-
url: p.url,
788-
})).filter((product) => Boolean(product.slug));
798+
const html = await response.text();
799+
const products = parseProductsFromHTML(html, cleanUsername);
789800

790801
if (products.length === 0) {
791802
populateProductSelect([]);

0 commit comments

Comments
 (0)