Skip to content

Commit 8b2a772

Browse files
committed
bug: unwanted navigation caused by cookie consent logic
1 parent 15fbcff commit 8b2a772

2 files changed

Lines changed: 46 additions & 1 deletion

File tree

src/core/browser-engine.js

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -208,9 +208,24 @@ export class BrowserEngine {
208208
if (!text || text.length > 50) continue; // Skip empty or too long text (unlikely to be a simple button)
209209

210210
const lowerText = text.toLowerCase();
211-
if (acceptButtonTexts.some(t => lowerText.includes(t.toLowerCase()))) {
211+
const isMatch = acceptButtonTexts.some(t => lowerText.includes(t.toLowerCase()));
212+
213+
if (isMatch) {
212214
try {
215+
// Safety: If it's a link with an href, it might navigate.
216+
// We prefer clicking buttons or elements that don't look like external links.
217+
const isLinkWithHref = btn.tagName === 'A' && btn.getAttribute('href') && !btn.getAttribute('href').startsWith('#');
218+
const isStrongAccept = /accept all|allow all|agree to all|accept and continue/i.test(lowerText);
219+
220+
// If it's a link that navigates, only click if it's a VERY strong "Accept All" signal.
221+
// Otherwise, we might be clicking a "Cookie Policy" link by mistake.
222+
if (isLinkWithHref && !isStrongAccept) {
223+
continue;
224+
}
225+
213226
btn.click();
227+
// One click is often enough for most banners; let's break if we found a strong match
228+
if (isStrongAccept) break;
214229
} catch (e) { }
215230
}
216231
}

src/core/mirror-cloner.js

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,9 +570,39 @@ export class MirrorCloner {
570570
timeout: this.options.timeout,
571571
});
572572

573+
// Capture the baseline URL after potentially initial redirects but BEFORE cookie clicking
574+
const baselineUrl = page.url();
575+
573576
// Handle cookie consent banners before further processing
574577
await this.browserEngine.handleCookieConsent(page);
575578

579+
// Give a small window for any triggered navigation to start
580+
await new Promise(r => setTimeout(r, 1000));
581+
582+
// Post-cookie click safety check: verify if we navigated away from the original intent
583+
let currentUrl = page.url();
584+
if (currentUrl !== baselineUrl) {
585+
// If we navigated to what looks like a cookie settings/privacy page, try to go back
586+
const isCookiePage = /cookie|consent|privacy|settings|policy/i.test(currentUrl);
587+
const hostChanged = (new URL(currentUrl).hostname !== new URL(baselineUrl).hostname);
588+
589+
if (isCookiePage || hostChanged) {
590+
if (this.options.debug) {
591+
console.log(chalk.yellow(` ⚠️ Cookie consent caused navigation to: ${currentUrl}. Attempting to backtracking to: ${baselineUrl}`));
592+
}
593+
try {
594+
// Go back or re-navigate
595+
await page.goBack({ waitUntil: 'domcontentloaded', timeout: 10000 }).catch(async () => {
596+
await page.goto(baselineUrl, { waitUntil: 'domcontentloaded', timeout: 10000 });
597+
});
598+
// Ensure we are back
599+
currentUrl = page.url();
600+
} catch (e) {
601+
if (this.options.debug) console.log(chalk.dim(` Backtracking failed: ${e.message}`));
602+
}
603+
}
604+
}
605+
576606
await this.waitForRootReady(page);
577607
await this.scrollToBottomAndLoad(page);
578608
await this.waitForImagesSettled(page, 8000);

0 commit comments

Comments
 (0)