@@ -170,6 +170,7 @@ const markWedgeIfMatches = (err) => {
170170 const msg = err ?. message || String ( err ) ;
171171 if ( / C o m m a n d t i m e o u t : / i. test ( msg ) ) runtimeWedgeDetected = true ;
172172} ;
173+
/**
 * Records the most recent auth state in the `latestAuthState` binding that is
 * declared outside this function.
 *
 * The function is declared `async`, so callers always receive a promise even
 * though nothing is awaited here; the assignment itself happens synchronously
 * when the function is called.
 *
 * @param {*} state - Value to store. It is assigned as-is, with no copying or
 *   validation.
 * @returns {Promise<void>} Promise that resolves to `undefined`.
 */
const setAuthState = async (state) => {
  latestAuthState = state;
};
@@ -1040,22 +1041,33 @@ const handlePostLoginChallenge = async () => {
10401041 "confirm it's you" ,
10411042 ] ) ;
10421043 if ( affirm ) {
1043- await page . evaluate ( `
1044- (() => {
1045- const target = ${ JSON . stringify ( affirm ) } ;
1046- const btns = Array.from(document.querySelectorAll('button, [role="button"]'));
1047- const match = btns.find((b) => (b.textContent || '').trim() === target);
1048- if (!match) return;
1049- try { match.focus(); } catch (_) {}
1050- const mouseOpts = { bubbles: true, cancelable: true, view: window };
1051- try { match.dispatchEvent(new MouseEvent('pointerdown', mouseOpts)); } catch (_) {}
1052- try { match.dispatchEvent(new MouseEvent('mousedown', mouseOpts)); } catch (_) {}
1053- try { match.dispatchEvent(new MouseEvent('pointerup', mouseOpts)); } catch (_) {}
1054- try { match.dispatchEvent(new MouseEvent('mouseup', mouseOpts)); } catch (_) {}
1055- try { match.dispatchEvent(new MouseEvent('click', mouseOpts)); } catch (_) {}
1056- try { match.click(); } catch (_) {}
1057- })()
1058- ` ) ;
1044+ const escaped = affirm . replace ( / " / g, '\\"' ) ;
1045+ let clickedViaPage = false ;
1046+ try {
1047+ await page . click (
1048+ 'button:has-text("' + escaped + '"), [role="button"]:has-text("' + escaped + '")' ,
1049+ { timeout : 3000 } ,
1050+ ) ;
1051+ clickedViaPage = true ;
1052+ } catch ( _ ) { }
1053+ if ( ! clickedViaPage ) {
1054+ await page . evaluate ( `
1055+ (() => {
1056+ const target = ${ JSON . stringify ( affirm ) } ;
1057+ const btns = Array.from(document.querySelectorAll('button, [role="button"]'));
1058+ const match = btns.find((b) => (b.textContent || '').trim() === target);
1059+ if (!match) return;
1060+ try { match.focus(); } catch (_) {}
1061+ const mouseOpts = { bubbles: true, cancelable: true, view: window };
1062+ try { match.dispatchEvent(new MouseEvent('pointerdown', mouseOpts)); } catch (_) {}
1063+ try { match.dispatchEvent(new MouseEvent('mousedown', mouseOpts)); } catch (_) {}
1064+ try { match.dispatchEvent(new MouseEvent('pointerup', mouseOpts)); } catch (_) {}
1065+ try { match.dispatchEvent(new MouseEvent('mouseup', mouseOpts)); } catch (_) {}
1066+ try { match.dispatchEvent(new MouseEvent('click', mouseOpts)); } catch (_) {}
1067+ try { match.click(); } catch (_) {}
1068+ })()
1069+ ` ) ;
1070+ }
10591071 clicked = true ;
10601072 await page . sleep ( 3500 ) ;
10611073 continue ;
@@ -1121,23 +1133,57 @@ const handlePostLoginChallenge = async () => {
11211133 'okay' ,
11221134 ] ) ;
11231135 if ( advance ) {
1124- await page . evaluate ( `
1125- (() => {
1126- const target = ${ JSON . stringify ( advance ) } ;
1127- const btns = Array.from(document.querySelectorAll('button, [role="button"]'));
1128- const match = btns.find((b) => (b.textContent || '').trim() === target);
1129- if (!match) return;
1130- // Try three escalating click strategies for React-rendered buttons.
1131- try { match.focus(); } catch (_) {}
1132- const mouseOpts = { bubbles: true, cancelable: true, view: window };
1133- try { match.dispatchEvent(new MouseEvent('pointerdown', mouseOpts)); } catch (_) {}
1134- try { match.dispatchEvent(new MouseEvent('mousedown', mouseOpts)); } catch (_) {}
1135- try { match.dispatchEvent(new MouseEvent('pointerup', mouseOpts)); } catch (_) {}
1136- try { match.dispatchEvent(new MouseEvent('mouseup', mouseOpts)); } catch (_) {}
1137- try { match.dispatchEvent(new MouseEvent('click', mouseOpts)); } catch (_) {}
1138- try { match.click(); } catch (_) {}
1139- })()
1140- ` ) ;
1136+ const escaped = advance . replace ( / " / g, '\\"' ) ;
1137+ // Prefer page.click (trusted browser-level events) over evaluate-based
1138+ // dispatch. Instagram rejects synthesized clicks as untrusted. Use a
1139+ // text-match CSS selector — ":has-text()" Playwright locator syntax —
1140+ // with a short timeout so a misselected button doesn't stall the run.
1141+ let clickedViaPage = false ;
1142+ try {
1143+ await page . click (
1144+ 'button:has-text("' + escaped + '"), [role="button"]:has-text("' + escaped + '")' ,
1145+ { timeout : 3000 } ,
1146+ ) ;
1147+ clickedViaPage = true ;
1148+ } catch ( _ ) {
1149+ // Fall through to evaluate-based dispatch as a last-resort fallback.
1150+ }
1151+ // Belt-and-suspenders: also focus + Enter-key the matched button.
1152+ // Some Instagram challenge pages treat keyboard Enter as a more
1153+ // "trusted" interaction signal than synthesized mouse clicks.
1154+ try {
1155+ await page . evaluate ( `
1156+ (() => {
1157+ const target = ${ JSON . stringify ( advance ) } ;
1158+ const btns = Array.from(document.querySelectorAll('button, [role="button"]'));
1159+ const match = btns.find((b) => (b.textContent || '').trim() === target);
1160+ if (match && typeof match.focus === 'function') match.focus();
1161+ })()
1162+ ` ) ;
1163+ if ( typeof page . keyboard ?. press === 'function' ) {
1164+ await page . keyboard . press ( 'Enter' ) ;
1165+ } else if ( typeof page . press === 'function' ) {
1166+ await page . press ( 'body' , 'Enter' ) ;
1167+ }
1168+ } catch ( _ ) { }
1169+ if ( ! clickedViaPage ) {
1170+ await page . evaluate ( `
1171+ (() => {
1172+ const target = ${ JSON . stringify ( advance ) } ;
1173+ const btns = Array.from(document.querySelectorAll('button, [role="button"]'));
1174+ const match = btns.find((b) => (b.textContent || '').trim() === target);
1175+ if (!match) return;
1176+ try { match.focus(); } catch (_) {}
1177+ const mouseOpts = { bubbles: true, cancelable: true, view: window };
1178+ try { match.dispatchEvent(new MouseEvent('pointerdown', mouseOpts)); } catch (_) {}
1179+ try { match.dispatchEvent(new MouseEvent('mousedown', mouseOpts)); } catch (_) {}
1180+ try { match.dispatchEvent(new MouseEvent('pointerup', mouseOpts)); } catch (_) {}
1181+ try { match.dispatchEvent(new MouseEvent('mouseup', mouseOpts)); } catch (_) {}
1182+ try { match.dispatchEvent(new MouseEvent('click', mouseOpts)); } catch (_) {}
1183+ try { match.click(); } catch (_) {}
1184+ })()
1185+ ` ) ;
1186+ }
11411187 clicked = true ;
11421188 // Give the next page/step a moment to render.
11431189 await page . sleep ( 3500 ) ;
@@ -1431,6 +1477,7 @@ const collectProfileViaPageCapture = async (username) => {
14311477 await page . sleep ( PROFILE_CAPTURE_WAIT_MS ) ;
14321478
14331479 for ( let attempt = 0 ; attempt < PROFILE_CAPTURE_MAX_ATTEMPTS ; attempt ++ ) {
1480+ if ( runtimeWedgeDetected ) break ;
14341481 await setCollectorTraceSection ( 'profileBootstrap' , {
14351482 method : 'profile_page_capture' ,
14361483 step : 'wait_for_profile_capture' ,
@@ -1440,7 +1487,13 @@ const collectProfileViaPageCapture = async (username) => {
14401487 captureKey : 'profileResponse' ,
14411488 } ) ;
14421489 await page . sleep ( 1000 ) ;
1443- const response = await page . getCapturedResponse ( 'profileResponse' ) ;
1490+ let response ;
1491+ try {
1492+ response = await page . getCapturedResponse ( 'profileResponse' ) ;
1493+ } catch ( err ) {
1494+ markWedgeIfMatches ( err ) ;
1495+ break ;
1496+ }
14441497 const user =
14451498 response ?. data ?. data ?. user ??
14461499 response ?. data ?. user ??
@@ -1762,60 +1815,103 @@ const fetchAccountsCenterHtml = async (path, traceSection) => {
17621815 targetUrl : fullUrl ,
17631816 } ) ;
17641817 }
1765- const reachable = await safeGoto ( fullUrl ) ;
1766- if ( ! reachable ) {
1767- if ( traceSection ) {
1768- await setCollectorTraceSection ( traceSection , {
1769- phase : traceSection ,
1770- method : 'accounts_center_html' ,
1771- step : 'navigate_accounts_center' ,
1772- status : 'error' ,
1773- outcome : 'page_unreachable' ,
1774- path,
1775- targetUrl : fullUrl ,
1776- } ) ;
1818+ // Strategy: navigate to a light instagram.com page once and then XHR-fetch
1819+ // the Accounts Center path from the existing tab. This avoids letting the
1820+ // heavy Accounts Center React app load, which has been observed to wedge
1821+ // the remote browser runtime after 1-2 navigations. Cookies on the
1822+ // instagram.com domain cover accountscenter.instagram.com (subdomain).
1823+ const alreadyOnInstagram = await page . evaluate (
1824+ `(() => { try { return location.hostname.endsWith('instagram.com'); } catch (_) { return false; } })()`
1825+ ) . catch ( ( ) => false ) ;
1826+ if ( ! alreadyOnInstagram ) {
1827+ const reachable = await safeGoto ( 'https://www.instagram.com/' ) ;
1828+ if ( ! reachable ) {
1829+ if ( traceSection ) {
1830+ await setCollectorTraceSection ( traceSection , {
1831+ phase : traceSection ,
1832+ method : 'accounts_center_html' ,
1833+ step : 'navigate_accounts_center' ,
1834+ status : 'error' ,
1835+ outcome : 'anchor_page_unreachable' ,
1836+ path,
1837+ targetUrl : fullUrl ,
1838+ } ) ;
1839+ }
1840+ throw new Error ( 'instagram.com anchor page could not be reached' ) ;
17771841 }
1778- throw new Error ( 'accounts center page could not be reached for ' + path ) ;
17791842 }
17801843 if ( traceSection ) {
17811844 await setCollectorTraceSection ( traceSection , {
17821845 phase : traceSection ,
17831846 method : 'accounts_center_html' ,
17841847 step : 'read_accounts_center_html' ,
1785- status : 'waiting_html ' ,
1848+ status : 'fetching_via_xhr ' ,
17861849 path,
17871850 targetUrl : fullUrl ,
17881851 } ) ;
17891852 }
1790- await page . sleep ( 1500 ) ;
1791- // Previously: `await page.evaluate('document.documentElement.outerHTML')` —
1792- // returning the FULL Accounts Center HTML (often multi-MB) across the
1793- // remote browser boundary. Large payloads are the documented cause of
1794- // runtime wedges (docs/2026-04-17-instagram-runtime-root-cause-plan.md)
1795- // and the motivation for the closed #168. We now project inside the page:
1796- // extract the data-sjs script bodies and shallow fb_dtsg/lsd/jazoest
1797- // tokens from the meta HTML, then stringify the HTML back on the host
1798- // only if downstream parsers need it. Most callers immediately run
1799- // `extractDataSjsBlocks(html)` — we return a preparsed blob so the host
1800- // never sees the raw document.
1801- const shrunk = await page . evaluate ( `
1802- (() => {
1803- try {
1804- const scripts = Array.from(document.querySelectorAll('script[type="application/json"][data-sjs]'));
1805- const payloads = [];
1806- for (const el of scripts) {
1807- const text = el.textContent || '';
1808- if (!text.includes('fxcal_settings')) continue;
1809- payloads.push(text);
1853+ // Fetch the Accounts Center page as raw HTML via XHR. Parse data-sjs
1854+ // scripts inside page so we only return the small payloads we need.
1855+ let shrunk ;
1856+ try {
1857+ shrunk = await page . evaluate ( `
1858+ (async () => {
1859+ try {
1860+ const res = await fetch(${ JSON . stringify ( fullUrl ) } , {
1861+ credentials: 'include',
1862+ headers: {
1863+ 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1864+ 'sec-fetch-mode': 'navigate',
1865+ 'sec-fetch-site': 'same-site',
1866+ 'sec-fetch-dest': 'document',
1867+ },
1868+ });
1869+ if (!res.ok) return { error: 'xhr status ' + res.status };
1870+ const html = await res.text();
1871+ const payloads = [];
1872+ const re = /<script type="application\\/json"[^>]*data-sjs[^>]*>([\\s\\S]*?)<\\/script>/g;
1873+ let m;
1874+ while ((m = re.exec(html)) !== null) {
1875+ if (!m[1].includes('fxcal_settings')) continue;
1876+ payloads.push(m[1]);
1877+ }
1878+ const metaSlice = html.slice(0, 80000);
1879+ return { payloads, metaSlice, scriptCount: payloads.length, htmlLength: html.length };
1880+ } catch (err) {
1881+ return { error: err && err.message ? err.message : String(err) };
18101882 }
1811- const fullHtml = document.documentElement.outerHTML;
1812- const metaSlice = fullHtml.slice(0, 80000);
1813- return { payloads, metaSlice, scriptCount: scripts.length };
1814- } catch (err) {
1815- return { error: err && err.message ? err.message : String(err) };
1816- }
1817- })()
1818- ` ) ;
1883+ })()
1884+ ` ) ;
1885+ } catch ( err ) {
1886+ markWedgeIfMatches ( err ) ;
1887+ throw err ;
1888+ }
1889+ if ( shrunk && shrunk . error ) {
1890+ // Fall back to full navigation once if XHR failed (auth, CORS, etc.).
1891+ const reachable = await safeGoto ( fullUrl ) ;
1892+ if ( ! reachable ) {
1893+ throw new Error ( 'accounts center page could not be reached for ' + path ) ;
1894+ }
1895+ await page . sleep ( 1500 ) ;
1896+ shrunk = await page . evaluate ( `
1897+ (() => {
1898+ try {
1899+ const scripts = Array.from(document.querySelectorAll('script[type="application/json"][data-sjs]'));
1900+ const payloads = [];
1901+ for (const el of scripts) {
1902+ const text = el.textContent || '';
1903+ if (!text.includes('fxcal_settings')) continue;
1904+ payloads.push(text);
1905+ }
1906+ const fullHtml = document.documentElement.outerHTML;
1907+ const metaSlice = fullHtml.slice(0, 80000);
1908+ return { payloads, metaSlice, scriptCount: scripts.length };
1909+ } catch (err) {
1910+ return { error: err && err.message ? err.message : String(err) };
1911+ }
1912+ })()
1913+ ` ) ;
1914+ }
18191915 const payloads = ( shrunk && shrunk . payloads ) || [ ] ;
18201916 const metaSlice = ( shrunk && shrunk . metaSlice ) || '' ;
18211917 // Reconstruct a minimal "html" that extractDataSjsBlocks + extractMetaTokens
0 commit comments