|
/**
 * NotebookLM TypeScript Client — Authentication
 *
 * Reads Google cookies from the NOTEBOOKLM_AUTH_JSON env var (Playwright
 * storage state format), fetches the NotebookLM homepage to extract CSRF
 * and session tokens, and returns an AuthTokens object for RPC calls.
 *
 * @module lib/services/notebooklm/auth
 */
| 10 | + |
| 11 | +import type { AuthTokens, NotebookLMCookie } from './types'; |
| 12 | +import { fetchWithTimeout } from './rpc'; |
| 13 | + |
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** Page requested during auth; its HTML is searched for the CSRF and session tokens. */
const NOTEBOOKLM_HOME = 'https://notebooklm.google.com/';

/** Cookie domains accepted when reading cookies from the storage state. */
const ALLOWED_DOMAINS = ['.google.com', 'notebooklm.google.com', '.googleusercontent.com'];

/** Captures the string stored under the "SNlM0e" key (used as the CSRF token). */
const CSRF_REGEX = /"SNlM0e"\s*:\s*"([^"]+)"/;

/** Captures the string stored under the "FdrFJe" key (used as the session ID). */
const SESSION_REGEX = /"FdrFJe"\s*:\s*"([^"]+)"/;

/** Default timeout for the initial auth fetch, in milliseconds (30 seconds). */
const AUTH_FETCH_TIMEOUT_MS = 30 * 1000;
| 31 | + |
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Check whether a cookie domain is covered by an allow-list entry.
 *
 * Both the domain and each entry are lower-cased and given a leading dot
 * before comparison, so a match always falls on a DNS label boundary: an
 * entry matches the domain itself and any of its subdomains, but never a
 * host that merely ends with the same characters (for example
 * `evilnotebooklm.google.com` does not match `notebooklm.google.com`).
 * Empty domains and empty allow-list entries never match.
 *
 * @param domain - Cookie domain, with or without a leading dot.
 * @param allowedDomains - Allow-list to check against; defaults to ALLOWED_DOMAINS.
 * @returns `true` when the domain equals, or is a subdomain of, an allowed entry.
 */
function isAllowedDomain(
  domain: string,
  allowedDomains: readonly string[] = ALLOWED_DOMAINS
): boolean {
  // DNS names are case-insensitive; the leading dot anchors suffix matches
  // on a label boundary.
  const withLeadingDot = (host: string): string => {
    const lower = host.toLowerCase();
    return lower.startsWith('.') ? lower : `.${lower}`;
  };

  const candidate = withLeadingDot(domain);
  if (candidate === '.') {
    return false;
  }

  return allowedDomains.some((allowed) => {
    const suffix = withLeadingDot(allowed);
    // Equality is the special case of endsWith where both strings have the same length.
    return suffix !== '.' && candidate.endsWith(suffix);
  });
}
| 48 | + |
/**
 * Read the Playwright storage state JSON from the NOTEBOOKLM_AUTH_JSON env
 * var and reduce it to a name → value map of cookies from allowed domains.
 *
 * The parsed JSON is treated as untrusted: a non-object root (for example
 * `null`), a missing or empty `cookies` array, and malformed array entries
 * are all handled without raising a raw TypeError.
 *
 * @returns Cookie values keyed by cookie name; when a name occurs more than
 *   once, the last matching entry wins.
 * @throws Error if the env var is unset, is not valid JSON, contains no
 *   cookies, or yields no `SID` cookie from an allowed domain.
 */
function parseCookiesFromEnv(): Record<string, string> {
  const authJson = process.env.NOTEBOOKLM_AUTH_JSON;
  if (!authJson) {
    throw new Error(
      '[NotebookLM] NOTEBOOKLM_AUTH_JSON env var is not set. ' +
        'Set it to a Playwright storage state JSON with Google cookies.'
    );
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(authJson);
  } catch {
    throw new Error(
      '[NotebookLM] NOTEBOOKLM_AUTH_JSON is not valid JSON. ' +
        'Expected Playwright storage state format: {"cookies": [...]}'
    );
  }

  // JSON.parse may legally return null or a primitive; only an object can
  // carry a `cookies` property.
  const rawCookies: unknown =
    typeof parsed === 'object' && parsed !== null
      ? (parsed as { cookies?: unknown }).cookies
      : undefined;
  if (!Array.isArray(rawCookies) || rawCookies.length === 0) {
    throw new Error(
      '[NotebookLM] No cookies found in NOTEBOOKLM_AUTH_JSON. ' +
        'Expected format: {"cookies": [{"name": "SID", "value": "...", "domain": ".google.com"}, ...]}'
    );
  }

  // Filter to allowed domains and deduplicate (last wins)
  const cookies: Record<string, string> = {};
  for (const entry of rawCookies as unknown[]) {
    // Skip entries that are not objects instead of crashing on property access.
    if (typeof entry !== 'object' || entry === null) {
      continue;
    }
    const { name, value, domain } = entry as Partial<NotebookLMCookie>;
    if (
      typeof name === 'string' &&
      name !== '' &&
      typeof value === 'string' &&
      value !== '' &&
      typeof domain === 'string' &&
      domain !== '' &&
      isAllowedDomain(domain)
    ) {
      cookies[name] = value;
    }
  }

  if (!cookies['SID']) {
    throw new Error(
      '[NotebookLM] SID cookie not found in NOTEBOOKLM_AUTH_JSON. ' +
        'The Google auth cookies may be missing or from the wrong domain.'
    );
  }

  return cookies;
}
| 101 | + |
/**
 * Serialize a cookie name → value map into a single `Cookie` header value.
 *
 * Each entry becomes `name=value`; entries are joined with `; ` in the
 * object's own key order. An empty map yields an empty string.
 */
function buildCookieHeader(cookies: Record<string, string>): string {
  const pairs: string[] = [];
  for (const name of Object.keys(cookies)) {
    pairs.push(`${name}=${cookies[name]}`);
  }
  return pairs.join('; ');
}
| 110 | + |
// ---------------------------------------------------------------------------
// Main auth function
// ---------------------------------------------------------------------------

/**
 * Return the first capture group of `pattern` in `html`, or throw a
 * descriptive error naming the token that could not be found.
 */
function extractPageToken(html: string, pattern: RegExp, label: string): string {
  const token = html.match(pattern)?.[1];
  if (!token) {
    throw new Error(
      `[NotebookLM] Could not extract ${label} from NotebookLM page. ` +
        'The page structure may have changed, or auth cookies may be invalid.'
    );
  }
  return token;
}

/**
 * Initialize authentication for the NotebookLM client.
 *
 * 1. Reads cookies from NOTEBOOKLM_AUTH_JSON env var
 * 2. Fetches the NotebookLM homepage with those cookies
 * 3. Fails early if the request was redirected to a Google login page or
 *    the response status is not successful
 * 4. Extracts the CSRF token (SNlM0e) and session ID (FdrFJe) from the HTML
 *
 * @returns AuthTokens containing the cookie map, the serialized Cookie
 *   header, the CSRF token and the session ID.
 * @throws Error if cookies are missing, the fetch fails or times out, auth is
 *   expired, the homepage returns a non-success status, or tokens can't be extracted
 */
export async function initAuth(): Promise<AuthTokens> {
  console.log('[NotebookLM] Initializing authentication...');

  // Step 1: Parse cookies from env
  const cookies = parseCookiesFromEnv();
  const cookieHeader = buildCookieHeader(cookies);

  console.log(
    `[NotebookLM] Found ${Object.keys(cookies).length} cookies from allowed domains`
  );

  // Step 2: Fetch NotebookLM homepage to get CSRF and session tokens
  let html: string;
  let finalUrl: string;
  let httpOk: boolean;
  let httpStatus: number;

  try {
    const response = await fetchWithTimeout(
      NOTEBOOKLM_HOME,
      {
        method: 'GET',
        headers: {
          Cookie: cookieHeader,
          'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        },
        redirect: 'follow',
      },
      AUTH_FETCH_TIMEOUT_MS
    );

    // NOTE(review): assumes fetchWithTimeout resolves to a standard fetch
    // Response (url / ok / status / text()) — confirm against ./rpc.
    finalUrl = response.url;
    httpOk = response.ok;
    httpStatus = response.status;
    html = await response.text();
  } catch (error: unknown) {
    if (error instanceof Error && error.name === 'AbortError') {
      throw new Error(
        '[NotebookLM] Auth fetch timed out after ' +
          `${AUTH_FETCH_TIMEOUT_MS}ms. Check network connectivity.`
      );
    }
    throw new Error(
      `[NotebookLM] Failed to fetch NotebookLM homepage: ${error instanceof Error ? error.message : String(error)}`
    );
  }

  // Step 3: Check for auth redirect (Google login page). This is checked
  // before the status so that expired cookies get the most specific message.
  if (
    finalUrl.includes('accounts.google.com') ||
    finalUrl.includes('/signin') ||
    finalUrl.includes('ServiceLogin')
  ) {
    throw new Error(
      '[NotebookLM] Authentication expired — redirected to Google login page. ' +
        'Update NOTEBOOKLM_AUTH_JSON with fresh cookies from a logged-in browser session.'
    );
  }

  // Step 4: Reject error responses explicitly; otherwise an HTTP error page
  // would be reported as a misleading "could not extract token" failure.
  if (!httpOk) {
    throw new Error(
      `[NotebookLM] NotebookLM homepage returned HTTP ${httpStatus}. ` +
        'Check that the service is reachable and that the auth cookies are still valid.'
    );
  }

  // Step 5: Extract CSRF token and session ID from the page HTML
  const csrfToken = extractPageToken(html, CSRF_REGEX, 'CSRF token (SNlM0e)');
  const sessionId = extractPageToken(html, SESSION_REGEX, 'session ID (FdrFJe)');

  console.log('[NotebookLM] Authentication initialized successfully');

  return {
    cookies,
    cookieHeader,
    csrfToken,
    sessionId,
  };
}
0 commit comments