Skip to content

Commit 97f88c6

Browse files
author
Miriad
committed
Merge feat/notebooklm-ts-client into dev — pure TS NotebookLM client replaces Python CLI wrapper (2,009 lines, 0 Python deps)
2 parents 5bfa0e0 + e1c6d66 commit 97f88c6

File tree

7 files changed

+1736
-554
lines changed

7 files changed

+1736
-554
lines changed

app/api/cron/ingest/route.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ function buildPrompt(trends: TrendResult[], research?: ResearchPayload): string
152152
}
153153
}
154154

155-
if (research.infographicPath) {
155+
if (research.infographicUrl) {
156156
researchContext += `\n### Infographic Available\nAn infographic has been generated for this topic. Use sceneType "narration" with bRollUrl pointing to the infographic for at least one scene.\n`;
157157
}
158158
}

lib/services/notebooklm/auth.ts

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
/**
2+
* NotebookLM TypeScript Client — Authentication
3+
*
4+
* Reads Google cookies from the NOTEBOOKLM_AUTH_JSON env var (Playwright
5+
* storage state format), fetches the NotebookLM homepage to extract CSRF
6+
* and session tokens, and returns an AuthTokens object for RPC calls.
7+
*
8+
* @module lib/services/notebooklm/auth
9+
*/
10+
11+
import type { AuthTokens, NotebookLMCookie } from './types';
12+
import { fetchWithTimeout } from './rpc';
13+
14+
// ---------------------------------------------------------------------------
15+
// Constants
16+
// ---------------------------------------------------------------------------
17+
18+
// URL requested by initAuth; the returned HTML is searched for the CSRF and
// session tokens.
const NOTEBOOKLM_HOME = 'https://notebooklm.google.com/';
19+
20+
// Cookie domains accepted by isAllowedDomain when filtering the cookies read
// from NOTEBOOKLM_AUTH_JSON.
const ALLOWED_DOMAINS = [
21+
'.google.com',
22+
'notebooklm.google.com',
23+
'.googleusercontent.com',
24+
];
25+
26+
// Captures the string value stored under the "SNlM0e" key; initAuth uses the
// capture as the CSRF token.
const CSRF_REGEX = /"SNlM0e"\s*:\s*"([^"]+)"/;
27+
// Captures the string value stored under the "FdrFJe" key; initAuth uses the
// capture as the session ID.
const SESSION_REGEX = /"FdrFJe"\s*:\s*"([^"]+)"/;
28+
29+
/** Default timeout for the initial auth fetch (ms) */
30+
const AUTH_FETCH_TIMEOUT_MS = 30_000;
31+
32+
// ---------------------------------------------------------------------------
33+
// Helpers
34+
// ---------------------------------------------------------------------------
35+
36+
/**
 * Check whether a cookie domain is covered by the domain allow-list.
 *
 * The cookie domain and every allow-list entry are compared in a canonical
 * form: lower-cased and prefixed with a dot. Because both sides then start
 * with a dot, a suffix match can only succeed on a label boundary, i.e. the
 * domain is either the allowed host itself or a genuine sub-domain of it.
 * Without that normalisation an entry written without a leading dot (such as
 * `example.com`) would also match unrelated hosts like `evilexample.com`.
 *
 * @param domain - Cookie domain, with or without a leading dot.
 * @param allowedDomains - Allow-list to match against; defaults to the
 *   module-level ALLOWED_DOMAINS.
 * @returns `true` when the domain equals, or is a sub-domain of, an entry.
 */
function isAllowedDomain(
  domain: string,
  allowedDomains: readonly string[] = ALLOWED_DOMAINS
): boolean {
  // Host names are case-insensitive, so compare lower-cased, dot-prefixed forms.
  const canonical = (host: string): string => {
    const lowered = host.toLowerCase();
    return lowered.startsWith('.') ? lowered : `.${lowered}`;
  };

  const candidate = canonical(domain);
  if (candidate === '.') {
    // An empty domain can never be allowed.
    return false;
  }

  return allowedDomains.some((allowed) => {
    const suffix = canonical(allowed);
    // Ignore empty allow-list entries rather than letting "." match anything.
    return suffix !== '.' && candidate.endsWith(suffix);
  });
}
48+
49+
/**
50+
* Parse the Playwright storage state JSON from the env var.
51+
*/
52+
function parseCookiesFromEnv(): Record<string, string> {
53+
const authJson = process.env.NOTEBOOKLM_AUTH_JSON;
54+
if (!authJson) {
55+
throw new Error(
56+
'[NotebookLM] NOTEBOOKLM_AUTH_JSON env var is not set. ' +
57+
'Set it to a Playwright storage state JSON with Google cookies.'
58+
);
59+
}
60+
61+
let storageState: { cookies?: NotebookLMCookie[] };
62+
try {
63+
storageState = JSON.parse(authJson) as { cookies?: NotebookLMCookie[] };
64+
} catch {
65+
throw new Error(
66+
'[NotebookLM] NOTEBOOKLM_AUTH_JSON is not valid JSON. ' +
67+
'Expected Playwright storage state format: {"cookies": [...]}'
68+
);
69+
}
70+
71+
const rawCookies = storageState.cookies;
72+
if (!Array.isArray(rawCookies) || rawCookies.length === 0) {
73+
throw new Error(
74+
'[NotebookLM] No cookies found in NOTEBOOKLM_AUTH_JSON. ' +
75+
'Expected format: {"cookies": [{"name": "SID", "value": "...", "domain": ".google.com"}, ...]}'
76+
);
77+
}
78+
79+
// Filter to allowed domains and deduplicate (last wins)
80+
const cookies: Record<string, string> = {};
81+
for (const cookie of rawCookies) {
82+
if (
83+
cookie.name &&
84+
cookie.value &&
85+
cookie.domain &&
86+
isAllowedDomain(cookie.domain)
87+
) {
88+
cookies[cookie.name] = cookie.value;
89+
}
90+
}
91+
92+
if (!cookies['SID']) {
93+
throw new Error(
94+
'[NotebookLM] SID cookie not found in NOTEBOOKLM_AUTH_JSON. ' +
95+
'The Google auth cookies may be missing or from the wrong domain.'
96+
);
97+
}
98+
99+
return cookies;
100+
}
101+
102+
/**
103+
* Build a Cookie header string from a cookies record.
104+
*/
105+
function buildCookieHeader(cookies: Record<string, string>): string {
106+
return Object.entries(cookies)
107+
.map(([name, value]) => `${name}=${value}`)
108+
.join('; ');
109+
}
110+
111+
// ---------------------------------------------------------------------------
112+
// Main auth function
113+
// ---------------------------------------------------------------------------
114+
115+
/**
116+
* Initialize authentication for the NotebookLM client.
117+
*
118+
* 1. Reads cookies from NOTEBOOKLM_AUTH_JSON env var
119+
* 2. Fetches the NotebookLM homepage to extract CSRF + session tokens
120+
* 3. Returns AuthTokens for use in RPC calls
121+
*
122+
* @throws Error if cookies are missing, auth is expired, or tokens can't be extracted
123+
*/
124+
export async function initAuth(): Promise<AuthTokens> {
125+
console.log('[NotebookLM] Initializing authentication...');
126+
127+
// Step 1: Parse cookies from env
128+
const cookies = parseCookiesFromEnv();
129+
const cookieHeader = buildCookieHeader(cookies);
130+
131+
console.log(
132+
`[NotebookLM] Found ${Object.keys(cookies).length} cookies from allowed domains`
133+
);
134+
135+
// Step 2: Fetch NotebookLM homepage to get CSRF and session tokens
136+
let html: string;
137+
let finalUrl: string;
138+
139+
try {
140+
const response = await fetchWithTimeout(
141+
NOTEBOOKLM_HOME,
142+
{
143+
method: 'GET',
144+
headers: {
145+
Cookie: cookieHeader,
146+
'User-Agent':
147+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
148+
},
149+
redirect: 'follow',
150+
},
151+
AUTH_FETCH_TIMEOUT_MS
152+
);
153+
154+
finalUrl = response.url;
155+
html = await response.text();
156+
} catch (error: unknown) {
157+
if (error instanceof Error && error.name === 'AbortError') {
158+
throw new Error(
159+
'[NotebookLM] Auth fetch timed out after ' +
160+
`${AUTH_FETCH_TIMEOUT_MS}ms. Check network connectivity.`
161+
);
162+
}
163+
throw new Error(
164+
`[NotebookLM] Failed to fetch NotebookLM homepage: ${error instanceof Error ? error.message : String(error)}`
165+
);
166+
}
167+
168+
// Step 3: Check for auth redirect (Google login page)
169+
if (
170+
finalUrl.includes('accounts.google.com') ||
171+
finalUrl.includes('/signin') ||
172+
finalUrl.includes('ServiceLogin')
173+
) {
174+
throw new Error(
175+
'[NotebookLM] Authentication expired — redirected to Google login page. ' +
176+
'Update NOTEBOOKLM_AUTH_JSON with fresh cookies from a logged-in browser session.'
177+
);
178+
}
179+
180+
// Step 4: Extract CSRF token
181+
const csrfMatch = html.match(CSRF_REGEX);
182+
if (!csrfMatch || !csrfMatch[1]) {
183+
throw new Error(
184+
'[NotebookLM] Could not extract CSRF token (SNlM0e) from NotebookLM page. ' +
185+
'The page structure may have changed, or auth cookies may be invalid.'
186+
);
187+
}
188+
const csrfToken = csrfMatch[1];
189+
190+
// Step 5: Extract session ID
191+
const sessionMatch = html.match(SESSION_REGEX);
192+
if (!sessionMatch || !sessionMatch[1]) {
193+
throw new Error(
194+
'[NotebookLM] Could not extract session ID (FdrFJe) from NotebookLM page. ' +
195+
'The page structure may have changed, or auth cookies may be invalid.'
196+
);
197+
}
198+
const sessionId = sessionMatch[1];
199+
200+
console.log('[NotebookLM] Authentication initialized successfully');
201+
202+
return {
203+
cookies,
204+
cookieHeader,
205+
csrfToken,
206+
sessionId,
207+
};
208+
}

0 commit comments

Comments
 (0)