diff --git a/clis/chatgptweb/image.js b/clis/chatgptweb/image.js
new file mode 100644
index 000000000..1f8545928
--- /dev/null
+++ b/clis/chatgptweb/image.js
@@ -0,0 +1,129 @@
+import * as os from 'node:os';
+import * as path from 'node:path';
+import { cli, Strategy } from '@jackwener/opencli/registry';
+import { saveBase64ToFile } from '@jackwener/opencli/utils';
+import {
+  CHATGPT_DOMAIN,
+  CHATGPT_URL,
+  getChatGPTImageAssets,
+  getChatGPTVisibleImageUrls,
+  sendChatGPTMessage,
+  waitForChatGPTImages,
+} from './utils.js';
+
+/**
+ * Pick a file extension for an image MIME type.
+ * Anything that is not recognisably PNG, WebP or GIF (including a missing
+ * value) falls back to `.jpg`.
+ */
+function extFromMime(mime) {
+  const type = String(mime ?? '').toLowerCase();
+  if (type.includes('png')) return '.png';
+  if (type.includes('webp')) return '.webp';
+  if (type.includes('gif')) return '.gif';
+  return '.jpg';
+}
+
+/**
+ * Interpret a CLI flag value as a boolean.
+ * Booleans pass through; any other value is stringified, trimmed and
+ * lower-cased, and counts as true only for `true`, `1`, `yes` or `on`.
+ */
+function normalizeBooleanFlag(value) {
+  if (typeof value === 'boolean') return value;
+  const normalized = String(value ?? '').trim().toLowerCase();
+  return ['true', '1', 'yes', 'on'].includes(normalized);
+}
+
+/**
+ * Shorten a path inside the user's home directory to `~/...` for display.
+ * The home prefix must end on a separator boundary, so with home `/home/bob`
+ * a sibling such as `/home/bobby/x.png` is returned unchanged.
+ */
+function displayPath(filePath) {
+  const home = os.homedir();
+  if (!home) return filePath;
+  if (filePath === home) return '~';
+  const prefix = home.endsWith(path.sep) ? home : `${home}${path.sep}`;
+  return filePath.startsWith(prefix) ? `~${path.sep}${filePath.slice(prefix.length)}` : filePath;
+}
+
+/**
+ * Read the page's current URL, falling back to the ChatGPT home page when the
+ * evaluation rejects or yields anything other than a non-empty string.
+ */
+async function currentChatGPTLink(page) {
+  const url = await page.evaluate('window.location.href').catch(() => '');
+  return typeof url === 'string' && url ? url : CHATGPT_URL;
+}
+
+/**
+ * `chatgptweb image`: send an image prompt to ChatGPT web and save the results.
+ * Every returned row has the `status`, `file` and `link` columns.
+ */
+export const imageCommand = cli({
+  site: 'chatgptweb',
+  name: 'image',
+  description: 'Generate images with ChatGPT web and save them locally',
+  domain: CHATGPT_DOMAIN,
+  strategy: Strategy.COOKIE,
+  browser: true,
+  navigateBefore: false,
+  defaultFormat: 'plain',
+  timeoutSeconds: 240,
+  args: [
+    { name: 'prompt', positional: true, required: true, help: 'Image prompt to send to ChatGPT' },
+    { name: 'op', default: path.join(os.homedir(), 'Pictures', 'chatgpt'), help: 'Output directory' },
+    { name: 'sd', type: 'boolean', default: false, help: 'Skip download shorthand; only show ChatGPT link' },
+  ],
+  columns: ['status', 'file', 'link'],
+  func: async (page, kwargs) => {
+    const prompt = kwargs.prompt;
+    const outputDir = kwargs.op || path.join(os.homedir(), 'Pictures', 'chatgpt');
+    // `--sd` given without a value may arrive as an empty string; count it as enabled.
+    const skipDownload = kwargs.sd === '' || normalizeBooleanFlag(kwargs.sd);
+    const timeout = 120;
+    const row = (status, link, file = '-') => ({ status, file: `📁 ${file}`, link: `🔗 ${link}` });
+
+    // Navigate to chatgpt.com/new with full reload to clear React sidebar state
+    await page.goto(`${CHATGPT_URL}/new`, { settleMs: 2000 });
+
+    const beforeUrls = await getChatGPTVisibleImageUrls(page);
+
+    // Send the image generation prompt - must be explicit
+    const sent = await sendChatGPTMessage(page, `Generate an image of: ${prompt}`);
+    if (!sent) {
+      return [row('⚠️ send-failed', await currentChatGPTLink(page))];
+    }
+
+    // Wait for response and images
+    const urls = await waitForChatGPTImages(page, beforeUrls, timeout);
+    const link = await currentChatGPTLink(page);
+
+    if (!urls.length) {
+      return [row('⚠️ no-images', link)];
+    }
+
+    if (skipDownload) {
+      return [row('🎨 generated', link)];
+    }
+
+    // Export and save images
+    const assets = await getChatGPTImageAssets(page, urls);
+    if (!assets.length) {
+      return [row('⚠️ export-failed', link)];
+    }
+
+    const stamp = Date.now();
+    const results = [];
+    for (const [index, asset] of assets.entries()) {
+      const base64 = asset.dataUrl.replace(/^data:[^;]+;base64,/, '');
+      const suffix = assets.length > 1 ? `_${index + 1}` : '';
+      const ext = extFromMime(asset.mimeType);
+      const filePath = path.join(outputDir, `chatgpt_${stamp}${suffix}${ext}`);
+      await saveBase64ToFile(base64, filePath);
+      results.push(row('✅ saved', link, displayPath(filePath)));
+    }
+    return results;
+  },
+});
diff --git a/clis/chatgptweb/utils.js b/clis/chatgptweb/utils.js
new file mode 100644
index 000000000..c20617c18
--- /dev/null
+++ b/clis/chatgptweb/utils.js
@@ -0,0 +1,321 @@
+/**
+ * ChatGPT web browser automation helpers for image generation.
+ * Cross-platform: works on Linux/macOS/Windows via OpenCLI's CDP browser automation.
+ */
+
+export const CHATGPT_DOMAIN = 'chatgpt.com';
+export const CHATGPT_URL = 'https://chatgpt.com';
+
+// Selectors
+const COMPOSER_SELECTOR = '[aria-label="Chat with ChatGPT"]';
+const SEND_BTN_SELECTOR = 'button[aria-label="Send prompt"]';
+
+/**
+ * Build the in-page source that defines `isVisible`, `markerAttr`,
+ * `clearMarkers` and `findComposer`.
+ *
+ * The result is a run of statements meant to be pasted at the top of a function
+ * body that is evaluated in the page, so it must not `return`: a return here
+ * would end the host function before the code that follows the snippet runs.
+ */
+function buildComposerLocatorScript() {
+  const selectorsJson = JSON.stringify([COMPOSER_SELECTOR]);
+  const markerAttr = 'data-opencli-chatgpt-composer';
+  return `
+    const isVisible = (el) => {
+      if (!(el instanceof HTMLElement)) return false;
+      const style = window.getComputedStyle(el);
+      if (style.display === 'none' || style.visibility === 'hidden') return false;
+      const rect = el.getBoundingClientRect();
+      return rect.width > 0 && rect.height > 0;
+    };
+
+    const markerAttr = ${JSON.stringify(markerAttr)};
+    const clearMarkers = (active) => {
+      document.querySelectorAll('[' + markerAttr + ']').forEach(node => {
+        if (node !== active) node.removeAttribute(markerAttr);
+      });
+    };
+
+    const findComposer = () => {
+      const marked = document.querySelector('[' + markerAttr + '="1"]');
+      if (marked instanceof HTMLElement && isVisible(marked)) return marked;
+
+      for (const selector of ${selectorsJson}) {
+        const node = Array.from(document.querySelectorAll(selector)).find(c => isVisible(c));
+        if (node) {
+          // Keep the marker on the current composer only.
+          clearMarkers(node);
+          node.setAttribute(markerAttr, '1');
+          return node;
+        }
+      }
+      return null;
+    };
+  `;
+}
+
+/**
+ * Send a message to the ChatGPT composer and submit it.
+ *
+ * @param {object} page - Page handle providing `evaluate`, `wait` and, optionally, `nativeType`.
+ * @param {string} text - Message to enter into the composer.
+ * @returns {Promise<boolean>} true once the send button was clicked; false when the
+ *   composer or the send button could not be found.
+ */
+export async function sendChatGPTMessage(page, text) {
+  // Close sidebar if open (it can cover the chat composer)
+  await page.evaluate(`
+    (() => {
+      const closeBtn = document.querySelector('button[aria-label="Close sidebar"]');
+      if (closeBtn) closeBtn.click();
+    })()
+  `);
+  await page.wait(0.5);
+
+  // Give the composer time to become ready
+  await page.wait(1.5);
+
+  const composerReady = await page.evaluate(`
+    (() => {
+      ${buildComposerLocatorScript()}
+      const composer = findComposer();
+      if (!composer) return false;
+      composer.focus();
+      composer.textContent = '';
+      return true;
+    })()
+  `);
+
+  if (!composerReady) return false;
+
+  // Prefer the page's native typing when it exists; fall back to inserting the
+  // text through the DOM when it is missing or throws.
+  let typed = false;
+  if (typeof page.nativeType === 'function') {
+    try {
+      await page.nativeType(text);
+      typed = true;
+    } catch {
+      typed = false;
+    }
+  }
+  if (!typed) {
+    await page.evaluate(`
+      (() => {
+        const composer = document.querySelector(${JSON.stringify(COMPOSER_SELECTOR)});
+        if (!composer) return;
+        composer.focus();
+        document.execCommand('insertText', false, ${JSON.stringify(text)});
+      })()
+    `);
+  }
+
+  // Wait for send button to appear (it only shows when there's text)
+  await page.wait(1.5);
+
+  // Find and click in one evaluation so the button cannot change between the two steps.
+  const clicked = await page.evaluate(`
+    (() => {
+      const sendBtn = document.querySelector(${JSON.stringify(SEND_BTN_SELECTOR)});
+      if (!sendBtn) return false;
+      sendBtn.click();
+      return true;
+    })()
+  `);
+  return clicked === true;
+}
+
+/**
+ * Check if ChatGPT is still generating a response.
+ * Busy means some button has the aria-label "Stop generating" or an aria-label
+ * containing "Thinking".
+ */
+export async function isGenerating(page) {
+  const busy = await page.evaluate(`
+    (() => {
+      return Array.from(document.querySelectorAll('button')).some(b => {
+        const label = b.getAttribute('aria-label') || '';
+        return label === 'Stop generating' || label.includes('Thinking');
+      });
+    })()
+  `);
+  return Boolean(busy);
+}
+
+/**
+ * Get visible image URLs from the ChatGPT page (excluding profile/avatar images).
+ *
+ * @returns {Promise<string[]>} De-duplicated sources of images rendered larger than
+ *   32x32 and not smaller than 128px in both dimensions; always an array.
+ */
+export async function getChatGPTVisibleImageUrls(page) {
+  const urls = await page.evaluate(`
+    (() => {
+      const isVisible = (el) => {
+        if (!(el instanceof HTMLElement)) return false;
+        const style = window.getComputedStyle(el);
+        if (style.display === 'none' || style.visibility === 'hidden') return false;
+        const rect = el.getBoundingClientRect();
+        return rect.width > 32 && rect.height > 32;
+      };
+
+      const imgs = Array.from(document.querySelectorAll('img')).filter(img =>
+        img instanceof HTMLImageElement && isVisible(img)
+      );
+
+      const urls = [];
+      const seen = new Set();
+
+      for (const img of imgs) {
+        const src = img.currentSrc || img.src || '';
+        const alt = (img.getAttribute('alt') || '').toLowerCase();
+        const cls = (img.className || '').toLowerCase();
+        const width = img.naturalWidth || img.width || 0;
+        const height = img.naturalHeight || img.height || 0;
+
+        if (!src) continue;
+        if (alt.includes('avatar') || alt.includes('profile') || alt.includes('logo') || alt.includes('icon')) continue;
+        if (cls.includes('avatar') || cls.includes('profile') || cls.includes('icon')) continue;
+        if (width < 128 && height < 128) continue;
+        if (seen.has(src)) continue;
+
+        seen.add(src);
+        urls.push(src);
+      }
+      return urls;
+    })()
+  `);
+  // Callers filter and measure the result, so never hand back a non-array.
+  return Array.isArray(urls) ? urls : [];
+}
+
+/**
+ * Wait for new images to appear after sending a prompt.
+ *
+ * Polls every 3 seconds, skipping polls while a response is still being
+ * generated, and returns once the same set of new URLs was seen on two polls.
+ *
+ * @param {object} page - Page handle providing `evaluate` and `wait`.
+ * @param {string[]} beforeUrls - URLs already on the page; these are ignored.
+ * @param {number} timeoutSeconds - Upper bound used to size the number of polls.
+ * @returns {Promise<string[]>} The last set of new URLs seen, or [] when none appeared.
+ */
+export async function waitForChatGPTImages(page, beforeUrls, timeoutSeconds) {
+  const beforeSet = new Set(beforeUrls);
+  const pollIntervalSeconds = 3;
+  const maxPolls = Math.max(1, Math.ceil(timeoutSeconds / pollIntervalSeconds));
+  let lastUrls = [];
+  let stableCount = 0;
+
+  for (let i = 0; i < maxPolls; i += 1) {
+    await page.wait(pollIntervalSeconds);
+
+    // Check if still generating
+    if (await isGenerating(page)) continue;
+
+    const urls = (await getChatGPTVisibleImageUrls(page)).filter((url) => !beforeSet.has(url));
+    if (urls.length === 0) continue;
+
+    if (urls.join('\n') === lastUrls.join('\n')) {
+      stableCount += 1;
+    } else {
+      lastUrls = urls;
+      stableCount = 1;
+    }
+
+    if (stableCount >= 2) return lastUrls;
+  }
+  return lastUrls;
+}
+
+/**
+ * Export images by URL: fetch each one from inside the page and convert it to a
+ * base64 data URL, falling back to drawing the matching <img> onto a canvas.
+ *
+ * @param {object} page - Page handle providing `evaluate`.
+ * @param {string[]} urls - Image URLs to export.
+ * @returns {Promise<Array<{url: string, dataUrl: string, mimeType: string, width: number, height: number}>>}
+ *   One entry per URL that could be exported; always an array.
+ */
+export async function getChatGPTImageAssets(page, urls) {
+  const urlsJson = JSON.stringify(urls ?? []);
+  const assets = await page.evaluate(`
+    (async (targetUrls) => {
+      const blobToDataUrl = (blob) => new Promise((resolve, reject) => {
+        const reader = new FileReader();
+        reader.onloadend = () => resolve(String(reader.result || ''));
+        reader.onerror = () => reject(new Error('Failed to read blob'));
+        reader.readAsDataURL(blob);
+      });
+
+      const inferMime = (value, fallbackUrl) => {
+        if (value) return value;
+        const lower = String(fallbackUrl || '').toLowerCase();
+        if (lower.includes('.png')) return 'image/png';
+        if (lower.includes('.webp')) return 'image/webp';
+        if (lower.includes('.gif')) return 'image/gif';
+        return 'image/jpeg';
+      };
+
+      const results = [];
+
+      for (const targetUrl of targetUrls) {
+        let dataUrl = '';
+        let mimeType = 'image/jpeg';
+        let width = 0;
+        let height = 0;
+
+        // Try to find the img element for size info
+        const img = Array.from(document.querySelectorAll('img')).find(el =>
+          (el.currentSrc || el.src || '') === targetUrl
+        );
+        if (img) {
+          width = img.naturalWidth || img.width || 0;
+          height = img.naturalHeight || img.height || 0;
+        }
+
+        try {
+          if (String(targetUrl).startsWith('data:')) {
+            dataUrl = String(targetUrl);
+            mimeType = (String(targetUrl).match(/^data:([^;]+);/i) || [])[1] || 'image/png';
+          } else {
+            // Try to fetch via CORS from the page's origin
+            const res = await fetch(targetUrl, { credentials: 'include' });
+            if (res.ok) {
+              const blob = await res.blob();
+              mimeType = inferMime(blob.type, targetUrl);
+              dataUrl = await blobToDataUrl(blob);
+            }
+          }
+        } catch (e) {
+          // If fetch fails (CORS), try canvas approach via img element
+        }
+
+        // Fallback: draw img to canvas
+        if (!dataUrl && img && img instanceof HTMLImageElement) {
+          try {
+            const canvas = document.createElement('canvas');
+            canvas.width = img.naturalWidth || img.width || 512;
+            canvas.height = img.naturalHeight || img.height || 512;
+            const ctx = canvas.getContext('2d');
+            if (ctx) {
+              ctx.drawImage(img, 0, 0);
+              dataUrl = canvas.toDataURL('image/png');
+              mimeType = 'image/png';
+            }
+          } catch (e) {
+            // Best effort: dataUrl stays empty, so this URL is left out of the results.
+          }
+        }
+
+        if (dataUrl) {
+          results.push({ url: String(targetUrl), dataUrl, mimeType, width, height });
+        }
+      }
+
+      return results;
+    })(${urlsJson})
+  `);
+  return Array.isArray(assets) ? assets : [];
+}
diff --git a/docs/adapters/browser/chatgptweb.md b/docs/adapters/browser/chatgptweb.md
new file mode 100644
index 000000000..1f09bf67c
--- /dev/null
+++ b/docs/adapters/browser/chatgptweb.md
@@ -0,0 +1,49 @@
+# ChatGPT Web
+
+**Mode**: 🔐 Browser · **Domain**: `chatgpt.com`
+
+## Commands
+
+| Command | Description |
+|---------|-------------|
+| `opencli chatgptweb image <prompt>` | Generate images in ChatGPT web and optionally save them locally |
+
+## Usage Examples
+
+```bash
+# Generate an image and save it to the default directory
+opencli chatgptweb image "a cyberpunk city at night"
+
+# Save to a custom output directory
+opencli chatgptweb image "a robot sketching on paper" --op ~/Downloads/chatgpt-images
+
+# Only generate in ChatGPT and print the conversation link
+opencli chatgptweb image "a tiny watercolor fox" --sd true
+```
+
+## Options
+
+| Option | Description |
+|--------|-------------|
+| `prompt` | Image prompt to send (required positional argument) |
+| `--op` | Output directory for downloaded images (default: `~/Pictures/chatgpt`) |
+| `--sd` | Skip download and only print the ChatGPT conversation link |
+
+## Behavior
+
+- The command opens a fresh `chatgpt.com/new` page before sending the prompt.
+- Output is plain `status / file / link`, not a markdown table.
+- When `--sd` is enabled, the command does not download files and only prints the ChatGPT link.
+- Downloaded files are named with a timestamp to avoid overwriting prior runs.
+
+## Prerequisites
+
+- Chrome is running
+- You are already logged into `chatgpt.com`
+- [Browser Bridge extension](/guide/browser-bridge) is installed
+
+## Caveats
+
+- This adapter targets the ChatGPT web UI, not the macOS desktop app.
+- It depends on the current browser session and can fail if ChatGPT shows login, challenge, quota, or other gating UI.
+- DOM or product changes on ChatGPT can break composer detection, image detection, or export behavior.