Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions clis/chatgptweb/image.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import * as os from 'node:os';
import * as path from 'node:path';
import { cli, Strategy } from '@jackwener/opencli/registry';
import { saveBase64ToFile } from '@jackwener/opencli/utils';
import { getChatGPTVisibleImageUrls, sendChatGPTMessage, waitForChatGPTImages, getChatGPTImageAssets } from './utils.js';

// Host name of the ChatGPT web app; used below as the command's `domain`
// and to build the URL the command navigates to.
const CHATGPT_DOMAIN = 'chatgpt.com';

/**
 * Map an image MIME type to the file extension used when saving it.
 *
 * Matching is case-insensitive and substring based, so values such as
 * `IMAGE/PNG` or `image/png; charset=binary` are still recognised.
 *
 * @param {string} [mime] - MIME type reported for the image; may be missing.
 * @returns {string} `.png`, `.webp`, `.gif`, or `.jpg` for anything else
 *   (including a missing or non-string value).
 */
function extFromMime(mime) {
  // A missing/non-string MIME type falls through to the JPEG default
  // instead of throwing on `.includes`.
  const normalized = typeof mime === 'string' ? mime.toLowerCase() : '';
  if (normalized.includes('png')) return '.png';
  if (normalized.includes('webp')) return '.webp';
  if (normalized.includes('gif')) return '.gif';
  return '.jpg';
}

/**
 * Interpret a CLI flag value as a boolean.
 *
 * Real booleans pass through untouched. Anything else is stringified,
 * trimmed and lower-cased; only `true`, `1`, `yes` and `on` count as true.
 *
 * @param {*} value - Raw flag value (boolean, string, number, null, ...).
 * @returns {boolean} The interpreted flag.
 */
function normalizeBooleanFlag(value) {
  if (value === true || value === false) {
    return value;
  }
  const text = String(value ?? '').trim().toLowerCase();
  return ['true', '1', 'yes', 'on'].includes(text);
}

/**
 * Shorten a path for display by replacing the user's home directory with `~`.
 *
 * The home directory is only replaced when it is a whole leading path
 * component, so a sibling such as `/home/bobby` is left alone when the home
 * directory is `/home/bob`.
 *
 * @param {string} filePath - Path to display.
 * @returns {string} The path with a leading home directory replaced by `~`,
 *   or the path unchanged when it is not inside the home directory.
 */
function displayPath(filePath) {
  const home = os.homedir();
  // With no home directory there is nothing meaningful to abbreviate.
  if (!home) return filePath;
  if (filePath === home) return '~';
  // Compare against "home + separator" so only true descendants match.
  const prefix = home.endsWith(path.sep) ? home : `${home}${path.sep}`;
  return filePath.startsWith(prefix)
    ? `~${path.sep}${filePath.slice(prefix.length)}`
    : filePath;
}

/**
 * Read the URL currently shown in the browser page.
 *
 * @param {object} page - Page handle exposing `evaluate(script)`.
 * @returns {Promise<string>} The page's `window.location.href`, or
 *   `https://chatgpt.com` when the evaluation rejects or yields anything
 *   other than a non-empty string.
 */
async function currentChatGPTLink(page) {
  const fallback = 'https://chatgpt.com';
  const href = await page.evaluate('window.location.href').catch(() => '');
  if (typeof href !== 'string' || href === '') {
    return fallback;
  }
  return href;
}

/**
 * `chatgptweb image` command: sends an image prompt to the ChatGPT web UI,
 * waits for the generated images and (unless `sd` is set) saves them to the
 * output directory.
 *
 * Every returned row has the columns `status`, `file` and `link`.
 */
export const imageCommand = cli({
  site: 'chatgptweb',
  name: 'image',
  description: 'Generate images with ChatGPT web and save them locally',
  domain: CHATGPT_DOMAIN,
  strategy: Strategy.COOKIE,
  browser: true,
  navigateBefore: false,
  defaultFormat: 'plain',
  timeoutSeconds: 240,
  args: [
    { name: 'prompt', positional: true, required: true, help: 'Image prompt to send to ChatGPT' },
    { name: 'op', default: path.join(os.homedir(), 'Pictures', 'chatgpt'), help: 'Output directory' },
    { name: 'sd', type: 'boolean', default: false, help: 'Skip download shorthand; only show ChatGPT link' },
  ],
  columns: ['status', 'file', 'link'],
  func: async (page, kwargs) => {
    // Placeholder shown in the `file` column when nothing was written to disk.
    const noFile = '📁 -';
    const row = (status, file, href) => ({ status, file, link: `🔗 ${href}` });

    const { prompt, sd: skipFlag } = kwargs;
    const targetDir = kwargs.op || path.join(os.homedir(), 'Pictures', 'chatgpt');
    // An empty string is what a bare `--sd` may arrive as, so treat it as "on".
    const skipDownload = skipFlag === '' || skipFlag === true || normalizeBooleanFlag(skipFlag);
    const waitSeconds = 120;

    // Open a fresh conversation with a full reload so stale React sidebar
    // state from a previous chat does not leak in.
    await page.goto(`https://${CHATGPT_DOMAIN}/new`, { settleMs: 2000 });

    // Snapshot of images already on the page, so only new ones are reported.
    const knownUrls = await getChatGPTVisibleImageUrls(page);

    // The prompt is wrapped so the request for an image is explicit.
    const delivered = await sendChatGPTMessage(page, `Generate an image of: ${prompt}`);
    if (!delivered) {
      return [row('⚠️ send-failed', noFile, await currentChatGPTLink(page))];
    }

    const imageUrls = await waitForChatGPTImages(page, knownUrls, waitSeconds);
    const link = await currentChatGPTLink(page);

    if (!imageUrls.length) {
      return [row('⚠️ no-images', noFile, link)];
    }
    if (skipDownload) {
      return [row('🎨 generated', noFile, link)];
    }

    // Pull the image bytes out of the page as base64 data URLs.
    const assets = await getChatGPTImageAssets(page, imageUrls);
    if (!assets.length) {
      return [row('⚠️ export-failed', noFile, link)];
    }

    // One timestamp per run; multiple images get a 1-based numeric suffix.
    const stamp = Date.now();
    const numbered = assets.length > 1;
    const saved = [];
    for (const [position, asset] of assets.entries()) {
      const payload = asset.dataUrl.replace(/^data:[^;]+;base64,/, '');
      const suffix = numbered ? `_${position + 1}` : '';
      const fileName = `chatgpt_${stamp}${suffix}${extFromMime(asset.mimeType)}`;
      const filePath = path.join(targetDir, fileName);
      await saveBase64ToFile(payload, filePath);
      saved.push(row('✅ saved', `📁 ${displayPath(filePath)}`, link));
    }
    return saved;
  },
});
297 changes: 297 additions & 0 deletions clis/chatgptweb/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
/**
* ChatGPT web browser automation helpers for image generation.
* Cross-platform: works on Linux/macOS/Windows via OpenCLI's CDP browser automation.
*/

// Host name and origin of the ChatGPT web app, exported for other modules.
export const CHATGPT_DOMAIN = 'chatgpt.com';
export const CHATGPT_URL = 'https://chatgpt.com';

// Selectors
// CSS selectors, keyed on aria-label, for the prompt composer and the
// send button of the ChatGPT page.
const COMPOSER_SELECTOR = '[aria-label="Chat with ChatGPT"]';
const SEND_BTN_SELECTOR = 'button[aria-label="Send prompt"]';

/**
 * Build the in-page JavaScript that locates the ChatGPT prompt composer.
 *
 * The returned text is a *function body*, not an expression: it defines
 * `findComposer` and finishes with `return { findComposer, markerAttr }`.
 * Embed it inside its own function (for example `(() => { ...body... })()`),
 * because any statements placed after it in the same function are unreachable.
 *
 * In the page, `findComposer()` returns the first visible element matching
 * the composer selector and tags it with the marker attribute so later calls
 * can find the same element directly; it returns `null` when none is visible.
 *
 * @returns {string} JavaScript source for a function body.
 */
function buildComposerLocatorScript() {
  const selectorsJson = JSON.stringify([COMPOSER_SELECTOR]);
  const markerAttr = 'data-opencli-chatgpt-composer';
  return `
    const isVisible = (el) => {
      if (!(el instanceof HTMLElement)) return false;
      const style = window.getComputedStyle(el);
      if (style.display === 'none' || style.visibility === 'hidden') return false;
      const rect = el.getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    };

    const markerAttr = ${JSON.stringify(markerAttr)};

    const findComposer = () => {
      const marked = document.querySelector('[' + markerAttr + '="1"]');
      if (marked instanceof HTMLElement && isVisible(marked)) return marked;

      for (const selector of ${selectorsJson}) {
        const node = Array.from(document.querySelectorAll(selector)).find(c => c instanceof HTMLElement && isVisible(c));
        if (node instanceof HTMLElement) {
          node.setAttribute(markerAttr, '1');
          return node;
        }
      }
      return null;
    };

    findComposer.toString = () => 'findComposer';
    return { findComposer, markerAttr };
  `;
}

/**
 * Send a message to the ChatGPT composer and submit it.
 *
 * Steps: close the sidebar if open, locate and focus the composer, type the
 * text (native typing when the page handle offers it, otherwise
 * `document.execCommand('insertText')`), then click the send button.
 *
 * @param {object} page - Page handle; uses `page.evaluate(script)`,
 *   `page.wait(seconds)` and, when present, `page.nativeType(text)`.
 * @param {string} text - Message to type into the composer.
 * @returns {Promise<boolean>} `true` once the send button has been clicked;
 *   `false` when no visible composer or no send button was found.
 */
export async function sendChatGPTMessage(page, text) {
  // Close the sidebar if it is open (it can cover the chat composer).
  await page.evaluate(`
    (() => {
      const closeBtn = document.querySelector('button[aria-label="Close sidebar"]');
      if (closeBtn) closeBtn.click();
    })()
  `);

  // Let the sidebar finish closing and the composer become ready.
  await page.wait(2);

  // The locator snippet is a function body that ends in `return`, so it must
  // run inside its own nested function; pasted inline it would end the outer
  // function before the composer is ever looked up.
  const composerReady = await page.evaluate(`
    (() => {
      const { findComposer } = (() => {
        ${buildComposerLocatorScript()}
      })();
      const composer = findComposer();
      if (!composer) return false;
      composer.focus();
      // Only touch the DOM when there is leftover text to clear.
      if (composer.textContent) composer.textContent = '';
      return true;
    })()
  `);

  if (!composerReady) return false;

  // Prefer the page handle's native typing; fall back to execCommand when it
  // is unavailable or fails.
  let typed = false;
  if (typeof page.nativeType === 'function') {
    try {
      await page.nativeType(text);
      typed = true;
    } catch {
      // Best effort: the execCommand fallback below takes over.
    }
  }
  if (!typed) {
    await page.evaluate(`
      (() => {
        const composer = document.querySelector(${JSON.stringify(COMPOSER_SELECTOR)});
        if (!composer) return;
        composer.focus();
        document.execCommand('insertText', false, ${JSON.stringify(text)});
      })()
    `);
  }

  // The send button only shows once the composer contains text.
  await page.wait(1.5);

  // Find and click in a single evaluation so the button cannot disappear
  // between the lookup and the click.
  const clicked = await page.evaluate(`
    (() => {
      const sendBtn = document.querySelector(${JSON.stringify(SEND_BTN_SELECTOR)});
      if (!sendBtn) return false;
      sendBtn.click();
      return true;
    })()
  `);
  return Boolean(clicked);
}

/**
 * Report whether ChatGPT still appears to be producing a response.
 *
 * The page counts as busy when any button's aria-label is exactly
 * `Stop generating` or contains `Thinking`.
 *
 * @param {object} page - Page handle exposing `evaluate(script)`.
 * @returns {Promise<boolean>} Result of the in-page check.
 */
export async function isGenerating(page) {
  const busyProbe = `
    (() => {
      for (const button of Array.from(document.querySelectorAll('button'))) {
        const label = button.getAttribute('aria-label') || '';
        if (label === 'Stop generating') return true;
        if (label.includes('Thinking')) return true;
      }
      return false;
    })()
  `;
  return await page.evaluate(busyProbe);
}

/**
 * Collect the URLs of content images currently visible on the page.
 *
 * An image is kept when it is rendered larger than 32x32 CSS pixels, has a
 * source URL, does not look like an avatar/profile/logo/icon (judged by its
 * alt text and class name), and is at least 128 pixels in one dimension.
 * Duplicate URLs are reported once, in first-seen order.
 *
 * @param {object} page - Page handle exposing `evaluate(script)`.
 * @returns {Promise<string[]>} De-duplicated image URLs.
 */
export async function getChatGPTVisibleImageUrls(page) {
  const collectScript = `
    (() => {
      const isRendered = (node) => {
        if (!(node instanceof HTMLElement)) return false;
        const css = window.getComputedStyle(node);
        if (css.display === 'none' || css.visibility === 'hidden') return false;
        const box = node.getBoundingClientRect();
        return box.width > 32 && box.height > 32;
      };
      const mentionsAny = (text, words) => words.some(word => text.includes(word));

      const unique = new Set();
      for (const img of Array.from(document.querySelectorAll('img'))) {
        if (!(img instanceof HTMLImageElement) || !isRendered(img)) continue;

        const src = img.currentSrc || img.src || '';
        if (!src) continue;

        const alt = (img.getAttribute('alt') || '').toLowerCase();
        if (mentionsAny(alt, ['avatar', 'profile', 'logo', 'icon'])) continue;

        const cls = (img.className || '').toLowerCase();
        if (mentionsAny(cls, ['avatar', 'profile', 'icon'])) continue;

        const width = img.naturalWidth || img.width || 0;
        const height = img.naturalHeight || img.height || 0;
        if (width < 128 && height < 128) continue;

        unique.add(src);
      }
      return Array.from(unique);
    })()
  `;
  return await page.evaluate(collectScript);
}

/**
 * Poll the page until newly generated images have settled.
 *
 * Every 3 seconds the page is checked. Polls made while ChatGPT is still
 * generating, or that show no URL beyond `beforeUrls`, are skipped. The new
 * URL list is returned once two counted polls in a row show the same list,
 * or on the final poll if it shows any new URL.
 *
 * @param {object} page - Page handle; uses `page.wait(seconds)` and is passed
 *   on to `isGenerating` and `getChatGPTVisibleImageUrls`.
 * @param {string[]} beforeUrls - Image URLs present before the prompt was sent.
 * @param {number} timeoutSeconds - Overall budget; rounded up to whole polls,
 *   with a minimum of one poll.
 * @returns {Promise<string[]>} The most recent list of new image URLs, or an
 *   empty array when none were ever observed.
 */
export async function waitForChatGPTImages(page, beforeUrls, timeoutSeconds) {
  const pollSeconds = 3;
  const known = new Set(beforeUrls);
  const attempts = Math.max(1, Math.ceil(timeoutSeconds / pollSeconds));

  let candidate = [];
  let repeats = 0;

  for (let attempt = 1; attempt <= attempts; attempt += 1) {
    await page.wait(pollSeconds);

    // Skip the image check entirely while a response is still streaming.
    if (await isGenerating(page)) continue;

    const visible = await getChatGPTVisibleImageUrls(page);
    const fresh = visible.filter((url) => !known.has(url));
    if (fresh.length === 0) continue;

    const unchanged = fresh.join('\n') === candidate.join('\n');
    if (unchanged) {
      repeats += 1;
    } else {
      candidate = fresh;
      repeats = 1;
    }

    const isFinalAttempt = attempt === attempts;
    if (repeats >= 2 || isFinalAttempt) {
      return candidate;
    }
  }
  return candidate;
}

/**
 * Export images from the page as base64 data URLs.
 *
 * For each URL, in order: a `data:` URL is used as-is; any other URL is
 * fetched from inside the page with credentials and read into a data URL.
 * When that yields nothing and a matching `<img>` element exists, the element
 * is drawn to a canvas and exported as PNG. URLs that cannot be exported by
 * either route are left out of the result.
 *
 * @param {object} page - Page handle exposing `evaluate(script, ...)`.
 * @param {string[]} urls - Image URLs to export.
 * @returns {Promise<Array<{url: string, dataUrl: string, mimeType: string, width: number, height: number}>>}
 *   One entry per exported image; `width`/`height` come from the matching
 *   `<img>` element and are 0 when no such element is found.
 */
export async function getChatGPTImageAssets(page, urls) {
  const serializedUrls = JSON.stringify(urls);
  const exportScript = `
    (async (targetUrls) => {
      const readAsDataUrl = (blob) => new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onloadend = () => resolve(String(reader.result || ''));
        reader.onerror = () => reject(new Error('Failed to read blob'));
        reader.readAsDataURL(blob);
      });

      const mimeFor = (reported, url) => {
        if (reported) return reported;
        const lowered = String(url || '').toLowerCase();
        const byExtension = [['.png', 'image/png'], ['.webp', 'image/webp'], ['.gif', 'image/gif']];
        for (const [needle, mime] of byExtension) {
          if (lowered.includes(needle)) return mime;
        }
        return 'image/jpeg';
      };

      const exported = [];

      for (const targetUrl of targetUrls) {
        const source = String(targetUrl);

        // The matching element supplies size info and the canvas fallback.
        const img = Array.from(document.querySelectorAll('img')).find(el =>
          (el.currentSrc || el.src || '') === targetUrl
        );
        const width = img ? (img.naturalWidth || img.width || 0) : 0;
        const height = img ? (img.naturalHeight || img.height || 0) : 0;

        let dataUrl = '';
        let mimeType = 'image/jpeg';

        try {
          if (source.startsWith('data:')) {
            dataUrl = source;
            mimeType = (source.match(/^data:([^;]+);/i) || [])[1] || 'image/png';
          } else {
            const res = await fetch(targetUrl, { credentials: 'include' });
            if (res.ok) {
              const blob = await res.blob();
              mimeType = mimeFor(blob.type, targetUrl);
              dataUrl = await readAsDataUrl(blob);
            }
          }
        } catch (e) {
          // Fetch can fail (e.g. CORS); the canvas route below is tried next.
        }

        if (!dataUrl && img && img instanceof HTMLImageElement) {
          try {
            const canvas = document.createElement('canvas');
            canvas.width = img.naturalWidth || img.width || 512;
            canvas.height = img.naturalHeight || img.height || 512;
            const ctx = canvas.getContext('2d');
            if (ctx) {
              ctx.drawImage(img, 0, 0);
              dataUrl = canvas.toDataURL('image/png');
              mimeType = 'image/png';
            }
          } catch (e) { }
        }

        if (!dataUrl) continue;
        exported.push({ url: source, dataUrl, mimeType, width, height });
      }

      return exported;
    })(${serializedUrls})
  `;
  return await page.evaluate(exportScript, urls);
}
Loading