Skip to content

Commit f57ea4b

Browse files
committed
Refactor URL extraction flow and stabilize bot tests
1 parent 034dc97 commit f57ea4b

7 files changed

Lines changed: 1364 additions & 208 deletions

File tree

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
const {
2+
generateTextWithProvider,
3+
generateImageWithProvider
4+
} = require('../src/providers');
5+
6+
/**
 * Build a minimal stand-in for a fetch Response that carries a JSON payload.
 *
 * Only the members defined here are available: `ok`, `status`, `text()`,
 * `json()` and `headers.get()`.
 *
 * @param {*} payload - Value serialized with JSON.stringify to form the body.
 * @param {number} [status=200] - HTTP status; `ok` is true for 200-299.
 * @returns {object} Response-like mock.
 */
function jsonResponse(payload, status = 200) {
  return {
    ok: status >= 200 && status < 300,
    status,
    text: async () => JSON.stringify(payload),
    // Round-trip through JSON so callers get a fresh copy of what text() encodes.
    json: async () => JSON.parse(JSON.stringify(payload)),
    headers: {
      // Answer only for content-type (case-insensitive), matching imageResponse;
      // any other header name yields null instead of a bogus 'application/json'.
      get: (name) => (String(name || '').toLowerCase() === 'content-type' ? 'application/json' : null)
    }
  };
}
14+
15+
/**
 * Build a minimal stand-in for a fetch Response that carries binary bytes.
 *
 * @param {Buffer|Uint8Array} buffer - Byte view; its `buffer`, `byteOffset`
 *   and `byteLength` are read when `arrayBuffer()` is called.
 * @param {string} [mimeType='image/png'] - Value reported for content-type.
 * @param {number} [status=200] - HTTP status; `ok` is true for 200-299.
 * @returns {object} Response-like mock with `ok`, `status`, `arrayBuffer()`,
 *   `headers.get()` and `text()`.
 */
function imageResponse(buffer, mimeType = 'image/png', status = 200) {
  const isSuccess = status >= 200 && status < 300;

  // Copy exactly the view's byte window out of its backing ArrayBuffer.
  const readBytes = async () => {
    const start = buffer.byteOffset;
    const end = buffer.byteOffset + buffer.byteLength;
    return buffer.buffer.slice(start, end);
  };

  // Only content-type is answered (case-insensitive); everything else is null.
  const headers = {
    get: (name) => {
      const wanted = String(name || '').toLowerCase();
      if (wanted === 'content-type') {
        return mimeType;
      }
      return null;
    }
  };

  return {
    ok: isSuccess,
    status,
    arrayBuffer: readBytes,
    headers,
    text: async () => ''
  };
}
24+
25+
// Exercises generateTextWithProvider / generateImageWithProvider once per
// provider against a mocked global fetch, with placeholder credentials.
describe('provider availability coverage', () => {
  // Every environment variable this suite clears or assigns.
  const PROVIDER_ENV_KEYS = [
    'GEMINI_API_KEY',
    'OPENAI_API_KEY',
    'OPENROUTER_API_KEY',
    'CLOUDFLARE_ACCOUNT_ID',
    'CLOUDFLARE_API_TOKEN',
    'HUGGINGFACE_INFERENCE_API_TOKEN',
    'HUGGINGFACE_BASE_URL'
  ];

  // Placeholder credentials shared by both tests. HUGGINGFACE_BASE_URL is
  // intentionally absent: beforeEach clears it and the tests never set it.
  const FAKE_CREDENTIALS = {
    GEMINI_API_KEY: 'gem-key',
    OPENAI_API_KEY: 'oa-key',
    OPENROUTER_API_KEY: 'or-key',
    CLOUDFLARE_ACCOUNT_ID: 'cf-acc',
    CLOUDFLARE_API_TOKEN: 'cf-key',
    HUGGINGFACE_INFERENCE_API_TOKEN: 'hf-key'
  };

  let originalEnv = {};
  let originalFetch;

  // Assign all placeholder credentials to process.env.
  function configureAllProviders() {
    for (const [name, value] of Object.entries(FAKE_CREDENTIALS)) {
      process.env[name] = value;
    }
  }

  beforeEach(() => {
    vi.restoreAllMocks();
    // Snapshot what the tests are about to overwrite so afterEach can undo it;
    // vi.restoreAllMocks() does not revert plain assignments to global.fetch
    // or to process.env.
    originalFetch = global.fetch;
    originalEnv = {};
    for (const name of PROVIDER_ENV_KEYS) {
      originalEnv[name] = process.env[name];
      delete process.env[name];
    }
  });

  afterEach(() => {
    if (originalFetch === undefined) {
      delete global.fetch;
    } else {
      global.fetch = originalFetch;
    }
    for (const name of PROVIDER_ENV_KEYS) {
      // Assigning undefined to process.env would store the string 'undefined'.
      if (originalEnv[name] === undefined) {
        delete process.env[name];
      } else {
        process.env[name] = originalEnv[name];
      }
    }
  });

  it('supports text generation for all configured providers', async () => {
    configureAllProviders();

    // Route each request to a canned payload based on the URL it targets.
    const fetchMock = vi.fn(async (url) => {
      const u = String(url);
      if (u.includes('generativelanguage.googleapis.com')) {
        return jsonResponse({
          candidates: [{ content: { parts: [{ text: 'gemini ok' }] } }]
        });
      }
      if (u === 'https://api.openai.com/v1/chat/completions') {
        return jsonResponse({
          choices: [{ message: { content: 'openai ok' } }]
        });
      }
      if (u === 'https://openrouter.ai/api/v1/chat/completions') {
        return jsonResponse({
          choices: [{ message: { content: 'openrouter ok' } }]
        });
      }
      if (u.includes('/ai/run/')) {
        return jsonResponse({ result: { response: 'cloudflare ok' } });
      }
      if (u.includes('router.huggingface.co/hf-inference/models/')) {
        return jsonResponse({ generated_text: 'huggingface ok' });
      }
      // Anything else is an unexpected endpoint; surface it as a 404.
      return jsonResponse({ message: `unexpected ${u}` }, 404);
    });
    global.fetch = fetchMock;

    const runtime = { timeout_ms: 1000 };
    const rows = [
      { provider: 'gemini', model: 'gemini-2.5-flash', expected: 'gemini ok' },
      { provider: 'openai', model: 'gpt-4o-mini', expected: 'openai ok' },
      { provider: 'openrouter', model: 'openai/gpt-4o-mini', expected: 'openrouter ok' },
      { provider: 'cloudflare', model: '@cf/meta/llama-3.1-8b-instruct', expected: 'cloudflare ok' },
      { provider: 'huggingface', model: 'mistralai/Mistral-7B-Instruct-v0.2', expected: 'huggingface ok' }
    ];

    for (const row of rows) {
      const out = await generateTextWithProvider({ provider: row.provider, model: row.model }, 'ping', runtime);
      expect(out).toContain(row.expected);
    }
  });

  it('supports image generation for all configured providers', async () => {
    configureAllProviders();

    // The 8-byte PNG signature is enough to yield a non-empty buffer.
    const pngBytes = Buffer.from('89504e470d0a1a0a', 'hex');
    const fetchMock = vi.fn(async (url) => {
      const u = String(url);
      if (u.includes('generativelanguage.googleapis.com')) {
        return jsonResponse({
          candidates: [{
            content: { parts: [{ inlineData: { data: pngBytes.toString('base64'), mimeType: 'image/png' } }] }
          }]
        });
      }
      if (u === 'https://api.openai.com/v1/images/generations') {
        return jsonResponse({ data: [{ b64_json: pngBytes.toString('base64') }] });
      }
      if (u === 'https://openrouter.ai/api/v1/images/generations') {
        return jsonResponse({ data: [{ b64_json: pngBytes.toString('base64') }] });
      }
      if (u.includes('/ai/run/')) {
        return jsonResponse({ result: { image: `data:image/png;base64,${pngBytes.toString('base64')}` } });
      }
      if (u.includes('router.huggingface.co/hf-inference/models/')) {
        return imageResponse(pngBytes, 'image/png');
      }
      // Anything else is an unexpected endpoint; surface it as a 404.
      return jsonResponse({ message: `unexpected ${u}` }, 404);
    });
    global.fetch = fetchMock;

    const runtime = { timeout_ms: 1000 };
    const rows = [
      { provider: 'gemini', model: 'gemini-2.5-flash-image' },
      { provider: 'openai', model: 'gpt-image-1' },
      { provider: 'openrouter', model: 'openai/gpt-image-1' },
      { provider: 'cloudflare', model: '@cf/stabilityai/stable-diffusion-xl-base-1.0' },
      { provider: 'huggingface', model: 'black-forest-labs/FLUX.1-schnell' }
    ];

    for (const row of rows) {
      const out = await generateImageWithProvider({ provider: row.provider, model: row.model }, 'A test prompt', runtime);
      expect(Buffer.isBuffer(out.buffer)).toBe(true);
      expect(out.buffer.length).toBeGreaterThan(0);
    }
  });
});

telegram/scripts/deploy-render-webhook.js

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,72 @@ async function verifyHuggingFaceKey(apiKey) {
103103
if (!res.ok) throw new Error(`Hugging Face token invalid (${res.status}): ${text.slice(0, 300)}`);
104104
}
105105

106+
/**
 * Check a Firecrawl API key by sending a one-result search request.
 *
 * @param {string} apiKey - Key sent as a Bearer token.
 * @returns {Promise<void>} Resolves when the response is ok.
 * @throws {Error} When the response is not ok; the message carries the status
 *   and the first 300 characters of the response text.
 */
async function verifyFirecrawlKey(apiKey) {
  const endpoint = 'https://api.firecrawl.dev/v1/search';
  const searchPayload = { query: 'example.com', limit: 1 };
  const requestInit = {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(searchPayload)
  };

  // NOTE(review): 45000 and 'Firecrawl auth' are presumably a timeout in ms and
  // a label for error reporting — confirm against fetchTextWithTimeout.
  const { res: response, text: bodyText } = await fetchTextWithTimeout(endpoint, requestInit, 45000, 'Firecrawl auth');

  if (response.ok) {
    return;
  }
  throw new Error(`Firecrawl key invalid (${response.status}): ${bodyText.slice(0, 300)}`);
}
120+
121+
/**
 * Check a Jina API key by requesting example.com through r.jina.ai.
 *
 * @param {string} apiKey - Key sent as a Bearer token.
 * @returns {Promise<void>} Resolves when the response is ok.
 * @throws {Error} When the response is not ok; the message carries the status
 *   and the first 300 characters of the response text.
 */
async function verifyJinaKey(apiKey) {
  const endpoint = 'https://r.jina.ai/http://example.com';
  const requestInit = {
    method: 'GET',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      Accept: 'text/plain'
    }
  };

  // NOTE(review): 45000 and 'Jina auth' are presumably a timeout in ms and a
  // label for error reporting — confirm against fetchTextWithTimeout.
  const { res: response, text: bodyText } = await fetchTextWithTimeout(endpoint, requestInit, 45000, 'Jina auth');

  if (response.ok) {
    return;
  }
  throw new Error(`Jina key invalid (${response.status}): ${bodyText.slice(0, 300)}`);
}
131+
132+
/**
 * Check a "Driftbot" key by running a size-1 DQL query against
 * kg.diffbot.com.
 *
 * @param {string} apiKey - Key passed as the `token` query parameter.
 * @returns {Promise<void>} Resolves when the response is ok.
 * @throws {Error} When the response is not ok; the message carries the status
 *   and the first 300 characters of the response text.
 */
async function verifyDriftbotKey(apiKey) {
  const encodedToken = encodeURIComponent(apiKey);
  const encodedQuery = encodeURIComponent('type:Person');
  // The key travels in the query string, so this URL must not be logged as-is.
  const url = `https://kg.diffbot.com/kg/v3/dql?token=${encodedToken}&query=${encodedQuery}&size=1`;
  const requestInit = {
    method: 'GET',
    headers: { Accept: 'application/json' }
  };

  // NOTE(review): 45000 and 'Driftbot auth' are presumably a timeout in ms and
  // a label for error reporting — confirm against fetchTextWithTimeout.
  const { res: response, text: bodyText } = await fetchTextWithTimeout(url, requestInit, 45000, 'Driftbot auth');

  if (response.ok) {
    return;
  }
  throw new Error(`Driftbot key invalid (${response.status}): ${bodyText.slice(0, 300)}`);
}
141+
142+
/**
 * Check a LlamaCloud key: first its `llx-` prefix, then by POSTing to the
 * parsing upload endpoint and looking for an auth rejection.
 *
 * The base URL comes from LLAMAPARSE_BASE_URL, then LLAMA_CLOUD_BASE_URL, then
 * the built-in default; blank values are skipped and trailing slashes removed.
 *
 * @param {string} apiKey - Key to verify; surrounding whitespace is ignored.
 * @returns {Promise<void>} Resolves unless the key is rejected.
 * @throws {Error} When the key lacks the `llx-` prefix, or the endpoint
 *   answers 401/403 (message carries the status and the first 300 characters
 *   of the response text).
 */
async function verifyLlamaCloudKey(apiKey) {
  // Validate and send the same trimmed value, so stray whitespace from an env
  // file cannot pass the prefix check and then corrupt the Authorization header.
  const token = String(apiKey || '').trim();
  if (!token.startsWith('llx-')) {
    throw new Error('LlamaCloud key format invalid (expected llx-)');
  }

  // Trim before choosing, so a whitespace-only variable falls through to the
  // next candidate instead of producing an empty base URL.
  const configuredBase = [process.env.LLAMAPARSE_BASE_URL, process.env.LLAMA_CLOUD_BASE_URL]
    .map((value) => String(value || '').trim())
    .find(Boolean);
  const base = (configuredBase || 'https://api.cloud.llamaindex.ai').replace(/\/+$/, '');

  // NOTE(review): 30000 and 'LlamaParse auth' are presumably a timeout in ms
  // and a label for error reporting — confirm against fetchTextWithTimeout.
  const { res, text } = await fetchTextWithTimeout(`${base}/api/v1/parsing/upload`, {
    method: 'POST',
    headers: { Authorization: `Bearer ${token}` }
  }, 30000, 'LlamaParse auth');

  // Only auth rejections fail the check; any other status is accepted because
  // the request carries no file to upload.
  if (res.status === 401 || res.status === 403) {
    throw new Error(`LlamaParse key invalid (${res.status}): ${text.slice(0, 300)}`);
  }
}
157+
158+
/**
 * Check an AssemblyAI key by POSTing an empty transcript request and looking
 * for an auth rejection.
 *
 * @param {string} apiKey - Key sent verbatim as the Authorization header.
 * @returns {Promise<void>} Resolves unless the key is rejected.
 * @throws {Error} When the endpoint answers 401 or 403; the message carries
 *   the status and the first 300 characters of the response text.
 */
async function verifyAssemblyAiKey(apiKey) {
  const endpoint = 'https://api.assemblyai.com/v2/transcript';
  const requestInit = {
    method: 'POST',
    headers: {
      Authorization: apiKey,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({})
  };

  // NOTE(review): 30000 and 'AssemblyAI auth' are presumably a timeout in ms
  // and a label for error reporting — confirm against fetchTextWithTimeout.
  const { res: response, text: bodyText } = await fetchTextWithTimeout(endpoint, requestInit, 30000, 'AssemblyAI auth');

  // Only auth rejections count as failure; other statuses are accepted.
  const isAuthRejected = [401, 403].includes(response.status);
  if (!isAuthRejected) {
    return;
  }
  throw new Error(`AssemblyAI key invalid (${response.status}): ${bodyText.slice(0, 300)}`);
}
171+
106172
async function verifyAllProviderCredentials(providerEnv, options = {}) {
107173
const strictAll = Boolean(options.strictAll);
108174
const checks = [];
@@ -133,6 +199,31 @@ async function verifyAllProviderCredentials(providerEnv, options = {}) {
133199
enabled: Boolean(key('HUGGINGFACE_INFERENCE_API_TOKEN')),
134200
run: () => verifyHuggingFaceKey(key('HUGGINGFACE_INFERENCE_API_TOKEN'))
135201
});
202+
checks.push({
203+
name: 'firecrawl',
204+
enabled: Boolean(key('FIRECRAWL_API_KEY')),
205+
run: () => verifyFirecrawlKey(key('FIRECRAWL_API_KEY'))
206+
});
207+
checks.push({
208+
name: 'jina',
209+
enabled: Boolean(key('JINA_API_KEY')),
210+
run: () => verifyJinaKey(key('JINA_API_KEY'))
211+
});
212+
checks.push({
213+
name: 'driftbot',
214+
enabled: Boolean(key('DRIFTBOT_API_KEY')),
215+
run: () => verifyDriftbotKey(key('DRIFTBOT_API_KEY'))
216+
});
217+
checks.push({
218+
name: 'llamaparse',
219+
enabled: Boolean(key('LLAMA_CLOUD_API_KEY')),
220+
run: () => verifyLlamaCloudKey(key('LLAMA_CLOUD_API_KEY'))
221+
});
222+
checks.push({
223+
name: 'assemblyai',
224+
enabled: Boolean(key('ASSEMBLYAI_API_KEY')),
225+
run: () => verifyAssemblyAiKey(key('ASSEMBLYAI_API_KEY'))
226+
});
136227

137228
const failures = [];
138229
for (const check of checks) {
@@ -455,8 +546,10 @@ async function main() {
455546
loadEnvFiles([
456547
path.join(repoRoot, '.env.local'),
457548
path.join(repoRoot, '.env.e2e.local'),
549+
path.join(repoRoot, '.crawler'),
458550
path.join(repoRoot, 'comicbot/.env'),
459-
path.join(repoRoot, 'telegram/.env')
551+
path.join(repoRoot, 'telegram/.env'),
552+
path.join(repoRoot, 'telegram/.crawler')
460553
]);
461554

462555
const args = preArgs;
@@ -508,7 +601,12 @@ async function main() {
508601
OPENROUTER_API_KEY: firstNonEmpty(args['openrouter-key'], process.env.OPENROUTER_API_KEY),
509602
CLOUDFLARE_ACCOUNT_ID: cloudflareAccountId,
510603
CLOUDFLARE_API_TOKEN: cloudflareAiToken,
511-
HUGGINGFACE_INFERENCE_API_TOKEN: firstNonEmpty(args['huggingface-token'], process.env.HUGGINGFACE_INFERENCE_API_TOKEN)
604+
HUGGINGFACE_INFERENCE_API_TOKEN: firstNonEmpty(args['huggingface-token'], process.env.HUGGINGFACE_INFERENCE_API_TOKEN),
605+
FIRECRAWL_API_KEY: firstNonEmpty(args['firecrawl-key'], process.env.FIRECRAWL_API_KEY),
606+
JINA_API_KEY: firstNonEmpty(args['jina-key'], process.env.JINA_API_KEY),
607+
DRIFTBOT_API_KEY: firstNonEmpty(args['driftbot-key'], process.env.DRIFTBOT_API_KEY),
608+
LLAMA_CLOUD_API_KEY: firstNonEmpty(args['llama-cloud-key'], process.env.LLAMA_CLOUD_API_KEY, process.env.LLAMAPARSE_API_KEY),
609+
ASSEMBLYAI_API_KEY: firstNonEmpty(args['assemblyai-key'], process.env.ASSEMBLYAI_API_KEY)
512610
};
513611
const resolvedR2 = resolveR2Config(args, cfYaml, awsYaml);
514612
const r2Env = {

telegram/src/data/messages.js

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,36 @@
11
const BOT_DISPLAY_NAME = 'Web2Comic';
2-
const BOT_SHORT_DESCRIPTION = 'AI comic maker from text or URL.';
2+
const BOT_SHORT_DESCRIPTION = 'AI comic maker from text, URL, PDF, image, or voice.';
33
const BOT_COLD_START_NOTICE = 'I just woke up. First response may take a bit longer.';
44

55
const REPO_DOCS_BASE_URL = 'https://github.com/ApartsinProjects/Web2Comics/tree/main/telegram';
66
const PROMPT_MANUAL_URL = `${REPO_DOCS_BASE_URL}/docs/deployment-runbook.md`;
77

88
function buildOnboardingMessage(chatId, options = {}) {
99
const isAdmin = Boolean(options.isAdmin);
10+
const defaults = options && typeof options.defaults === 'object' ? options.defaults : {};
11+
const textProvider = String(defaults.textProvider || '').trim();
12+
const textModel = String(defaults.textModel || '').trim();
13+
const imageProvider = String(defaults.imageProvider || '').trim();
14+
const imageModel = String(defaults.imageModel || '').trim();
15+
const extractor = String(defaults.extractor || '').trim();
16+
const pdfExtractor = String(defaults.pdfExtractor || '').trim();
17+
const imageExtractor = String(defaults.imageExtractor || '').trim();
18+
const voiceExtractor = String(defaults.voiceExtractor || '').trim();
19+
const enrichmentProvider = String(defaults.enrichmentProvider || '').trim();
20+
const enrichmentFallback = String(defaults.enrichmentFallback || '').trim();
1021
const lines = [
1122
`Welcome to ${BOT_DISPLAY_NAME}.`,
1223
BOT_SHORT_DESCRIPTION,
1324
'',
25+
'Default stack:',
26+
`- text: ${textProvider || 'gemini'}${textModel ? `/${textModel}` : ''}`,
27+
`- image: ${imageProvider || 'gemini'}${imageModel ? `/${imageModel}` : ''}`,
28+
`- page extractor: ${extractor || 'jina'}`,
29+
`- pdf extractor: ${pdfExtractor || 'llamaparse'}`,
30+
`- image extractor: ${imageExtractor || 'gemini'}`,
31+
`- voice extractor: ${voiceExtractor || 'assemblyai'}`,
32+
`- short-story enrichment: ${enrichmentProvider || 'wikipedia'}${enrichmentFallback ? ` -> ${enrichmentFallback}` : ''}`,
33+
'',
1434
'Fastest way to start:',
1535
'1) Run /user',
1636
'2) Send your ID to Sasha and ask for shared key access',
@@ -22,7 +42,9 @@ function buildOnboardingMessage(chatId, options = {}) {
2242
'4) Verify: /keys',
2343
'',
2444
'Once connected, useful commands:',
25-
'/help /config /vendor <name> /panels <count> /style <preset>'
45+
'/help /config /vendors /vendor <role> <name> /panels <count>',
46+
'',
47+
'You can send: plain text, web links, PDF links/files, image links/files, or voice/audio.'
2648
];
2749
if (isAdmin) {
2850
lines.push('');
@@ -40,7 +62,7 @@ function buildHelpMessage(chatId, options = {}) {
4062
BOT_DISPLAY_NAME,
4163
BOT_SHORT_DESCRIPTION,
4264
'',
43-
'Send plain text or URL to generate a comic.',
65+
'Send plain text, URL, or PDF to generate a comic.',
4466
'',
4567
'Commands:',
4668
'/start - show welcome message.',
@@ -62,14 +84,19 @@ function buildHelpMessage(chatId, options = {}) {
6284
...styleShortcutLines,
6385
'/new_style <name> <text> - save a custom named style.',
6486
'/language <code> - set output language.',
65-
'/extractor <gemini|firecrawl|jina|chromium> - set URL story extraction vendor.',
87+
'/vendors [role] - inspect roles, current vendor, and allowed options.',
88+
'/vendor <role> <name> - set vendor for any role (text/image/url/pdf/image_extract/voice/enrich/enrich_fallback).',
89+
'/vendor <name> - quick set text+image provider together.',
90+
'/extractor <gemini|firecrawl|jina|driftbot|chromium> - set URL story extraction vendor (alias: /vendor url <name>).',
91+
'/pdf_extractor <llamaparse> - set PDF story extraction vendor (alias: /vendor pdf <name>).',
92+
'/image_extractor <gemini|openai> - set image story extraction vendor (alias: /vendor image_extract <name>).',
93+
'/voice_extractor <assemblyai> - set voice/audio story extraction vendor (alias: /vendor voice <name>).',
6694
'/mode <default|media_group|single> - set delivery mode.',
6795
'/consistency <on|off> - toggle reference-style consistency flow.',
6896
'/detail <low|medium|high> - set output detail level.',
6997
'/crazyness <0..2> - set story invention temperature.',
7098
'/concurrency <1..5> - set parallel image generation.',
7199
'/retries <0..3> - set provider retry attempts.',
72-
'/vendor <name> - set both text/image provider.',
73100
'/text_vendor <name> - set text provider only.',
74101
'/image_vendor <name> - set image provider only.',
75102
'/models [text|image] [model] - list/set model for current provider.',

0 commit comments

Comments
 (0)