diff --git a/gulp/helpers/offline-bundle.js b/gulp/helpers/offline-bundle.js
new file mode 100644
index 00000000000..b3397c9675a
--- /dev/null
+++ b/gulp/helpers/offline-bundle.js
@@ -0,0 +1,59 @@
+const fs = require('fs');
+const path = require('path');
+const { execFileSync } = require('child_process');
+
+const PUBLIC_DIR = 'public';
+const BUNDLE_NAME = 'camel-docs-offline.zip';
+
+/**
+ * Generates an offline documentation bundle: a single .zip archive of all generated Markdown (.md)
+ * files plus /llms.txt, preserving the website directory structure. It lets agents (and humans)
+ * with no or restricted internet access read the Camel docs locally - download, unzip (e.g. into
+ * /tmp) and read the Markdown from there. See CAMEL-23781.
+ *
+ * Must run after the .md files have been generated (see generate-markdown task). Uses the system
+ * `zip` tool, so no extra dependency is required.
+ *
+ * Best effort: a missing output directory, a missing `zip` executable, a failing `zip` run or a
+ * file system error is reported on stderr and never thrown, so the calling task keeps running.
+ *
+ * @returns {void}
+ */
+function generateOfflineBundle() {
+  const bundlePath = path.join(PUBLIC_DIR, BUNDLE_NAME);
+
+  if (!fs.existsSync(PUBLIC_DIR)) {
+    console.error(`Cannot generate ${BUNDLE_NAME}: '${PUBLIC_DIR}' directory not found`);
+    return;
+  }
+
+  try {
+    // `zip` updates an existing archive in place, so remove any stale bundle first: otherwise
+    // entries for pages that no longer exist would linger. Done inside the try block so a failed
+    // removal is reported like any other failure instead of aborting the caller.
+    if (fs.existsSync(bundlePath)) {
+      fs.unlinkSync(bundlePath);
+    }
+
+    // run from public/ so paths stay relative to the site root; include only .md files and llms.txt
+    execFileSync('zip', ['-r', '-q', BUNDLE_NAME, '.', '-i', '*.md', 'llms.txt'], {
+      cwd: PUBLIC_DIR,
+      stdio: 'inherit'
+    });
+
+    const sizeMb = (fs.statSync(bundlePath).size / (1024 * 1024)).toFixed(1);
+    console.log(`Generated /${BUNDLE_NAME} (${sizeMb} MB)`);
+  } catch (error) {
+    // a spawn-level ENOENT usually means the `zip` executable is not installed; say so explicitly
+    // because the raw message ("spawnSync zip ENOENT") does not tell the user what to do
+    const spawnFailed = error.code === 'ENOENT' && String(error.syscall).startsWith('spawnSync');
+    const reason = spawnFailed
+      ? "could not run 'zip' (is it installed and on the PATH?)"
+      : error.message;
+    console.error(`Failed to generate ${BUNDLE_NAME}:`, reason);
+  }
+}
+
+module.exports = {
+  generateOfflineBundle
+};
diff --git a/gulp/tasks/generate-markdown.js b/gulp/tasks/generate-markdown.js
index cc5964e8fe5..923d5936e77 100644
--- a/gulp/tasks/generate-markdown.js
+++ b/gulp/tasks/generate-markdown.js
@@ -3,6
+3,7 @@ const { parse } = require('node-html-parser'); const { createTurndownService } = require('../helpers/turndown-config'); const { generateToonSitemaps } = require('../helpers/toon-format'); const { generateLlmsTxt } = require('../helpers/llms-txt'); +const { generateOfflineBundle } = require('../helpers/offline-bundle'); const { generateReleasesIndex, generateBlogIndex } = require('../helpers/rss-feed'); const { generateAllIndexes } = require('../helpers/html-index'); @@ -106,6 +107,9 @@ async function generateMarkdown(done) { // Generate llms.txt file generateLlmsTxt(processedPages); + // Generate the offline documentation bundle (all .md files + llms.txt) as a single .zip + generateOfflineBundle(); + // Generate toon format sitemaps await generateToonSitemaps(); diff --git a/llms-txt-template.md b/llms-txt-template.md index badf76d9c4c..1ee43e1f709 100644 --- a/llms-txt-template.md +++ b/llms-txt-template.md @@ -7,6 +7,8 @@ For example: - HTML: `https://camel.apache.org/components/next/languages/simple-language.html` - Markdown: `https://camel.apache.org/components/next/languages/simple-language.md` +For agents or environments with no or restricted internet access, the complete documentation is also available as a single offline archive of all Markdown files, preserving the site structure: `https://camel.apache.org/camel-docs-offline.zip`. Download it once, unzip it locally (for example into `/tmp`) and read the `.md` files from there. + ## Key facts - Apache Camel is a **library**, not a platform — it embeds in your existing Spring Boot or Quarkus application