From d13b01a2a747bef1cbc266d1d6649e0966366414 Mon Sep 17 00:00:00 2001 From: Kyle Mistele Date: Thu, 16 Jan 2025 16:00:12 -0600 Subject: [PATCH 1/2] feat: add support for following domain redirects (disabled by default) This is useful in cases where the top-level site e.g. mywebsite.com redirects to www.mywebsite.com --- .gitignore | 3 +- package-lock.json | 1081 +++++++++++++++++++++++++++++++++++++++++++++ src/cli.ts | 3 + src/index.ts | 2 +- src/types.ts | 6 + 5 files changed, 1093 insertions(+), 2 deletions(-) create mode 100644 package-lock.json diff --git a/.gitignore b/.gitignore index 765dd28..b59ab01 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ node_modules foo.txt *.log .DS_Store -dist/ \ No newline at end of file +dist/ +.idea/ \ No newline at end of file diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..530f45a --- /dev/null +++ b/package-lock.json @@ -0,0 +1,1081 @@ +{ + "name": "sitefetch", + "version": "0.0.16", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "sitefetch", + "version": "0.0.16", + "license": "MIT", + "dependencies": { + "cheerio": "^1.0.0", + "gpt-tokenizer": "^2.8.1", + "happy-dom": "^16.5.3", + "micromatch": "^4.0.8", + "turndown": "^7.2.0", + "turndown-plugin-gfm": "^1.0.2" + }, + "bin": { + "sitefetch": "dist/cli.js" + }, + "devDependencies": { + "@mozilla/readability": "^0.5.0", + "@types/bun": "^1.1.15", + "@types/micromatch": "^4.0.9", + "@types/turndown": "^5.0.5", + "cac": "^6.7.14", + "p-queue": "^8.0.1", + "picocolors": "^1.1.1", + "rolldown": "^1.0.0-beta.1", + "typescript": "^5.7.3", + "unplugin-isolated-decl": "^0.10.4" + } + }, + "node_modules/@emnapi/core": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.3.1.tgz", + "integrity": "sha512-pVGjBIt1Y6gg3EJN8jTcfpP/+uuRksIo055oE/OBkDNcjZqVbfkWCksG1Jp4yZnj3iKWyWX8fdG/j6UDYPbFog==", + "dev": true, + "optional": true, + "dependencies": { + 
"@emnapi/wasi-threads": "1.0.1", + "tslib": "^2.4.0" + } + }, + "node_modules/@emnapi/runtime": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.3.1.tgz", + "integrity": "sha512-kEBmG8KyqtxJZv+ygbEim+KCGtIq1fC22Ms3S4ziXmYKm8uyoLX0MHONVKwp+9opg390VaKRNt4a7A9NwmpNhw==", + "dev": true, + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@emnapi/wasi-threads": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.0.1.tgz", + "integrity": "sha512-iIBu7mwkq4UQGeMEM8bLwNK962nXdhodeScX4slfQnRhEMMzvYivHhutCIk8uojvmASXXPC2WNEjwxFWk72Oqw==", + "dev": true, + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "dev": true + }, + "node_modules/@mixmark-io/domino": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", + "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==" + }, + "node_modules/@mozilla/readability": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.5.0.tgz", + "integrity": "sha512-Z+CZ3QaosfFaTqvhQsIktyGrjFjSC0Fa4EMph4mqKnWhmyoGICsV/8QK+8HpXut6zV7zwfWwqDmEjtk1Qf6EgQ==", + "dev": true, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@napi-rs/wasm-runtime": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.6.tgz", + "integrity": "sha512-z8YVS3XszxFTO73iwvFDNpQIzdMmSDTP/mB3E/ucR37V3Sx57hSExcXyMoNwaucWxnsWf4xfbZv0iZ30jr0M4Q==", + "dev": true, + "optional": true, + "dependencies": { + "@emnapi/core": "^1.3.1", + 
"@emnapi/runtime": "^1.3.1", + "@tybys/wasm-util": "^0.9.0" + } + }, + "node_modules/@oxc-parser/binding-darwin-arm64": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-parser/binding-darwin-arm64/-/binding-darwin-arm64-0.45.0.tgz", + "integrity": "sha512-AcpoTQNIS9k+c2HK2Sr2mCTdcBMbWnH6gmBPFhKB1ZOZbph2OcErvy+RYnXAxW0kocCtKLdhvmzhq/IOuqbN3w==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@oxc-parser/binding-darwin-x64": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-parser/binding-darwin-x64/-/binding-darwin-x64-0.45.0.tgz", + "integrity": "sha512-HSbKLiW22eOajRn3pgxQurl7bMq8QWbOVK9L1UQXOKzXiId6y7i6uAP/O8f26QYyJfasIeLB1c2Z2bgUqhF/2A==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@oxc-parser/binding-linux-arm64-gnu": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-parser/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-0.45.0.tgz", + "integrity": "sha512-9rINcFE18xFe8Mab/EU+R4As9amJtNG6d6OSvDKYTqIVqyF0pnsM76/5a4FAViuibw2rAwAQSksXcBSvEKCJNg==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oxc-parser/binding-linux-arm64-musl": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-parser/binding-linux-arm64-musl/-/binding-linux-arm64-musl-0.45.0.tgz", + "integrity": "sha512-Sm/uAum0Fopykvqbwol0S6abncfuqtRk3HIuMVrP0YtZUoiQRzg+3WJYvkgBRr61ASNK5uvNQz7seS6zMmx/rQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oxc-parser/binding-linux-x64-gnu": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-parser/binding-linux-x64-gnu/-/binding-linux-x64-gnu-0.45.0.tgz", + "integrity": "sha512-vj6gEmE704wVZjGqhA9s0tQgWRVnSBljgCvpVe3VA0koqEgeKz6BgR2BcYUYs6m9fC/bp8OjhS9QIqFsgRho/A==", + "cpu": [ + "x64" + ], + "dev": true, + 
"optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oxc-parser/binding-linux-x64-musl": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-parser/binding-linux-x64-musl/-/binding-linux-x64-musl-0.45.0.tgz", + "integrity": "sha512-TNiXietxgLwWtL8U/ui7zi6vAQpHnO/nhMHrFTvUMDsqrLAr3N2nY+lPNeF3bWwxKano418Tt8JNZP+E4NsL/A==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@oxc-parser/binding-win32-arm64-msvc": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-parser/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-0.45.0.tgz", + "integrity": "sha512-hoYn7xXsMBEstlQgF2j0WKw2Vj+lkS0UhN1ULcNNdO60QKIX/Auh5Gb9gF2jQO0rec56xIounSk4VrYJAvVVtQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@oxc-parser/binding-win32-x64-msvc": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-parser/binding-win32-x64-msvc/-/binding-win32-x64-msvc-0.45.0.tgz", + "integrity": "sha512-FMPsYYmQcX1AU6/Ny4gl81FSgBZ/YGg9PwGeYtairgytRUj2SHwXSRKagNnoOij1wpafbEfvfp+AzhxU8jVV8Q==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@oxc-project/types": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.45.0.tgz", + "integrity": "sha512-s1xCyuYV024s4Jh9l3a9/gSyIG5qr6P0gdwz03UMx6UqaXRkhD2INeRSNxGM/XXKfYVbAqUBy3q/QEMkTNio9Q==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/Boshen" + } + }, + "node_modules/@rolldown/binding-darwin-arm64": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-beta.1.tgz", + "integrity": "sha512-e4QpTp7eu61JilK958i21RK/HniwVLjZgfShqoQY1VM+KDYz90cNuopKQ3Z3oCkvyAN3xI8IaRhy02nlxdR/DA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + 
"node_modules/@rolldown/binding-darwin-x64": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-beta.1.tgz", + "integrity": "sha512-+WHRLrogJl99EQ6HtYhy7EwIZ1wicD0RSX2T5mjfOM6AmPwPTXQ0n6MKOs1abU6ZyCj5Izlo6rLsao0h9FMUDA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rolldown/binding-freebsd-x64": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-beta.1.tgz", + "integrity": "sha512-ZP9Q1q4IfvJ8dfWTHOF3cquNpAKuQQ+kZJQTxo85fGnKqtqMWFNouaBVd79pqCxU3w4oIjuZ8o55qNDomMTbVA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rolldown/binding-linux-arm-gnueabihf": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-beta.1.tgz", + "integrity": "sha512-B/R4Vt8f8z/WmW9Y9NMgA+t5bCfRLmgZohs5mWf8KoD5FRlpvJtCo/SnD7fEg9npHEP5A28+Cikiyd7aCcKPSA==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rolldown/binding-linux-arm64-gnu": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-beta.1.tgz", + "integrity": "sha512-xkGD+YLH+vQZiqxKEsXe8xS/owQXkyARaNB9NfFrAacLoNIRZM5UEZGNKxXyRWd1kSEkYkJ3/WiqvGGCcqUg1A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rolldown/binding-linux-arm64-musl": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-beta.1.tgz", + "integrity": "sha512-Ey2UxKFL74JuWpdNl9stpV0kxHZIgCWCEUnDnpQ1hcBwO9KwDM5qicLtXfsjozD6vt+xzbrL2D/uTrziYZ7IDQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + 
"os": [ + "linux" + ] + }, + "node_modules/@rolldown/binding-linux-x64-gnu": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-beta.1.tgz", + "integrity": "sha512-a8QP35x/3mggWqCpFtaF3/PbWl5P9QKpP/muk3iMPgzrXto8zPsEl3imsP3EBh4KwanBVHIf8pEkBQ+/7iMTgQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rolldown/binding-linux-x64-musl": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-beta.1.tgz", + "integrity": "sha512-uIqKwnkZjTY8FmqGMaSjwtWlCdV88LV9bjdkv+mb7I+BBw+9cJlIQy0P8YnGEOEcnDPis/SiraCpkJ/eHYaSZw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rolldown/binding-wasm32-wasi": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-beta.1.tgz", + "integrity": "sha512-RB+gbhwZtTbKbvHzUcaRFva2ONCUTuxDEb/b3/rd3O82OTPUZzOY24mqreiXH1XG09p6WFXSE8dzUrN120Q29w==", + "cpu": [ + "wasm32" + ], + "dev": true, + "optional": true, + "dependencies": { + "@napi-rs/wasm-runtime": "^0.2.4" + }, + "engines": { + "node": ">=14.21.3" + } + }, + "node_modules/@rolldown/binding-win32-arm64-msvc": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-beta.1.tgz", + "integrity": "sha512-NSccQD7+9vhEfDMc8HyODuUU1jLYEsEiICc1zwmbeg0FXx1pwpFpZZQby4bAMnK2obav7D9FfsruYWodhNdIqQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rolldown/binding-win32-ia32-msvc": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-ia32-msvc/-/binding-win32-ia32-msvc-1.0.0-beta.1.tgz", + "integrity": 
"sha512-bUQOqqHfqgX9gHGZFGVYQRtc4+9diFDS/f85dKrzzUg7MF91ZU9mJUoemL4eyyj3B83N3FlHZtAPvDX3N2Zz8A==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rolldown/binding-win32-x64-msvc": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-beta.1.tgz", + "integrity": "sha512-k8Ld05OlxkzR/+Ob8+IESaZ4uFcgLwbbwtUZLoryn3S6lCogkclcN/4m1wo/PyWtUAWF5mdz83SrkRL8dS4AqA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/pluginutils": { + "version": "5.1.4", + "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-5.1.4.tgz", + "integrity": "sha512-USm05zrsFxYLPdWWq+K3STlWiT/3ELn3RcV5hJMghpeAIhxfsUIg6mt12CBJBInWMV4VneoV7SfGv8xIwo2qNQ==", + "dev": true, + "dependencies": { + "@types/estree": "^1.0.0", + "estree-walker": "^2.0.2", + "picomatch": "^4.0.2" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/pluginutils/node_modules/picomatch": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz", + "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/@tybys/wasm-util": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.9.0.tgz", + "integrity": "sha512-6+7nlbMVX/PVDCwaIQ8nTOPveOcFLSt8GcXdx8hD0bt39uWxYT88uXzqTd4fTvqta7oeUJqudepapKNt2DYJFw==", + "dev": true, + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@types/braces": { + "version": "3.0.5", + "resolved": 
"https://registry.npmjs.org/@types/braces/-/braces-3.0.5.tgz", + "integrity": "sha512-SQFof9H+LXeWNz8wDe7oN5zu7ket0qwMu5vZubW4GCJ8Kkeh6nBWUz87+KTz/G3Kqsrp0j/W253XJb3KMEeg3w==", + "dev": true + }, + "node_modules/@types/bun": { + "version": "1.1.16", + "resolved": "https://registry.npmjs.org/@types/bun/-/bun-1.1.16.tgz", + "integrity": "sha512-E+ue6NMcn4FXC5bDRE1W/BXUVs01h5Mt02qH8/8HGCox9akuh8KNOFdwvaQS9TDgT2RmUyJYFRRqA60WtTnm2g==", + "dev": true, + "dependencies": { + "bun-types": "1.1.43" + } + }, + "node_modules/@types/estree": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz", + "integrity": "sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==", + "dev": true + }, + "node_modules/@types/micromatch": { + "version": "4.0.9", + "resolved": "https://registry.npmjs.org/@types/micromatch/-/micromatch-4.0.9.tgz", + "integrity": "sha512-7V+8ncr22h4UoYRLnLXSpTxjQrNUXtWHGeMPRJt1nULXI57G9bIcpyrHlmrQ7QK24EyyuXvYcSSWAM8GA9nqCg==", + "dev": true, + "dependencies": { + "@types/braces": "*" + } + }, + "node_modules/@types/node": { + "version": "20.12.14", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.14.tgz", + "integrity": "sha512-scnD59RpYD91xngrQQLGkE+6UrHUPzeKZWhhjBSa3HSkwjbQc38+q3RoIVEwxQGRw3M+j5hpNAM+lgV3cVormg==", + "dev": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/turndown": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.5.tgz", + "integrity": "sha512-TL2IgGgc7B5j78rIccBtlYAnkuv8nUQqhQc+DSYV5j9Be9XOcm/SKOVRuA47xAVI3680Tk9B1d8flK2GWT2+4w==", + "dev": true + }, + "node_modules/@types/ws": { + "version": "8.5.13", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.13.tgz", + "integrity": "sha512-osM/gWBTPKgHV8XkTunnegTRIsvF6owmf5w+JtAfOw472dptdm0dlGv4xCt6GwQRcC2XVOvvRE/0bAoQcL2QkA==", + "dev": true, + "dependencies": { + "@types/node": "*" + } + }, + 
"node_modules/acorn": { + "version": "8.14.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", + "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==", + "dev": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==" + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/bun-types": { + "version": "1.1.43", + "resolved": "https://registry.npmjs.org/bun-types/-/bun-types-1.1.43.tgz", + "integrity": "sha512-W0wCtVH+bwFp7p3Zgs03CqxEDmXxEvmmUM/FBKgWIv9T8gyeotvIjIbHzuDScc2DphhRNtr7hJLCR5PspYL5qw==", + "dev": true, + "dependencies": { + "@types/node": "~20.12.8", + "@types/ws": "~8.5.10" + } + }, + "node_modules/cac": { + "version": "6.7.14", + "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", + "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/cheerio": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0.tgz", + "integrity": "sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww==", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "encoding-sniffer": "^0.2.0", + "htmlparser2": "^9.1.0", + "parse5": "^7.1.2", + 
"parse5-htmlparser2-tree-adapter": "^7.0.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^6.19.5", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=18.17" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-select": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", + "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/debug": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", + "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", + "dev": true, + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + 
"node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/encoding-sniffer": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.0.tgz", + "integrity": "sha512-ju7Wq1kg04I3HtiYIOrUrdfdDvkyO9s5XM8QAj/bN61Yo/Vb4vgJxy5vi4Yxk01gWHbrofpPtpxM8bKger9jhg==", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + 
"funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/estree-walker": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", + "dev": true + }, + "node_modules/eventemitter3": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-5.0.1.tgz", + "integrity": "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==", + "dev": true + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/gpt-tokenizer": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.8.1.tgz", + "integrity": "sha512-8+a9ojzqfgiF3TK4oivGYjlycD8g5igLt8NQw3ndOIgLVKSGJDhUDNAfYSbtyyuTkha3R/R9F8XrwC7/B5TKfQ==" + }, + "node_modules/happy-dom": { + "version": "16.6.0", + "resolved": "https://registry.npmjs.org/happy-dom/-/happy-dom-16.6.0.tgz", + "integrity": "sha512-Zz5S9sog8a3p8XYZbO+eI1QMOAvCNnIoyrH8A8MLX+X2mJrzADTy+kdETmc4q+uD9AGAvQYGn96qBAn2RAciKw==", + "dependencies": { + "webidl-conversions": "^7.0.0", + "whatwg-mimetype": "^3.0.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + 
"node_modules/happy-dom/node_modules/whatwg-mimetype": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-3.0.0.tgz", + "integrity": "sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==", + "engines": { + "node": ">=12" + } + }, + "node_modules/htmlparser2": { + "version": "9.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz", + "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "entities": "^4.5.0" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/magic-string": { + "version": "0.30.17", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", + "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", + "dev": true, + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0" + } + }, + "node_modules/micromatch": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": 
"sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, + "node_modules/oxc-parser": { + "version": "0.45.0", + "resolved": "https://registry.npmjs.org/oxc-parser/-/oxc-parser-0.45.0.tgz", + "integrity": "sha512-K8mcXfWGrO8XZX+ymjAtit2CTV4nsXXp3LLaQhtnIbappyiZmwTEdtyrt8VZcdBIKQMwj04LOl5FuKom/G5ykw==", + "dev": true, + "dependencies": { + "@oxc-project/types": "^0.45.0" + }, + "funding": { + "url": "https://github.com/sponsors/Boshen" + }, + "optionalDependencies": { + "@oxc-parser/binding-darwin-arm64": "0.45.0", + "@oxc-parser/binding-darwin-x64": "0.45.0", + "@oxc-parser/binding-linux-arm64-gnu": "0.45.0", + "@oxc-parser/binding-linux-arm64-musl": "0.45.0", + "@oxc-parser/binding-linux-x64-gnu": "0.45.0", + "@oxc-parser/binding-linux-x64-musl": "0.45.0", + "@oxc-parser/binding-win32-arm64-msvc": "0.45.0", + "@oxc-parser/binding-win32-x64-msvc": "0.45.0" + } + }, + "node_modules/p-queue": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-8.0.1.tgz", + "integrity": "sha512-NXzu9aQJTAzbBqOt2hwsR63ea7yvxJc0PwN/zobNAudYfb1B7R08SzB4TsLeSbUCuG467NhnoT0oO6w1qRO+BA==", + "dev": true, + "dependencies": { + "eventemitter3": "^5.0.1", + "p-timeout": "^6.1.2" + }, + "engines": { + "node": 
">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-timeout": { + "version": "6.1.4", + "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-6.1.4.tgz", + "integrity": "sha512-MyIV3ZA/PmyBN/ud8vV9XzwTrNtR4jFrObymZYnZqMmW0zA8Z17vnT0rBgFE/TlohB+YCHqXMgZzb3Csp49vqg==", + "dev": true, + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/parse5": { + "version": "7.2.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.2.1.tgz", + "integrity": "sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==", + "dependencies": { + "entities": "^4.5.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true + }, + "node_modules/picomatch": { + "version": "2.3.1", 
+ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/rolldown": { + "version": "1.0.0-beta.1", + "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-beta.1.tgz", + "integrity": "sha512-19B2HoY3zcR7Um+zVDOvV1gQ1d6acUIouCUMGxvlZ/0kTjcMSFr8tuLWmRRYIV7y1mrgPbJRd1cPFVd4p1l8nQ==", + "dev": true, + "dependencies": { + "zod": "^3.23.8" + }, + "bin": { + "rolldown": "bin/cli.js" + }, + "optionalDependencies": { + "@rolldown/binding-darwin-arm64": "1.0.0-beta.1", + "@rolldown/binding-darwin-x64": "1.0.0-beta.1", + "@rolldown/binding-freebsd-x64": "1.0.0-beta.1", + "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-beta.1", + "@rolldown/binding-linux-arm64-gnu": "1.0.0-beta.1", + "@rolldown/binding-linux-arm64-musl": "1.0.0-beta.1", + "@rolldown/binding-linux-x64-gnu": "1.0.0-beta.1", + "@rolldown/binding-linux-x64-musl": "1.0.0-beta.1", + "@rolldown/binding-wasm32-wasi": "1.0.0-beta.1", + "@rolldown/binding-win32-arm64-msvc": "1.0.0-beta.1", + "@rolldown/binding-win32-ia32-msvc": "1.0.0-beta.1", + "@rolldown/binding-win32-x64-msvc": "1.0.0-beta.1" + }, + "peerDependencies": { + "@babel/runtime": ">=7" + }, + "peerDependenciesMeta": { + "@babel/runtime": { + "optional": true + } + } + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + 
"dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true, + "optional": true + }, + "node_modules/turndown": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.0.tgz", + "integrity": "sha512-eCZGBN4nNNqM9Owkv9HAtWRYfLA4h909E/WGAWWBpmB275ehNhZyk87/Tpvjbp0jjNl9XwCsbe6bm6CqFsgD+A==", + "dependencies": { + "@mixmark-io/domino": "^2.2.0" + } + }, + "node_modules/turndown-plugin-gfm": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz", + "integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==" + }, + "node_modules/typescript": { + "version": "5.7.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.3.tgz", + "integrity": "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw==", + "dev": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici": { + "version": "6.21.1", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.1.tgz", + "integrity": "sha512-q/1rj5D0/zayJB2FraXdaWxbhWiNKDvu8naDT2dl1yTlvJp4BLtOcp2a5BvgGNQpYYJzau7tf1WgKv3b+7mqpQ==", + "engines": { + "node": ">=18.17" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + }, + "node_modules/unplugin": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/unplugin/-/unplugin-2.1.2.tgz", + "integrity": 
"sha512-Q3LU0e4zxKfRko1wMV2HmP8lB9KWislY7hxXpxd+lGx0PRInE4vhMBVEZwpdVYHvtqzhSrzuIfErsob6bQfCzw==", + "dev": true, + "dependencies": { + "acorn": "^8.14.0", + "webpack-virtual-modules": "^0.6.2" + }, + "engines": { + "node": ">=18.12.0" + } + }, + "node_modules/unplugin-isolated-decl": { + "version": "0.10.5", + "resolved": "https://registry.npmjs.org/unplugin-isolated-decl/-/unplugin-isolated-decl-0.10.5.tgz", + "integrity": "sha512-sFwvrWgqjPg4JOjzUQoB7uQYGohMVEGt4MxV2bFmRehVVTber6IaweiAz0DJh2NNjgx3RzRiBay2BvuC+E9udg==", + "dev": true, + "dependencies": { + "@rollup/pluginutils": "^5.1.4", + "debug": "^4.4.0", + "magic-string": "^0.30.17", + "oxc-parser": "^0.45.0", + "unplugin": "^2.1.2" + }, + "engines": { + "node": ">=18.12.0" + }, + "peerDependencies": { + "@swc/core": "^1.6.6", + "oxc-transform": ">=0.42.0", + "typescript": "^5.5.2" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "oxc-transform": { + "optional": true + }, + "typescript": { + "optional": true + } + } + }, + "node_modules/webidl-conversions": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", + "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", + "engines": { + "node": ">=12" + } + }, + "node_modules/webpack-virtual-modules": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/webpack-virtual-modules/-/webpack-virtual-modules-0.6.2.tgz", + "integrity": "sha512-66/V2i5hQanC51vBQKPH4aI8NMAcBW59FVBs+rC7eGHupMyfn34q7rZIE+ETlJ+XTevqfUhVVBgSUNSW2flEUQ==", + "dev": true + }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + 
"node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "engines": { + "node": ">=18" + } + }, + "node_modules/zod": { + "version": "3.24.1", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.1.tgz", + "integrity": "sha512-muH7gBL9sI1nciMZV67X5fTKKBLtwpZ5VBp1vsOQzj1MhrBZ4wlVCm3gedKZWLp0Oyel8sIGfeiz54Su+OVT+A==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/src/cli.ts b/src/cli.ts index 4c90095..a34421a 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -19,7 +19,9 @@ cli .option("--content-selector ", "The CSS selector to find content") .option("--limit ", "Limit the result to this amount of pages") .option("--silent", "Do not print any logs") + .option("--follow-domain-redirects", "Follow redirects from one domain to another, e.g. somedomain.com -> www.somedomain.com") .action(async (url, flags) => { + if (!url) { cli.outputHelp() return @@ -34,6 +36,7 @@ cli match: flags.match && ensureArray(flags.match), contentSelector: flags.contentSelector, limit: flags.limit, + followDomainRedirects: flags.followDomainRedirects }) if (pages.size === 0) { diff --git a/src/index.ts b/src/index.ts index 028402e..e4b993d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -108,7 +108,7 @@ class Fetcher { // redirected to other site, ignore if (resUrl.host !== host) { logger.warn(`Redirected from ${host} to ${resUrl.host}`) - return + if (!this.options.followDomainRedirects) return } const extraUrls: string[] = [] diff --git a/src/types.ts b/src/types.ts index 773f9f8..58a1f2c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -24,6 +24,12 @@ export type Options = { * A custom function to fetch URL */ fetch?: (url: string, init: RequestInit) => Promise + + /** + * Follow redirects from one domain to another. 
Useful for 2nd-level domains + * that solve to a www / wwX subdomain. + */ + followDomainRedirects?: boolean } export type Page = { From cc3470c5a640177eb7323c1d4450a6003cf36be6 Mon Sep 17 00:00:00 2001 From: Kyle Mistele Date: Thu, 16 Jan 2025 16:42:20 -0600 Subject: [PATCH 2/2] feat: add sitemap parser --- README.md | 19 ++++++++++ package-lock.json | 27 +++++++++++++ package.json | 7 ++-- src/cli.ts | 4 +- src/index.ts | 96 +++++++++++++++++++++++++++++++++++++---------- src/types.ts | 6 +++ 6 files changed, 135 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 87fcea5..4109f0e 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,25 @@ We use [mozilla/readability](https://github.com/mozilla/readability) to extract sitefetch https://vite.dev --content-selector ".content" ``` +### Follow domain redirects + +You can opt in to following domain redirects (e.g. from mywebsite.com to www.mywebsite.com): +```bash +sitefetch https://vite.dev --follow-domain-redirects +``` + +### Sitemap crawler + +Many websites include a special file `sitemap.xml` that lists all the pages on the website to help +search engines understand and crawl the site more efficiently. This is often done automatically for blogs, +documentation websites, and other types of sites managed through a CMS. It is usually not present for SPAs.
+ +The sitemap crawler can be enabled through `--enable-sitemap`, although if a `sitemap.xml` is not present the site will be +crawled as normal: +``` +sitefetch https://nextjs.org --enable-sitemap +``` + ## Plug If you like this, please check out my LLM chat app: https://chatwise.app diff --git a/package-lock.json b/package-lock.json index 530f45a..8153223 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "license": "MIT", "dependencies": { "cheerio": "^1.0.0", + "fast-xml-parser": "^4.5.1", "gpt-tokenizer": "^2.8.1", "happy-dom": "^16.5.3", "micromatch": "^4.0.8", @@ -681,6 +682,27 @@ "integrity": "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==", "dev": true }, + "node_modules/fast-xml-parser": { + "version": "4.5.1", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.5.1.tgz", + "integrity": "sha512-y655CeyUQ+jj7KBbYMc4FG01V8ZQqjN+gDYGJ50RtfsUB8iG9AmwmwoAgeKLJdmueKKMrH1RJ7yXHTSoczdv5w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + }, + { + "type": "paypal", + "url": "https://paypal.me/naturalintelligence" + } + ], + "dependencies": { + "strnum": "^1.0.5" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, "node_modules/fill-range": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", @@ -932,6 +954,11 @@ "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, + "node_modules/strnum": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", + "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==" + }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", 
diff --git a/package.json b/package.json index 305ae21..9f6d9d6 100644 --- a/package.json +++ b/package.json @@ -18,12 +18,13 @@ "author": "EGOIST ", "license": "MIT", "dependencies": { - "happy-dom": "^16.5.3", "cheerio": "^1.0.0", + "fast-xml-parser": "^4.5.1", "gpt-tokenizer": "^2.8.1", + "happy-dom": "^16.5.3", + "micromatch": "^4.0.8", "turndown": "^7.2.0", - "turndown-plugin-gfm": "^1.0.2", - "micromatch": "^4.0.8" + "turndown-plugin-gfm": "^1.0.2" }, "devDependencies": { "@mozilla/readability": "^0.5.0", diff --git a/src/cli.ts b/src/cli.ts index a34421a..5453272 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -20,6 +20,7 @@ cli .option("--limit ", "Limit the result to this amount of pages") .option("--silent", "Do not print any logs") .option("--follow-domain-redirects", "Follow redirects from one domain to another, e.g. somedomain.com -> www.somedomain.com") + .option("--enable-sitemap", "Enable sitemap.xml-based crawling if a sitemap is available") .action(async (url, flags) => { if (!url) { @@ -36,7 +37,8 @@ cli match: flags.match && ensureArray(flags.match), contentSelector: flags.contentSelector, limit: flags.limit, - followDomainRedirects: flags.followDomainRedirects + followDomainRedirects: flags.followDomainRedirects, + enableSitemap: flags.enableSitemap }) if (pages.size === 0) { diff --git a/src/index.ts b/src/index.ts index e4b993d..65ef7e3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,6 +7,7 @@ import { logger } from "./logger.ts" import { load } from "cheerio" import { matchPath } from "./utils.ts" import type { Options, FetchSiteResult } from "./types.ts" +import { XMLParser } from "fast-xml-parser" export async function fetchSite( url: string, @@ -22,22 +23,13 @@ class Fetcher { #fetched: Set = new Set() #queue: Queue + #userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3" + constructor(public options: Options) { const concurrency = options.concurrency || 3 
this.#queue = new Queue({ concurrency }) } - #limitReached() { - return this.options.limit && this.#pages.size >= this.options.limit - } - - #getContentSelector(pathname: string) { - if (typeof this.options.contentSelector === "function") - return this.options.contentSelector({ pathname }) - - return this.options.contentSelector - } - async fetchSite(url: string) { logger.info( `Started fetching ${c.green(url)} with a concurrency of ${ @@ -45,8 +37,13 @@ class Fetcher { }` ) + if (this.options.enableSitemap) { + logger.info(`Crawling sitemap...`) + await this.#crawlSitemap(url) + } + await this.#fetchPage(url, { - skipMatch: true, + skipMatch: true }) await this.#queue.onIdle() @@ -54,6 +51,64 @@ class Fetcher { return this.#pages } + async #crawlSitemap(url: string) { + logger.warn(`Crawling sitemap for ${url}`) + const sitemapUrl = new URL(url) + sitemapUrl.pathname = `/sitemap.xml` + + logger.info(`Fetching sitemap at ${sitemapUrl}`) + + try { + const sitemapResponse = await (this.options.fetch || fetch)( + sitemapUrl.toString(), { + headers: { + "user-agent": this.#userAgent + } + } + ) + if (!sitemapResponse.ok) { + logger.warn(`Unable to fetch sitemap`) + } + else { + const parser = new XMLParser() + const sitemap = parser.parse(await sitemapResponse.text())["urlset"] + if (!sitemap || !sitemap["url"]) { + throw new Error(`invalid sitemap.xml`) + } + const urls = Array.isArray(sitemap["url"]) + ? 
sitemap["url"].map((url: any) => url["loc"]) + : [sitemap["url"]["loc"]] + + if (urls.length > 0) { + logger.info(`Located URLs in sitemap:\n\t${urls.join("\n\t")}`) + const options = this.options + urls.map(u => this.#queue.add(() => + this.#fetchPage(u, { ...options, skipMatch: false }) + )) + logger.info(`Sitemap URLs added to queue.`) + + } + + } + } + catch (err: any) { + logger.warn(`Unable to get or parse sitemap:`, err.message) + } + + + } + + #limitReached() { + return this.options.limit && this.#pages.size >= this.options.limit + } + + #getContentSelector(pathname: string) { + if (typeof this.options.contentSelector === "function") + return this.options.contentSelector({ pathname }) + + return this.options.contentSelector + } + async #fetchPage( url: string, options: { @@ -82,9 +137,8 @@ class Fetcher { const res = await (this.options.fetch || fetch)(url, { headers: { - "user-agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", - }, + "user-agent": this.#userAgent + } }) if (!res.ok) { @@ -107,8 +161,10 @@ class Fetcher { // redirected to other site, ignore if (resUrl.host !== host) { - logger.warn(`Redirected from ${host} to ${resUrl.host}`) - if (!this.options.followDomainRedirects) return + if (!this.options.followDomainRedirects) { + logger.warn(`Redirected from ${host} to ${resUrl.host}`) + return + } } const extraUrls: string[] = [] @@ -143,8 +199,8 @@ class Fetcher { settings: { disableJavaScriptFileLoading: true, disableJavaScriptEvaluation: true, - disableCSSFileLoading: true, - }, + disableCSSFileLoading: true + } }) const pageTitle = $("title").text() @@ -175,7 +231,7 @@ class Fetcher { this.#pages.set(pathname, { title: article.title || pageTitle, url, - content, + content }) } } diff --git a/src/types.ts b/src/types.ts index 58a1f2c..3276d26 100644 --- a/src/types.ts +++ b/src/types.ts @@ -25,6 +25,12 @@ export type Options = { */ fetch?: (url: string, init: 
RequestInit) => Promise + /** + * Enable crawling the site using a sitemap.xml, if available. + * Falls back to default behavior if a sitemap.xml is not available. + */ + enableSitemap?: boolean + /** * Follow redirects from one domain to another. Useful for 2nd-level domains * that solve to a www / wwX subdomain.