diff --git a/README.md b/README.md index 985c102..a2c2675 100644 --- a/README.md +++ b/README.md @@ -19,44 +19,107 @@ [![GitHub issues](https://img.shields.io/github/issues/Automations-Project/n8n-nodes-scrappey)](https://github.com/Automations-Project/n8n-nodes-scrappey/issues) [![Last commit](https://img.shields.io/github/last-commit/Automations-Project/n8n-nodes-scrappey)](https://github.com/Automations-Project/n8n-nodes-scrappey/commits) -> πŸš€ **Advanced web scraping and anti-bot bypass node for n8n workflows** +> **Advanced web scraping and anti-bot bypass node for n8n workflows - Now with complete API coverage!** A powerful n8n community node that integrates with the [Scrappey.com API](https://scrappey.com) to provide advanced web scraping capabilities with built-in anti-bot protection bypass. Perfect for automating data extraction from protected websites, handling CAPTCHAs, and managing complex browser interactions. -## ✨ Key Features +## What's New in v1.0.0 -### πŸ› οΈ **Three Operation Modes** +This major update brings **complete API coverage** matching the official Scrappey documentation: + +- **Session Management** - Create, destroy, list, and check session status +- **WebSocket Connections** - Advanced persistent browser control +- **20+ Browser Actions** - Click, type, scroll, wait, execute JavaScript, solve captchas, and more +- **Full Antibot Support** - Cloudflare, Datadome, Kasada, Incapsula, PerimeterX bypass +- **Response Options** - Screenshots, PDF generation, regex extraction, filters +- **Request Interception** - Abort patterns, domain blocking, XHR/Fetch interception +- **AI-Powered Parsing** - Automatic HTML structure extraction + +## Key Features + +### Eight Operation Modes 1. **Request Builder** - Create fully customized HTTP/browser requests with granular control 2. **HTTP Auto-Retry** - Automatically retry failed HTTP requests through Scrappey's anti-bot network 3. 
**Browser Auto-Retry** - Advanced browser-based retry with full anti-bot protection - -### πŸ”’ **Anti-Bot Protection Bypass** - -- **Cloudflare** challenge solving -- **Datadome** bypass capabilities -- **hCaptcha & reCAPTCHA** automatic solving +4. **Session Create** - Create persistent browser sessions for multi-request workflows +5. **Session Destroy** - Clean up sessions when done +6. **Session List** - View all active sessions +7. **Session Check** - Verify if a session is still active +8. **WebSocket Create** - Create WebSocket-based browser connections + +### Anti-Bot Protection Bypass + +- **Cloudflare** challenge solving with dedicated bypass +- **Datadome** bypass with debug mode +- **Kasada** protection bypass +- **Incapsula/Imperva** detection and solving +- **PerimeterX** bypass +- **hCaptcha, reCAPTCHA, Turnstile** automatic solving +- **FunCaptcha (Arkose Labs)** support - **JavaScript-heavy websites** full browser simulation - **Mouse movement simulation** for enhanced stealth -### 🌍 **Advanced Proxy Management** - -- **Residential proxies** with country targeting +### 20+ Browser Actions + +Execute complex browser automation sequences: + +| Action | Description | +|--------|-------------| +| `click` | Click elements with CSS selector | +| `type` | Type text into input fields | +| `goto` | Navigate to URLs | +| `wait` | Wait for specified time | +| `wait_for_selector` | Wait for elements to appear | +| `wait_for_function` | Wait for JavaScript conditions | +| `wait_for_load_state` | Wait for page load states | +| `wait_for_cookie` | Wait for cookies to be set | +| `execute_js` | Run JavaScript code | +| `scroll` | Scroll to elements or page bottom | +| `hover` | Hover over elements | +| `keyboard` | Simulate key presses | +| `dropdown` | Select dropdown options | +| `switch_iframe` | Switch to iframe context | +| `set_viewport` | Change viewport size | +| `if` | Conditional action execution | +| `while` | Loop actions with conditions | +| 
`solve_captcha` | Solve various captcha types | +| `discord_login` | Discord authentication | +| `remove_iframes` | Remove all iframes | + +### Advanced Proxy Management + +- **Residential proxies** with country targeting (150+ countries) +- **Premium residential proxies** for better success rates - **Datacenter proxies** for fast requests - **Mobile proxies** for mobile-specific content - **Custom proxy** support (SOCKS4/5, HTTP/HTTPS) -- **150+ countries** available for geo-targeting +- **No proxy** option for direct connections + +### Response Options + +- **Screenshots** with custom dimensions and base64/URL output +- **PDF generation** of pages +- **Regex extraction** for pattern matching +- **Field filtering** to reduce response size +- **Base64 encoding** of responses +- **Redirect tracking** for all redirect URLs +- **Inner text extraction** for clean content + +### Request Interception -### βš™οΈ **Flexible Configuration** +- **Abort patterns** to block unwanted requests +- **Domain blacklisting** for blocking specific domains +- **XHR/Fetch interception** to capture API responses +- **Wait for abort detection** before continuing -- **Multiple request types**: Standard HTTP, Browser, Patched Chrome -- **Custom headers & cookies** with field-based or JSON input -- **Session management** for maintaining state across requests -- **POST/PUT/PATCH support** with body or form parameters -- **CSS selector waiting** for dynamic content -- **XHR/Fetch interception** for API data extraction +### AI-Powered Parsing -## πŸš€ Installation +- **Automatic structure extraction** with AI models +- **DeepSeek, GPT-4, GPT-3.5** support +- **Custom structure definitions** for targeted extraction + +## Installation ### Method 1: n8n Community Nodes (Recommended) @@ -86,26 +149,26 @@ git clone https://github.com/Automations-Project/n8n-nodes-scrappey.git cd n8n-nodes-scrappey # Install dependencies -pnpm install +npm install # Build the node -pnpm run build +npm run build # 
Link for development -pnpm run start:dev +npm run start:dev ``` -## 🔧 Configuration +## Configuration ### 1. Set Up Scrappey API Credentials -1. Sign up at [Scrappey.com](/#) to get your API key. +1. Sign up at [Scrappey.com](https://scrappey.com) to get your API key. 2. In n8n, create new **Scrappey API** credentials 3. Enter your API key and optional proxy settings - > 🎯 **Get Started Free!** Try Scrappey with **750 Direct requests** and **150 Browser requests** at no cost. - > [Start your free trial →](https://nodes.n8n.community/scrappey/signup) - > - > **Affordable scaling**: For just €100, you can get 600,000 request credits including proxies, captcha etc... + +> **Get Started Free!** Try Scrappey with **750 Direct requests** and **150 Browser requests** at no cost. +> +> **Affordable scaling**: For just €100, you can get 600,000 request credits including proxies, captcha etc... ### 2. Credential Options @@ -113,64 +176,11 @@ pnpm run start:dev - **Custom Proxy** (optional): Your own proxy URL (SOCKS4/5, HTTP/HTTPS) - **Whitelisted Domains** (optional): JSON array of allowed domains for enhanced security -## 📋 Operation Modes -![Operations Types](.github/assets/operations.jpg) -### 🛠️ Request Builder (Manual) - -**Primary mode for creating custom requests with full control** - -```typescript -// Example configuration options: -{ - "url": "https://example.com/api/data", - "httpMethod": "GET", - "request_type": "Browser", // or "Request", "PatchedChrome" - "whichProxyToUse": "proxyFromScrappey", - "proxyType": "residential", // residential, datacenter, mobile - "customProxyCountry": "UnitedStates", - "antibot": true, - "mouseMovements": true, - "datadome": true -} -``` - -**Use Cases:** - -- Complex form submissions with CAPTCHA solving -- JavaScript-heavy SPA scraping -- API data extraction with anti-bot protection -- Multi-step workflows with session management - -### 🔁 HTTP Auto-Retry - -**Fallback solution for failed n8n HTTP Request nodes**
-![Banner](.github/assets/example.svg) -Connect the **error output** (red connector) of a standard HTTP Request node to this operation. It automatically retries the same request through Scrappey's network when blocked by: - -- Cloudflare challenges -- Rate limiting -- IP blocks -- Basic anti-bot measures - - -### 🌐 Browser Auto-Retry - -**Advanced browser-based retry with full anti-bot protection** - -Similar to HTTP Auto-Retry but uses a full browser environment with: - -- Automatic CAPTCHA solving (hCaptcha, reCAPTCHA) -- Mouse movement simulation -- Datadome bypass enabled -- JavaScript execution -- 3 automatic retries - -## πŸ’‘ Usage Examples +## Usage Examples ### Basic Web Scraping ```javascript -// Request Builder - Simple GET request { "operation": "requestBuilder", "url": "https://httpbin.org/get", @@ -179,10 +189,9 @@ Similar to HTTP Auto-Retry but uses a full browser environment with: } ``` -### Advanced Browser Automation +### Browser Automation with Anti-Bot Protection ```javascript -// Browser request with anti-bot protection { "operation": "requestBuilder", "url": "https://protected-site.com", @@ -190,132 +199,173 @@ Similar to HTTP Auto-Retry but uses a full browser environment with: "antibot": true, "mouseMovements": true, "datadome": true, - "cssSelector": ".content-loaded", + "cloudflareBypass": true, "proxyType": "residential", "customProxyCountry": "UnitedStates" } ``` -### Form Submission with CAPTCHA +### Session-Based Workflow ```javascript -// POST request with CAPTCHA solving +// 1. Create session +{ + "operation": "sessionCreate", + "sessionId": "my-session-123", + "sessionTtl": 300 +} + +// 2. 
Use session for requests { "operation": "requestBuilder", - "url": "https://example.com/submit", - "httpMethod": "request.post", + "url": "https://example.com/login", + "userSession": "my-session-123", + "browserActions": [ + {"type": "type", "cssSelector": "#username", "text": "myuser"}, + {"type": "type", "cssSelector": "#password", "text": "mypass"}, + {"type": "click", "cssSelector": "#login-btn"} + ] +} + +// 3. Destroy session when done +{ + "operation": "sessionDestroy", + "sessionToDestroy": "my-session-123" +} +``` + +### Captcha Solving + +```javascript +{ + "operation": "requestBuilder", + "url": "https://example.com", "request_type": "Browser", - "bodyOrParams": "body_used", - "body_for_request": "{\"name\":\"John\",\"email\":\"john@example.com\"}", - "antibot": true + "antibot": true, + "alwaysLoad": ["recaptcha", "hcaptcha", "turnstile"], + "browserActions": [ + { + "type": "solve_captcha", + "captcha": "turnstile", + "captchaCssSelector": ".cf-turnstile" + } + ] } ``` -### Auto-Retry Fallback +### Screenshot and PDF Generation ```javascript -// Connect HTTP Request node error output to Scrappey node input -// Set operation to "httpRequestAutoRetry" or "httpRequestAutoRetryBrowser" { - "operation": "httpRequestAutoRetry", - "whichProxyToUse": "proxyFromScrappey", - "proxyType": "residential" + "operation": "requestBuilder", + "url": "https://example.com", + "request_type": "Browser", + "screenshot": true, + "screenshotWidth": 1920, + "screenshotHeight": 1080, + "screenshotUpload": true, + "pdf": true } ``` -## πŸ”’ Error Handling +### XHR/Fetch Interception -The node provides detailed error messages for common Scrappey API error codes: +```javascript +{ + "operation": "requestBuilder", + "url": "https://example.com", + "request_type": "Browser", + "interceptXhrFetchRequest": "https://api.example.com/data", + "abortOnDetection": "analytics.com, tracking.js" +} +``` -| Code | Description | Solution | -| --------- | ------------------ | 
----------------------------------- | -| CODE-0001 | Server overloaded | Retry after a few minutes | +## Error Handling + +The node provides detailed error messages for all Scrappey API error codes: + +| Code | Description | Solution | +|------|-------------|----------| +| CODE-0001 | Server overloaded | Retry after a few minutes | | CODE-0002 | Cloudflare blocked | Try different proxy or browser mode | -| CODE-0003 | Too many attempts | Wait before retrying | -| CODE-0004 | Invalid command | Check request configuration | -| CODE-0005 | Tunnel failed | Retry with different proxy | +| CODE-0003 | Too many attempts | Wait before retrying | +| CODE-0004 | Invalid command | Check request configuration | +| CODE-0005 | Tunnel failed | Retry with different proxy | +| CODE-0010 | Datadome blocked | Try different proxy | +| CODE-0029 | Too many sessions | Destroy unused sessions | +| CODE-0032 | Turnstile not solved | Try different proxy | +| CODE-0038 | FingerprintJS failed | Retry request | -## 🏗️ Development +## Development ### Building from Source ```bash # Install dependencies -pnpm install +npm install # Development build with watch -pnpm run build:watch +npm run build:watch # Production build -pnpm run build +npm run build # Linting & formatting -pnpm run lint -pnpm run format +npm run lint +npm run format # Type checking -pnpm run type-check - -# Full validation -pnpm run validate +npm run type-check ``` ### Project Structure ``` n8n-nodes-scrappey/ -├── nodes/Scrappey/ # Main node implementation -│ ├── Scrappey.node.ts # Node definition and execution -│ ├── execute.ts # Operation dispatcher -│ ├── RequestMethods.ts # HTTP/Browser request handlers +├── nodes/Scrappey/ +│ ├── Scrappey.node.ts # Node definition and execution +│ ├── execute.ts # Operation dispatcher +│ ├── methods.ts # Request handlers │ ├── requestBodyBuilder.ts # Request body construction -│ ├── fields.ts # Node field
definitions -│ ├── GenericFunctions.ts # API integration utilities -│ └── utils.ts # Helper functions -├── credentials/ # Credential definitions +│ ├── fields.ts # Node field definitions +│ ├── browserActions.ts # Browser action definitions +│ ├── types.ts # TypeScript type definitions +│ ├── GenericFunctions.ts # API integration utilities +│ ├── operators.ts # Operation definitions +│ └── utils.ts # Helper functions +├── credentials/ │ └── ScrappeyApi.credentials.ts -├── scripts/ # Build and deployment scripts -├── .github/workflows/ # CI/CD pipelines -└── dist/ # Built output +├── scripts/ +├── .github/workflows/ +└── dist/ ``` -### CI/CD Pipeline - -This project includes a comprehensive CI/CD setup: +## Contributing -- **Continuous Integration**: Automated testing, linting, and building on every PR -- **Auto-versioning**: Automatic version bumps based on commit messages -- **Automated Releases**: Publishes to GitHub Packages and optionally npm -- **Security Scanning**: CodeQL analysis and dependency auditing -- **Dependabot**: Automated dependency updates +1. Fork the repository +2. Create a feature branch: `git checkout -b feature/amazing-feature` +3. Commit changes: `git commit -m 'feat: add amazing feature'` +4. Push to branch: `git push origin feature/amazing-feature` +5. Open a Pull Request -#### Commit Message Conventions +### Commit Message Conventions - `feat: description` → Minor version bump - `fix: description` → Patch version bump - `BREAKING CHANGE` or `[major]` → Major version bump - `[skip ci]` or `[skip version]` → Skip automation -## 🤝 Contributing - -1. Fork the repository -2. Create a feature branch: `git checkout -b feature/amazing-feature` -3. Commit changes: `git commit -m 'feat: add amazing feature'` -4. Push to branch: `git push origin feature/amazing-feature` -5.
Open a Pull Request - -## πŸ“„ License +## License This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details. -## πŸ”— Links +## Links - **Scrappey Website**: [https://scrappey.com](https://scrappey.com) - **Scrappey Documentation**: [https://wiki.scrappey.com](https://wiki.scrappey.com) - **n8n Community**: [https://community.n8n.io](https://community.n8n.io) - **GitHub Issues**: [Report bugs or request features](https://github.com/Automations-Project/n8n-nodes-scrappey/issues) -- **Nskha Discord**: [⚠️Incative community](https://nskha.com/discord) --- -**Made with ❀️ for the n8n community** +**Made with love for the n8n community** diff --git a/nodes/Scrappey/GenericFunctions.ts b/nodes/Scrappey/GenericFunctions.ts index 3833360..34ab64a 100644 --- a/nodes/Scrappey/GenericFunctions.ts +++ b/nodes/Scrappey/GenericFunctions.ts @@ -228,12 +228,17 @@ export async function genericHttpRequest( message: 'Keyboard action value not found', details: 'Keyboard action value not found.', }, - 'CODE-0037': { - code: 'CODE-0037', - message: 'Datadome was blocked', - details: 'Datadome was blocked, please try again with a different proxy.', - }, - 'CODE-10000': { + 'CODE-0037': { + code: 'CODE-0037', + message: 'Datadome was blocked', + details: 'Datadome was blocked, please try again with a different proxy.', + }, + 'CODE-0038': { + code: 'CODE-0038', + message: 'Could not solve FingerprintJS challenge', + details: 'The FingerprintJS challenge could not be solved, please try again.', + }, + 'CODE-10000': { code: 'CODE-10000', message: 'Unknown error - has to be specified', details: 'An unknown error occurred and needs to be specified.', diff --git a/nodes/Scrappey/Scrappey.node.ts b/nodes/Scrappey/Scrappey.node.ts index e8eb78c..7779097 100644 --- a/nodes/Scrappey/Scrappey.node.ts +++ b/nodes/Scrappey/Scrappey.node.ts @@ -1,5 +1,5 @@ import { INodeType, INodeTypeDescription } from 'n8n-workflow'; -import { AdvancedSettingsForBrowser, 
publicFields } from './fields'; +import { allFields } from './fields'; import { executeScrappey } from './execute'; import { scrappeyOperators } from './operators'; import { IExecuteFunctions, INodeExecutionData, IDataObject, NodeOperationError } from 'n8n-workflow'; @@ -73,7 +73,7 @@ export class Scrappey implements INodeType { displayName: 'Scrappey', name: 'scrappey', icon: 'file:Scrappey.svg', - group: ['web-scraping'], + group: ['transform'], version: 1, subtitle: '={{ { requestBuilder: "πŸ› οΈ Request Builder", httpRequestAutoRetry: "πŸ” Auto β€’ HTTP Mode", httpRequestAutoRetryBrowser: "🌐 Auto β€’ Browser Mode" }[$parameter["scrappeyOperations"]] }}', @@ -97,6 +97,6 @@ export class Scrappey implements INodeType { 'Content-Type': 'application/json', }, }, - properties: [...scrappeyOperators, ...publicFields, ...AdvancedSettingsForBrowser], + properties: [...scrappeyOperators, ...allFields], }; } \ No newline at end of file diff --git a/nodes/Scrappey/browserActions.ts b/nodes/Scrappey/browserActions.ts new file mode 100644 index 0000000..fafb8f2 --- /dev/null +++ b/nodes/Scrappey/browserActions.ts @@ -0,0 +1,861 @@ +/* eslint-disable n8n-nodes-base/node-param-display-name-miscased */ +/* eslint-disable n8n-nodes-base/node-param-description-excess-final-period */ +/* eslint-disable n8n-nodes-base/node-param-options-type-unsorted-items */ +import { INodeProperties } from 'n8n-workflow'; + +// ============================================ +// Browser Actions Field Definitions +// ============================================ + +export const browserActionsFields: INodeProperties[] = [ + // Main Browser Actions Collection + { + displayName: 'Browser Actions', + name: 'browserActions', + type: 'fixedCollection', + default: {}, + placeholder: 'Add Browser Action', + description: 'Define a sequence of browser actions to execute on the page', + typeOptions: { + multipleValues: true, + sortable: true, + }, + displayOptions: { + show: { + scrappeyOperations: 
['requestBuilder'], + request_type: ['Browser'], + }, + }, + options: [ + { + name: 'actions', + displayName: 'Actions', + values: [ + // Action Type Selector + { + displayName: 'Action Type', + name: 'type', + type: 'options', + default: 'click', + options: [ + { name: 'Click', value: 'click', description: 'Click on an element using CSS selector' }, + { name: 'Type', value: 'type', description: 'Type text into an input field' }, + { name: 'Navigate (goto)', value: 'goto', description: 'Navigate to a new URL' }, + { name: 'Wait', value: 'wait', description: 'Wait for a specified time' }, + { name: 'Wait for Selector', value: 'wait_for_selector', description: 'Wait for an element to appear' }, + { name: 'Wait for Function', value: 'wait_for_function', description: 'Wait for JavaScript condition' }, + { name: 'Wait for Load State', value: 'wait_for_load_state', description: 'Wait for page load state' }, + { name: 'Wait for Cookie', value: 'wait_for_cookie', description: 'Wait for a cookie to be set' }, + { name: 'Execute JavaScript', value: 'execute_js', description: 'Execute JavaScript code on the page' }, + { name: 'Scroll', value: 'scroll', description: 'Scroll to an element or page bottom' }, + { name: 'Hover', value: 'hover', description: 'Hover over an element' }, + { name: 'Keyboard', value: 'keyboard', description: 'Simulate keyboard key presses' }, + { name: 'Dropdown', value: 'dropdown', description: 'Select an option from a dropdown' }, + { name: 'Switch Iframe', value: 'switch_iframe', description: 'Switch context to an iframe' }, + { name: 'Set Viewport', value: 'set_viewport', description: 'Change browser viewport size' }, + { name: 'Conditional (if)', value: 'if', description: 'Execute actions conditionally' }, + { name: 'Loop (while)', value: 'while', description: 'Loop actions while condition is true' }, + { name: 'Solve Captcha', value: 'solve_captcha', description: 'Solve various captcha types' }, + { name: 'Discord Login', value: 'discord_login', 
description: 'Login to Discord with token' }, + { name: 'Remove Iframes', value: 'remove_iframes', description: 'Remove all iframes from page' }, + ], + }, + // When to Execute + { + displayName: 'When', + name: 'when', + type: 'options', + default: 'afterload', + options: [ + { name: 'After Page Load', value: 'afterload' }, + { name: 'Before Page Load', value: 'beforeload' }, + ], + description: 'When to execute this action', + }, + // Ignore Errors + { + displayName: 'Ignore Errors', + name: 'ignoreErrors', + type: 'boolean', + default: false, + description: 'Whether to continue execution if this action fails', + }, + // Timeout + { + displayName: 'Timeout (ms)', + name: 'timeout', + type: 'number', + default: 60000, + description: 'Timeout in milliseconds for this action', + }, + + // ============ CLICK ACTION FIELDS ============ + { + displayName: 'CSS Selector', + name: 'cssSelector', + type: 'string', + default: '', + placeholder: '#submit-button', + description: 'CSS selector of the element to interact with', + displayOptions: { + show: { + type: ['click', 'type', 'wait_for_selector', 'scroll', 'hover', 'dropdown', 'switch_iframe'], + }, + }, + }, + { + displayName: 'Wait After (ms)', + name: 'wait', + type: 'number', + default: 0, + description: 'Time to wait after performing the action', + displayOptions: { + show: { + type: ['click', 'type', 'goto', 'keyboard', 'dropdown', 'set_viewport', 'discord_login'], + }, + }, + }, + { + displayName: 'Wait for Selector', + name: 'waitForSelector', + type: 'string', + default: '', + placeholder: '.success-message', + description: 'Wait for this selector to appear after the action', + displayOptions: { + show: { + type: ['click', 'keyboard', 'dropdown'], + }, + }, + }, + { + displayName: 'Direct Click', + name: 'direct', + type: 'boolean', + default: false, + description: 'Whether to use direct click instead of cursor simulation', + displayOptions: { + show: { + type: ['click', 'type', 'discord_login'], + }, + }, + }, 
+ + // ============ TYPE ACTION FIELDS ============ + { + displayName: 'Text to Type', + name: 'text', + type: 'string', + default: '', + placeholder: 'Enter your text here', + description: 'The text to type into the input field', + displayOptions: { + show: { + type: ['type'], + }, + }, + }, + + // ============ GOTO ACTION FIELDS ============ + { + displayName: 'URL', + name: 'url', + type: 'string', + default: '', + placeholder: 'https://example.com/page2', + description: 'The URL to navigate to', + displayOptions: { + show: { + type: ['goto'], + }, + }, + }, + + // ============ WAIT ACTION FIELDS ============ + { + displayName: 'Wait Time (ms)', + name: 'waitTime', + type: 'number', + default: 1000, + description: 'Time to wait in milliseconds', + displayOptions: { + show: { + type: ['wait'], + }, + }, + }, + + // ============ WAIT FOR FUNCTION FIELDS ============ + { + displayName: 'JavaScript Code', + name: 'code', + type: 'string', + default: '', + placeholder: 'window.dataLoaded === true', + description: 'JavaScript code that returns truthy when condition is met', + typeOptions: { + rows: 3, + }, + displayOptions: { + show: { + type: ['wait_for_function', 'execute_js'], + }, + }, + }, + { + displayName: "Don't Return Value", + name: 'dontReturnValue', + type: 'boolean', + default: false, + description: 'Whether to skip capturing the return value', + displayOptions: { + show: { + type: ['execute_js'], + }, + }, + }, + + // ============ WAIT FOR LOAD STATE FIELDS ============ + { + displayName: 'Load State', + name: 'waitForLoadState', + type: 'options', + default: 'networkidle', + options: [ + { name: 'DOM Content Loaded', value: 'domcontentloaded' }, + { name: 'Network Idle', value: 'networkidle' }, + { name: 'Full Load', value: 'load' }, + ], + displayOptions: { + show: { + type: ['wait_for_load_state'], + }, + }, + }, + + // ============ WAIT FOR COOKIE FIELDS ============ + { + displayName: 'Cookie Name', + name: 'cookieName', + type: 'string', + default: 
'', + placeholder: 'session_id', + description: 'Name of the cookie to wait for', + displayOptions: { + show: { + type: ['wait_for_cookie'], + }, + }, + }, + { + displayName: 'Cookie Value', + name: 'cookieValue', + type: 'string', + default: '', + placeholder: 'optional-expected-value', + description: 'Optional expected value of the cookie', + displayOptions: { + show: { + type: ['wait_for_cookie'], + }, + }, + }, + { + displayName: 'Cookie Domain', + name: 'cookieDomain', + type: 'string', + default: '', + placeholder: 'example.com', + description: 'Domain the cookie should be set on', + displayOptions: { + show: { + type: ['wait_for_cookie'], + }, + }, + }, + { + displayName: 'Poll Interval (ms)', + name: 'pollIntervalMs', + type: 'number', + default: 200, + description: 'How often to check for the cookie', + displayOptions: { + show: { + type: ['wait_for_cookie'], + }, + }, + }, + + // ============ SCROLL ACTION FIELDS ============ + { + displayName: 'Repeat', + name: 'repeat', + type: 'number', + default: 1, + description: 'Number of times to repeat the scroll', + displayOptions: { + show: { + type: ['scroll'], + }, + }, + }, + { + displayName: 'Delay Between Scrolls (ms)', + name: 'delayMs', + type: 'number', + default: 100, + description: 'Delay between scroll actions', + displayOptions: { + show: { + type: ['scroll'], + }, + }, + }, + + // ============ KEYBOARD ACTION FIELDS ============ + { + displayName: 'Key', + name: 'value', + type: 'options', + default: 'enter', + options: [ + { name: 'Enter', value: 'enter' }, + { name: 'Tab', value: 'tab' }, + { name: 'Space', value: 'space' }, + { name: 'Arrow Down', value: 'arrowdown' }, + { name: 'Arrow Up', value: 'arrowup' }, + { name: 'Arrow Left', value: 'arrowleft' }, + { name: 'Arrow Right', value: 'arrowright' }, + { name: 'Backspace', value: 'backspace' }, + { name: 'Clear', value: 'clear' }, + ], + displayOptions: { + show: { + type: ['keyboard'], + }, + }, + }, + { + displayName: 'Focus Element First', 
+ name: 'keyboardCssSelector', + type: 'string', + default: '', + placeholder: '#input-field', + description: 'CSS selector of element to focus before pressing key', + displayOptions: { + show: { + type: ['keyboard'], + }, + }, + }, + + // ============ DROPDOWN ACTION FIELDS ============ + { + displayName: 'Selection Method', + name: 'dropdownMethod', + type: 'options', + default: 'value', + options: [ + { name: 'By Value', value: 'value' }, + { name: 'By Index', value: 'index' }, + ], + displayOptions: { + show: { + type: ['dropdown'], + }, + }, + }, + { + displayName: 'Option Value', + name: 'dropdownValue', + type: 'string', + default: '', + placeholder: 'US', + description: 'The value of the option to select', + displayOptions: { + show: { + type: ['dropdown'], + dropdownMethod: ['value'], + }, + }, + }, + { + displayName: 'Option Index', + name: 'dropdownIndex', + type: 'number', + default: 0, + description: 'The index of the option to select (0-based)', + displayOptions: { + show: { + type: ['dropdown'], + dropdownMethod: ['index'], + }, + }, + }, + + // ============ SET VIEWPORT FIELDS ============ + { + displayName: 'Width', + name: 'viewportWidth', + type: 'number', + default: 1280, + description: 'Viewport width in pixels', + displayOptions: { + show: { + type: ['set_viewport'], + }, + }, + }, + { + displayName: 'Height', + name: 'viewportHeight', + type: 'number', + default: 1024, + description: 'Viewport height in pixels', + displayOptions: { + show: { + type: ['set_viewport'], + }, + }, + }, + + // ============ IF/WHILE ACTION FIELDS ============ + { + displayName: 'Condition', + name: 'condition', + type: 'string', + default: '', + placeholder: "document.querySelector('.captcha') !== null", + description: 'JavaScript condition to evaluate', + typeOptions: { + rows: 2, + }, + displayOptions: { + show: { + type: ['if', 'while'], + }, + }, + }, + { + displayName: 'Then Actions (JSON)', + name: 'thenActions', + type: 'string', + default: '[]', + 
description: 'Array of actions to execute if condition is true (JSON format)', + typeOptions: { + rows: 4, + }, + displayOptions: { + show: { + type: ['if', 'while'], + }, + }, + }, + { + displayName: 'Else Actions (JSON)', + name: 'orActions', + type: 'string', + default: '[]', + description: 'Array of actions to execute if condition is false (JSON format)', + typeOptions: { + rows: 4, + }, + displayOptions: { + show: { + type: ['if'], + }, + }, + }, + { + displayName: 'Max Iterations', + name: 'maxAttempts', + type: 'number', + default: 10, + description: 'Maximum number of loop iterations to prevent infinite loops', + displayOptions: { + show: { + type: ['while'], + }, + }, + }, + + // ============ SOLVE CAPTCHA FIELDS ============ + { + displayName: 'Captcha Type', + name: 'captcha', + type: 'options', + default: 'turnstile', + options: [ + { name: 'Cloudflare Turnstile', value: 'turnstile' }, + { name: 'reCAPTCHA v2', value: 'recaptcha' }, + { name: 'reCAPTCHA v2 (with sitekey)', value: 'recaptchav2' }, + { name: 'reCAPTCHA v3', value: 'recaptchav3' }, + { name: 'hCaptcha', value: 'hcaptcha' }, + { name: 'hCaptcha (with sitekey)', value: 'hcaptcha_inside' }, + { name: 'hCaptcha Enterprise', value: 'hcaptcha_enterprise_inside' }, + { name: 'FunCaptcha (Arkose Labs)', value: 'funcaptcha' }, + { name: 'PerimeterX', value: 'perimeterx' }, + { name: 'MTCaptcha', value: 'mtcaptcha' }, + { name: 'MTCaptcha Isolated', value: 'mtcaptchaisolated' }, + { name: 'v4Guard', value: 'v4guard' }, + { name: 'Custom (Image)', value: 'custom' }, + { name: 'FingerprintJS', value: 'fingerprintjscom' }, + { name: 'FingerprintJS CurseForge', value: 'fingerprintjs_curseforge' }, + ], + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Site Key', + name: 'sitekey', + type: 'string', + default: '', + placeholder: '0x4AAAAAAA...', + description: 'The captcha site key', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + 
displayName: 'Captcha CSS Selector', + name: 'captchaCssSelector', + type: 'string', + default: '', + placeholder: '.cf-turnstile', + description: 'CSS selector of the captcha container', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Website URL', + name: 'websiteUrl', + type: 'string', + default: '', + placeholder: 'https://example.com', + description: 'The website URL for captcha solving', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Website Key', + name: 'websiteKey', + type: 'string', + default: '', + description: 'Alternative website key for captcha', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Input Selector', + name: 'inputSelector', + type: 'string', + default: '', + placeholder: '#captcha-input', + description: 'CSS selector of the input to fill with captcha token', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Click Selector', + name: 'clickSelector', + type: 'string', + default: '', + placeholder: '#submit', + description: 'CSS selector of button to click after solving', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Iframe Selector', + name: 'iframeSelector', + type: 'string', + default: '', + placeholder: '#captcha-iframe', + description: 'CSS selector of captcha iframe if applicable', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Action', + name: 'captchaAction', + type: 'string', + default: '', + description: 'reCAPTCHA action parameter', + displayOptions: { + show: { + type: ['solve_captcha'], + captcha: ['recaptchav3'], + }, + }, + }, + { + displayName: 'Invisible Captcha', + name: 'invisible', + type: 'boolean', + default: false, + description: 'Whether the captcha is invisible', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 
'Reset Before Solving', + name: 'captchaReset', + type: 'boolean', + default: false, + description: 'Whether to reset captcha state before solving', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Fast Mode', + name: 'captchaFast', + type: 'boolean', + default: false, + description: 'Whether to use fast solving mode', + displayOptions: { + show: { + type: ['solve_captcha'], + }, + }, + }, + { + displayName: 'Base64 Image', + name: 'base64Image', + type: 'string', + default: '', + description: 'Base64 encoded image for custom captcha', + typeOptions: { + rows: 3, + }, + displayOptions: { + show: { + type: ['solve_captcha'], + captcha: ['custom'], + }, + }, + }, + + // ============ DISCORD LOGIN FIELDS ============ + { + displayName: 'Discord Token', + name: 'token', + type: 'string', + default: '', + typeOptions: { + password: true, + }, + description: 'Discord authentication token', + displayOptions: { + show: { + type: ['discord_login'], + }, + }, + }, + ], + }, + ], + }, +]; + +// Helper function to build browser actions array from n8n UI data +export function buildBrowserActionsArray(actionsData: any): any[] { + if (!actionsData || !actionsData.actions || !Array.isArray(actionsData.actions)) { + return []; + } + + return actionsData.actions.map((action: any) => { + const baseAction: any = { + type: action.type, + }; + + // Add optional common properties if they have non-default values + if (action.when && action.when !== 'afterload') { + baseAction.when = action.when; + } + if (action.ignoreErrors === true) { + baseAction.ignoreErrors = true; + } + if (action.timeout && action.timeout !== 60000) { + baseAction.timeout = action.timeout; + } + + switch (action.type) { + case 'click': + baseAction.cssSelector = action.cssSelector; + if (action.wait) baseAction.wait = action.wait; + if (action.waitForSelector) baseAction.waitForSelector = action.waitForSelector; + if (action.direct) baseAction.direct = action.direct; + 
break; + + case 'type': + baseAction.cssSelector = action.cssSelector; + baseAction.text = action.text; + if (action.wait) baseAction.wait = action.wait; + if (action.direct) baseAction.direct = action.direct; + break; + + case 'goto': + baseAction.url = action.url; + if (action.wait) baseAction.wait = action.wait; + break; + + case 'wait': + baseAction.wait = action.waitTime || 1000; + break; + + case 'wait_for_selector': + baseAction.cssSelector = action.cssSelector; + break; + + case 'wait_for_function': + baseAction.code = action.code; + break; + + case 'wait_for_load_state': + baseAction.waitForLoadState = action.waitForLoadState; + break; + + case 'wait_for_cookie': + baseAction.cookieName = action.cookieName; + if (action.cookieValue) baseAction.cookieValue = action.cookieValue; + if (action.cookieDomain) baseAction.cookieDomain = action.cookieDomain; + if (action.pollIntervalMs) baseAction.pollIntervalMs = action.pollIntervalMs; + break; + + case 'execute_js': + baseAction.code = action.code; + if (action.dontReturnValue) baseAction.dontReturnValue = action.dontReturnValue; + break; + + case 'scroll': + if (action.cssSelector) baseAction.cssSelector = action.cssSelector; + if (action.repeat) baseAction.repeat = action.repeat; + if (action.delayMs) baseAction.delayMs = action.delayMs; + break; + + case 'hover': + baseAction.cssSelector = action.cssSelector; + break; + + case 'keyboard': + baseAction.value = action.value; + if (action.keyboardCssSelector) baseAction.cssSelector = action.keyboardCssSelector; + if (action.wait) baseAction.wait = action.wait; + if (action.waitForSelector) baseAction.waitForSelector = action.waitForSelector; + break; + + case 'dropdown': + baseAction.cssSelector = action.cssSelector; + if (action.dropdownMethod === 'index') { + baseAction.index = action.dropdownIndex; + } else { + baseAction.value = action.dropdownValue; + } + if (action.wait) baseAction.wait = action.wait; + if (action.waitForSelector) baseAction.waitForSelector 
= action.waitForSelector; + break; + + case 'switch_iframe': + baseAction.cssSelector = action.cssSelector; + break; + + case 'set_viewport': + if (action.viewportWidth) baseAction.width = action.viewportWidth; + if (action.viewportHeight) baseAction.height = action.viewportHeight; + if (action.wait) baseAction.wait = action.wait; + break; + + case 'if': + baseAction.condition = action.condition; + try { + baseAction.then = JSON.parse(action.thenActions || '[]'); + } catch (error) { + throw new Error(`Invalid JSON in 'Then Actions' for 'if' action: ${error instanceof Error ? error.message : 'Unknown error'}. Please provide valid JSON array format.`); + } + if (action.orActions && action.orActions.trim() !== '' && action.orActions.trim() !== '[]') { + try { + const orActions = JSON.parse(action.orActions); + if (Array.isArray(orActions) && orActions.length > 0) { + baseAction.or = orActions; + } + } catch (error) { + throw new Error(`Invalid JSON in 'Else Actions' for 'if' action: ${error instanceof Error ? error.message : 'Unknown error'}. Please provide valid JSON array format.`); + } + } + break; + + case 'while': + baseAction.condition = action.condition; + try { + baseAction.then = JSON.parse(action.thenActions || '[]'); + } catch (error) { + throw new Error(`Invalid JSON in 'Then Actions' for 'while' action: ${error instanceof Error ? error.message : 'Unknown error'}. 
Please provide valid JSON array format.`); + } + if (action.maxAttempts) baseAction.maxAttempts = action.maxAttempts; + break; + + case 'solve_captcha': + baseAction.captcha = action.captcha; + const captchaData: any = {}; + if (action.sitekey) captchaData.sitekey = action.sitekey; + if (action.captchaAction) captchaData.action = action.captchaAction; + if (action.invisible) captchaData.invisible = action.invisible; + if (action.base64Image) captchaData.base64Image = action.base64Image; + if (action.captchaCssSelector) captchaData.cssSelector = action.captchaCssSelector; + if (action.captchaReset) captchaData.reset = action.captchaReset; + if (action.captchaFast) captchaData.fast = action.captchaFast; + + if (Object.keys(captchaData).length > 0) { + baseAction.captchaData = captchaData; + } + if (action.websiteUrl) baseAction.websiteUrl = action.websiteUrl; + if (action.websiteKey) baseAction.websiteKey = action.websiteKey; + if (action.inputSelector) baseAction.inputSelector = action.inputSelector; + if (action.clickSelector) baseAction.clickSelector = action.clickSelector; + if (action.iframeSelector) baseAction.iframeSelector = action.iframeSelector; + break; + + case 'discord_login': + baseAction.token = action.token; + if (action.direct) baseAction.direct = action.direct; + if (action.wait) baseAction.wait = action.wait; + break; + + case 'remove_iframes': + // No additional properties needed + break; + } + + return baseAction; + }); +} + diff --git a/nodes/Scrappey/execute.ts b/nodes/Scrappey/execute.ts index f5391d4..c891637 100644 --- a/nodes/Scrappey/execute.ts +++ b/nodes/Scrappey/execute.ts @@ -1,18 +1,45 @@ -import { AutoRetryTypeBrowser, PostRequest, AutoRetryTypeRequest } from './methods'; +import { + AutoRetryTypeBrowser, + PostRequest, + AutoRetryTypeRequest, + SessionCreate, + SessionDestroy, + SessionList, + SessionActive, + WebSocketCreate, +} from './methods'; import { IExecuteFunctions, NodeOperationError } from 'n8n-workflow'; export async 
function executeScrappey(this: IExecuteFunctions, operation: string, itemIndex: number = 0) { switch (operation) { + // Request Builder case 'requestBuilder': return await PostRequest.call(this, itemIndex); + + // Auto-Retry Operations case 'httpRequestAutoRetry': return await AutoRetryTypeRequest.call(this, itemIndex); case 'httpRequestAutoRetryBrowser': return await AutoRetryTypeBrowser.call(this, itemIndex); + + // Session Management + case 'sessionCreate': + return await SessionCreate.call(this, itemIndex); + case 'sessionDestroy': + return await SessionDestroy.call(this, itemIndex); + case 'sessionList': + return await SessionList.call(this, itemIndex); + case 'sessionActive': + return await SessionActive.call(this, itemIndex); + + // WebSocket + case 'websocketCreate': + return await WebSocketCreate.call(this, itemIndex); + default: throw new NodeOperationError(this.getNode(), `Operation "${operation}" is not supported`, { description: 'Please select a valid operation from the available options.', - itemIndex // item index in error for better debugging + itemIndex, }); } -} \ No newline at end of file +} diff --git a/nodes/Scrappey/fields.ts b/nodes/Scrappey/fields.ts index d48b59e..27ece06 100644 --- a/nodes/Scrappey/fields.ts +++ b/nodes/Scrappey/fields.ts @@ -3,9 +3,126 @@ /* eslint-disable n8n-nodes-base/node-param-options-type-unsorted-items */ /* eslint-disable n8n-nodes-base/node-param-required-false */ import { INodeProperties } from 'n8n-workflow'; -import { Static_Country_Proxies , generateUUID} from './utils'; +import { Static_Country_Proxies, generateUUID } from './utils'; +import { browserActionsFields } from './browserActions'; + +// ============================================ +// Session Operation Fields +// ============================================ + +export const sessionFields: INodeProperties[] = [ + // Session Create Fields + { + displayName: 'Session ID', + name: 'sessionId', + type: 'string', + default: '', + placeholder: 
'my-session-123', + hint: 'Optional custom session ID. If not provided, one will be generated.', + displayOptions: { + show: { + scrappeyOperations: ['sessionCreate'], + }, + }, + }, + { + displayName: 'Session TTL (seconds)', + name: 'sessionTtl', + type: 'number', + default: 180, + hint: 'Time-to-live for the session in seconds', + displayOptions: { + show: { + scrappeyOperations: ['sessionCreate', 'websocketCreate'], + }, + }, + }, + { + displayName: 'Headless Mode', + name: 'headless', + type: 'options', + default: 'true', + options: [ + { name: 'Headless (No UI)', value: 'true' }, + { name: 'Headful (With UI)', value: 'false' }, + ], + hint: 'Whether to run browser without visible UI', + displayOptions: { + show: { + scrappeyOperations: ['sessionCreate', 'websocketCreate'], + }, + }, + }, + { + displayName: 'GeoIP Detection', + name: 'geoip', + type: 'options', + default: 'false', + options: [ + { name: 'Disabled', value: 'false' }, + { name: 'Enabled', value: 'true' }, + ], + hint: 'Enable GeoIP-based locale detection', + displayOptions: { + show: { + scrappeyOperations: ['sessionCreate', 'websocketCreate'], + }, + }, + }, + + // Session Destroy Fields + { + displayName: 'Session to Destroy', + name: 'sessionToDestroy', + type: 'string', + default: '', + required: true, + placeholder: 'session-id-to-destroy', + hint: 'The session ID to destroy', + displayOptions: { + show: { + scrappeyOperations: ['sessionDestroy'], + }, + }, + }, + + // Session List Fields + { + displayName: 'User ID', + name: 'userId', + type: 'number', + default: 0, + hint: 'Optional user ID to filter sessions', + displayOptions: { + show: { + scrappeyOperations: ['sessionList'], + }, + }, + }, + + // Session Active Check Fields + { + displayName: 'Session to Check', + name: 'sessionToCheck', + type: 'string', + default: '', + required: true, + placeholder: 'session-id-to-check', + hint: 'The session ID to check', + displayOptions: { + show: { + scrappeyOperations: ['sessionActive'], + }, 
+ }, + }, +]; + +// ============================================ +// Main Public Fields +// ============================================ export const publicFields: INodeProperties[] = [ + // Auto-retry notice { displayName: '⚠️This is a fallback solution and works only if the previous node is an HTTP node.

🚦 For best results, connect the error path of the HTTP node to this operation.

πŸ‘‰ See the example workflow.', @@ -19,6 +136,7 @@ export const publicFields: INodeProperties[] = [ }, }, + // URL Field { displayName: 'URL', name: 'url', @@ -33,6 +151,8 @@ export const publicFields: INodeProperties[] = [ }, }, }, + + // HTTP Method { displayName: 'HTTP Method', name: 'httpMethod', @@ -40,30 +160,12 @@ export const publicFields: INodeProperties[] = [ default: 'request.get', hint: 'HTTP method to use with the URL', options: [ - { - name: 'GET', - value: 'request.get', - }, - { - name: 'POST', - value: 'request.post', - }, - { - name: 'PUT', - value: 'request.put', - }, - { - name: 'DELETE', - value: 'request.delete', - }, - { - name: 'PATCH', - value: 'request.patch', - }, - { - name: 'PUBLISH', - value: 'request.publish', - }, + { name: 'GET', value: 'request.get' }, + { name: 'POST', value: 'request.post' }, + { name: 'PUT', value: 'request.put' }, + { name: 'DELETE', value: 'request.delete' }, + { name: 'PATCH', value: 'request.patch' }, + { name: 'PUBLISH', value: 'request.publish' }, ], displayOptions: { show: { @@ -71,24 +173,17 @@ export const publicFields: INodeProperties[] = [ }, }, }, + + // Request Type { displayName: 'Request Type', name: 'request_type', type: 'options', default: 'Request', options: [ - { - name: 'Browser', - value: 'Browser', - }, - { - name: 'Request', - value: 'Request', - }, - { - name: 'Patched Chrome Browser', - value: 'PatchedChrome', - }, + { name: 'Browser', value: 'Browser', description: 'Full browser with JavaScript execution' }, + { name: 'Request', value: 'Request', description: 'HTTP-only mode (faster, no browser)' }, + { name: 'Patched Chrome Browser', value: 'PatchedChrome', description: 'Chrome with enhanced anti-detection' }, ], displayOptions: { show: { @@ -96,6 +191,26 @@ export const publicFields: INodeProperties[] = [ }, }, }, + + // Referer + { + displayName: 'Referer', + name: 'referer', + type: 'string', + default: '', + placeholder: 'https://google.com', + hint: 'HTTP Referer header value', + 
displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + + // ============================================ + // Proxy Configuration + // ============================================ + { displayName: 'Which Proxy To Use', name: 'whichProxyToUse', @@ -116,6 +231,11 @@ export const publicFields: INodeProperties[] = [ value: 'proxyFromScrappey', description: 'Use the proxy defined in Scrappey for this request', }, + { + name: 'No Proxy', + value: 'noProxy', + description: 'Disable proxy usage for this request', + }, ], default: 'proxyFromCredentials', displayOptions: { @@ -124,6 +244,8 @@ export const publicFields: INodeProperties[] = [ 'requestBuilder', 'httpRequestAutoRetry', 'httpRequestAutoRetryBrowser', + 'sessionCreate', + 'websocketCreate', ], }, }, @@ -135,22 +257,10 @@ export const publicFields: INodeProperties[] = [ default: '', hint: 'Proxy type to use for the request', options: [ - { - name: 'Residential proxy', - value: '', - }, - { - name: 'Premium residential proxy', - value: 'premiumProxy', - }, - { - name: 'Datacenter proxy', - value: 'datacenter', - }, - { - name: 'Mobile proxy', - value: 'mobileProxy', - }, + { name: 'Residential proxy', value: '' }, + { name: 'Premium residential proxy', value: 'premiumProxy' }, + { name: 'Datacenter proxy', value: 'datacenter' }, + { name: 'Mobile proxy', value: 'mobileProxy' }, ], displayOptions: { show: { @@ -158,6 +268,8 @@ export const publicFields: INodeProperties[] = [ 'requestBuilder', 'httpRequestAutoRetry', 'httpRequestAutoRetryBrowser', + 'sessionCreate', + 'websocketCreate', ], whichProxyToUse: ['proxyFromScrappey'], }, @@ -176,6 +288,8 @@ export const publicFields: INodeProperties[] = [ 'requestBuilder', 'httpRequestAutoRetry', 'httpRequestAutoRetryBrowser', + 'sessionCreate', + 'websocketCreate', ], whichProxyToUse: ['proxyFromScrappey'], }, @@ -195,6 +309,8 @@ export const publicFields: INodeProperties[] = [ 'requestBuilder', 'httpRequestAutoRetry', 
'httpRequestAutoRetryBrowser', + 'sessionCreate', + 'websocketCreate', ], customProxyCountryBoolean: [true], }, @@ -209,12 +325,30 @@ export const publicFields: INodeProperties[] = [ required: false, displayOptions: { show: { - scrappeyOperations: ['requestBuilder'], + scrappeyOperations: ['requestBuilder', 'sessionCreate', 'websocketCreate'], proxyType: [''], whichProxyToUse: ['proxyFromScrappey'], }, }, }, + { + displayName: "Don't Change Proxy", + name: 'dontChangeProxy', + type: 'boolean', + default: false, + hint: 'Keep the same proxy for session reuse', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + whichProxyToUse: ['proxyFromScrappey'], + }, + }, + }, + + // ============================================ + // Body/Params Configuration + // ============================================ + { displayName: 'Body OR Params?', name: 'bodyOrParams', @@ -222,14 +356,8 @@ export const publicFields: INodeProperties[] = [ default: 'params_used', hint: 'Select whether to use Body or Params for the request', options: [ - { - name: 'Body', - value: 'body_used', - }, - { - name: 'Params', - value: 'params_used', - }, + { name: 'Body', value: 'body_used' }, + { name: 'Params', value: 'params_used' }, ], displayOptions: { show: { @@ -289,6 +417,11 @@ export const publicFields: INodeProperties[] = [ editor: 'jsEditor', }, }, + + // ============================================ + // Session Management + // ============================================ + { displayName: 'User Session', name: 'userSession', @@ -305,6 +438,22 @@ export const publicFields: INodeProperties[] = [ loadOptionsDependsOn: ['refreshSession'], }, }, + { + displayName: 'Close After Use', + name: 'closeAfterUse', + type: 'boolean', + default: false, + hint: 'Automatically close the session after the request', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + + // ============================================ + // Headers Configuration + // 
============================================ { displayName: 'Headers Input Method', @@ -313,14 +462,8 @@ export const publicFields: INodeProperties[] = [ default: 'fields', hint: 'Choose how to input headers', options: [ - { - name: 'Using Fields Below', - value: 'fields', - }, - { - name: 'Using JSON', - value: 'json', - }, + { name: 'Using Fields Below', value: 'fields' }, + { name: 'Using JSON', value: 'json' }, ], required: false, displayOptions: { @@ -384,6 +527,11 @@ export const publicFields: INodeProperties[] = [ rows: 4, }, }, + + // ============================================ + // Cookies Configuration + // ============================================ + { displayName: 'One String Cookie', name: 'oneStringCookie', @@ -405,7 +553,6 @@ export const publicFields: INodeProperties[] = [ placeholder: 'sessionid=abc123;csrftoken=xyz456;theme=light', hint: 'Cookie string to use for the request (format: name=value;name2=value2)', required: false, - displayOptions: { show: { scrappeyOperations: ['requestBuilder'], @@ -454,18 +601,27 @@ export const publicFields: INodeProperties[] = [ }, }, { - displayName: 'Datadome', - name: 'datadome', - type: 'boolean', - default: false, - hint: 'Enable Datadome protection bypass. Get the best results by selecting a preconfigured option. 
Advanced includes all common antibot protections.', + displayName: 'Cookie Jar (JSON)', + name: 'cookiejar', + type: 'string', + default: '', + placeholder: '[{"name": "session", "value": "abc", "domain": "example.com"}]', + hint: 'Array of cookie objects in JSON format', + typeOptions: { + rows: 3, + }, displayOptions: { show: { scrappeyOperations: ['requestBuilder'], - request_type: ['Browser'], + oneStringCookie: [false], }, }, }, + + // ============================================ + // Retry/Attempts + // ============================================ + { displayName: 'Attempts', name: 'attempts', @@ -474,7 +630,7 @@ export const publicFields: INodeProperties[] = [ hint: 'Number of attempts to make the request if it fails', typeOptions: { minValue: 1, - maxValue: 3, + maxValue: 5, }, required: false, displayOptions: { @@ -483,16 +639,43 @@ export const publicFields: INodeProperties[] = [ }, }, }, + { + displayName: 'Timeout (ms)', + name: 'timeout', + type: 'number', + default: 60000, + hint: 'Request timeout in milliseconds', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, ]; -export const AdvancedSettingsForBrowser: INodeProperties[] = [ +// ============================================ +// Antibot Bypass Options +// ============================================ + +export const antibotFields: INodeProperties[] = [ { - displayName: 'Antibot', - name: 'antibot', + displayName: 'Antibot Bypass Options', + name: 'antibotNotice', + type: 'notice', + default: '', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Cloudflare Bypass', + name: 'cloudflareBypass', type: 'boolean', default: false, - hint: 'Enable automatic solving of hCaptcha and reCAPTCHA challenges', - required: false, + hint: 'Enable Cloudflare-specific bypass techniques', displayOptions: { show: { scrappeyOperations: ['requestBuilder'], @@ -501,12 +684,11 @@ export const 
AdvancedSettingsForBrowser: INodeProperties[] = [ }, }, { - displayName: 'Add Random mouse movement', - name: 'addRandomMouseMovement', + displayName: 'Datadome Bypass', + name: 'datadome', type: 'boolean', default: false, - hint: 'Add random mouse movements to simulate human interaction during the session', - required: false, + hint: 'Enable Datadome protection bypass using specialized solver', displayOptions: { show: { scrappeyOperations: ['requestBuilder'], @@ -515,12 +697,64 @@ export const AdvancedSettingsForBrowser: INodeProperties[] = [ }, }, { - displayName: 'Record Video Session', - name: 'recordVideoSession', + displayName: 'Datadome Debug', + name: 'datadomeDebug', type: 'boolean', default: false, - hint: 'Record a video of the browser session for debugging purposes', - required: false, + hint: 'Include debug info in Datadome bypass failures', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + datadome: [true], + }, + }, + }, + { + displayName: 'Kasada Bypass', + name: 'kasadaBypass', + type: 'boolean', + default: false, + hint: 'Enable Kasada protection bypass', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Disable Antibot Detection', + name: 'disableAntiBot', + type: 'boolean', + default: false, + hint: 'Disable automatic antibot detection (use manual settings)', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Detect Incapsula', + name: 'detectIncapsula', + type: 'boolean', + default: false, + hint: 'Enable Incapsula/Imperva detection and solving', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'SPSNSPID Challenge', + name: 'spsnspidChallenge', + type: 'boolean', + default: false, + hint: 'Handle SPSNSPID challenges', displayOptions: { 
show: { scrappeyOperations: ['requestBuilder'], @@ -528,16 +762,19 @@ export const AdvancedSettingsForBrowser: INodeProperties[] = [ }, }, }, +]; +// ============================================ +// Captcha Solving Options +// ============================================ + +export const captchaFields: INodeProperties[] = [ { - displayName: 'CSS Selector', - name: 'cssSelector', - type: 'string', - default: '', - placeholder: - 'div[class="px-mobile-1 px-tablet-1 pt-mobile-0 pt-desktop-6 pt-tablet-6 pt-widescreen-6 pb-mobile-7 pb-desktop-6 pb-tablet-6 pb-widescreen-6"]', - hint: 'CSS selector to target specific elements on the page', - required: false, + displayName: 'Automatically Solve Captchas', + name: 'antibot', + type: 'boolean', + default: false, + hint: 'Automatically detect and solve captchas (hCaptcha, reCAPTCHA, etc.)', displayOptions: { show: { scrappeyOperations: ['requestBuilder'], @@ -546,13 +783,29 @@ export const AdvancedSettingsForBrowser: INodeProperties[] = [ }, }, { - displayName: 'Href (Optional)', - name: 'href', + displayName: 'Always Load Captcha Types', + name: 'alwaysLoad', + type: 'multiOptions', + default: [], + hint: 'Always load scripts for these captcha types', + options: [ + { name: 'reCAPTCHA', value: 'recaptcha' }, + { name: 'hCaptcha', value: 'hcaptcha' }, + { name: 'Turnstile', value: 'turnstile' }, + ], + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Captcha Answer', + name: 'captchaAnswer', type: 'string', default: '', - placeholder: 'https://example.com', - hint: 'URL to navigate to when the CSS selector is used', - required: false, + hint: 'Manual captcha answer (if you already have the solution)', displayOptions: { show: { scrappeyOperations: ['requestBuilder'], @@ -561,13 +814,12 @@ export const AdvancedSettingsForBrowser: INodeProperties[] = [ }, }, { - displayName: 'Intercept XHR/Fetch Request', - name: 'interceptXhrFetchRequest', + 
displayName: 'Captcha Success Intercept', + name: 'captchaSuccessIntercept', type: 'string', default: '', - placeholder: 'https://example.com/api/v2/Test', - hint: 'Intercept and return data from a specific XHR/Fetch request rather than the main page. For example, instead of returning google.com content, it will return the data from google.com/result.json in text format.', - required: false, + placeholder: 'https://example.com/success', + hint: 'URL pattern to intercept on captcha success', displayOptions: { show: { scrappeyOperations: ['requestBuilder'], @@ -576,3 +828,760 @@ export const AdvancedSettingsForBrowser: INodeProperties[] = [ }, }, ]; + +// ============================================ +// Browser Configuration Options +// ============================================ + +export const browserConfigFields: INodeProperties[] = [ + { + displayName: 'Browser Type', + name: 'browserType', + type: 'options', + default: 'firefox', + options: [ + { name: 'Firefox', value: 'firefox' }, + { name: 'Chrome', value: 'chrome' }, + { name: 'Safari', value: 'safari' }, + ], + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Browser Min Version', + name: 'browserMinVersion', + type: 'number', + default: 0, + hint: 'Minimum browser version (0 = any)', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Browser Max Version', + name: 'browserMaxVersion', + type: 'number', + default: 0, + hint: 'Maximum browser version (0 = any)', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Custom User Agent', + name: 'userAgent', + type: 'string', + default: '', + placeholder: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)...', + hint: 'Custom user agent string', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + 
request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Locales', + name: 'locales', + type: 'string', + default: '', + placeholder: 'en-US,en', + hint: 'Browser locales (comma-separated)', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Auto Set Locale', + name: 'setLocale', + type: 'boolean', + default: false, + hint: 'Automatically set locale based on proxy location', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Force Unique Fingerprint', + name: 'forceUniqueFingerprint', + type: 'boolean', + default: false, + hint: 'Create new browser instance with unique fingerprint for each request', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'WebRTC IPv4', + name: 'webrtcIpv4', + type: 'string', + default: '', + placeholder: '1.2.3.4', + hint: 'IPv4 address for WebRTC spoofing', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + forceUniqueFingerprint: [true], + }, + }, + }, + { + displayName: 'WebRTC IPv6', + name: 'webrtcIpv6', + type: 'string', + default: '', + placeholder: '2001:0db8::1', + hint: 'IPv6 address for WebRTC spoofing', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + forceUniqueFingerprint: [true], + }, + }, + }, +]; + +// ============================================ +// Response Options +// ============================================ + +export const responseOptionsFields: INodeProperties[] = [ + { + displayName: 'Response Options', + name: 'responseOptionsNotice', + type: 'notice', + default: '', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + { + displayName: 'Only Status Code', + name: 'onlyStatusCode', + type: 'boolean', + default: 
false, + hint: 'Return only the HTTP status code', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + { + displayName: 'Include Inner Text', + name: 'innerText', + type: 'boolean', + default: false, + hint: 'Include the inner text of the page', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Include Images', + name: 'includeImages', + type: 'boolean', + default: false, + hint: 'Include image URLs in the response', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Include Links', + name: 'includeLinks', + type: 'boolean', + default: false, + hint: 'Include link URLs in the response', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Regex Pattern', + name: 'regex', + type: 'string', + default: '', + placeholder: 'price: \\$([0-9.]+)', + hint: 'Extract content matching regex pattern(s). 
Use JSON array for multiple patterns.', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + { + displayName: 'Filter Fields', + name: 'filter', + type: 'multiOptions', + default: [], + hint: 'Return only specified fields in response', + options: [ + { name: 'Response', value: 'response' }, + { name: 'Cookies', value: 'cookies' }, + { name: 'Status Code', value: 'statusCode' }, + { name: 'Headers', value: 'responseHeaders' }, + { name: 'Current URL', value: 'currentUrl' }, + { name: 'User Agent', value: 'userAgent' }, + ], + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + { + displayName: 'Screenshot', + name: 'screenshot', + type: 'boolean', + default: false, + hint: 'Capture a screenshot of the page', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Upload Screenshot', + name: 'screenshotUpload', + type: 'boolean', + default: false, + hint: 'Upload screenshot to storage and return URL', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + screenshot: [true], + }, + }, + }, + { + displayName: 'Screenshot Width', + name: 'screenshotWidth', + type: 'number', + default: 1280, + hint: 'Screenshot width in pixels', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + screenshot: [true], + }, + }, + }, + { + displayName: 'Screenshot Height', + name: 'screenshotHeight', + type: 'number', + default: 1024, + hint: 'Screenshot height in pixels', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + screenshot: [true], + }, + }, + }, + { + displayName: 'Generate PDF', + name: 'pdf', + type: 'boolean', + default: false, + hint: 'Generate a PDF of the page', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, 
+ { + displayName: 'Base64 Screenshot', + name: 'base64', + type: 'boolean', + default: false, + hint: 'Return screenshot as base64 string', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + screenshot: [true], + }, + }, + }, + { + displayName: 'Base64 Response', + name: 'base64Response', + type: 'boolean', + default: false, + hint: 'Return HTML response as base64', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + { + displayName: 'Binary Response', + name: 'binary', + type: 'boolean', + default: false, + hint: 'Return binary response data', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + { + displayName: 'List All Redirects', + name: 'listAllRedirects', + type: 'boolean', + default: false, + hint: 'Track and return all redirect URLs', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, +]; + +// ============================================ +// Request Interception Options +// ============================================ + +export const interceptionFields: INodeProperties[] = [ + { + displayName: 'Abort On Detection', + name: 'abortOnDetection', + type: 'string', + default: '', + placeholder: 'analytics.com, tracking.js', + hint: 'URL patterns to block (comma-separated)', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Abort Only POST Requests', + name: 'abortOnPostRequest', + type: 'boolean', + default: false, + hint: 'Only abort POST requests matching patterns', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Wait For Abort Detection', + name: 'waitForAbortOnDetection', + type: 'boolean', + default: false, + hint: 'Wait for abort patterns before continuing', + displayOptions: { + show: { + scrappeyOperations: 
['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Abort Detection Timeout (ms)', + name: 'waitForAbortOnDetectionTimeout', + type: 'number', + default: 45000, + hint: 'Timeout for waiting for abort patterns', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + waitForAbortOnDetection: [true], + }, + }, + }, + { + displayName: 'Blacklisted Domains', + name: 'blackListedDomains', + type: 'string', + default: '', + placeholder: 'ads.com, tracker.net', + hint: 'Domains to block (comma-separated)', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Never Cache Domains', + name: 'neverCacheDomains', + type: 'string', + default: '', + placeholder: 'api.example.com', + hint: 'Domains to never cache (comma-separated)', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: "Don't Load Main Site", + name: 'dontLoadMainSite', + type: 'boolean', + default: false, + hint: "Don't load main site resources", + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: "Don't Load First Request", + name: 'dontLoadFirstRequest', + type: 'boolean', + default: false, + hint: 'Skip initial page load', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, +]; + +// ============================================ +// Advanced Browser Settings +// ============================================ + +export const AdvancedSettingsForBrowser: INodeProperties[] = [ + { + displayName: 'Add Random mouse movement', + name: 'addRandomMouseMovement', + type: 'boolean', + default: false, + hint: 'Add random mouse movements to simulate human interaction during the session', + required: false, + displayOptions: { + show: { + 
scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Force Mouse Movement', + name: 'forceMouseMovement', + type: 'boolean', + default: false, + hint: 'Force mouse movement simulation', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Record Video Session', + name: 'recordVideoSession', + type: 'boolean', + default: false, + hint: 'Record a video of the browser session for debugging purposes', + required: false, + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'CSS Selector', + name: 'cssSelector', + type: 'string', + default: '', + placeholder: 'div.content', + hint: 'CSS selector to wait for or extract content from', + required: false, + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Href (Optional)', + name: 'href', + type: 'string', + default: '', + placeholder: 'https://example.com', + hint: 'URL to navigate to when the CSS selector is used', + required: false, + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Intercept XHR/Fetch Request', + name: 'interceptXhrFetchRequest', + type: 'string', + default: '', + placeholder: 'https://example.com/api/v2/Test', + hint: 'Intercept and return data from a specific XHR/Fetch request. 
For multiple patterns, use JSON array.', + required: false, + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Full Page Load', + name: 'fullPageLoad', + type: 'boolean', + default: false, + hint: 'Wait for full page load before continuing', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: "Don't Wait on Page Load", + name: 'dontWaitOnPageLoad', + type: 'boolean', + default: false, + hint: "Don't wait for page load to complete", + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Wait For URL Pattern', + name: 'waitForUrl', + type: 'string', + default: '', + placeholder: 'https://example.com/success', + hint: 'Wait for URL to match this pattern before returning', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Remove Iframes', + name: 'removeIframes', + type: 'boolean', + default: false, + hint: 'Remove all iframes from the page', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Block Cookie Banners', + name: 'blockCookieBanners', + type: 'boolean', + default: false, + hint: 'Automatically block cookie consent banners', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Use Legacy Connection', + name: 'legacy', + type: 'boolean', + default: false, + hint: 'Use legacy browser connection (no WebSocket)', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'Use WebSocket Connection', + name: 'websocketConnection', + type: 'boolean', + default: false, + hint: 'Use WebSocket browser 
connection', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, + { + displayName: 'LocalStorage Data (JSON)', + name: 'localStorage', + type: 'string', + default: '', + placeholder: '{"key": "value"}', + hint: 'LocalStorage data to set (JSON format)', + typeOptions: { + rows: 3, + }, + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + request_type: ['Browser'], + }, + }, + }, +]; + +// ============================================ +// AI Parsing Options +// ============================================ + +export const aiParsingFields: INodeProperties[] = [ + { + displayName: 'Enable AI Parsing', + name: 'autoparse', + type: 'boolean', + default: false, + hint: 'Use AI to parse and structure HTML content', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + }, + }, + }, + { + displayName: 'AI Model', + name: 'model', + type: 'options', + default: 'deepseek', + options: [ + { name: 'DeepSeek', value: 'deepseek' }, + { name: 'GPT-4', value: 'gpt-4' }, + { name: 'GPT-3.5', value: 'gpt-3.5-turbo' }, + ], + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + autoparse: [true], + }, + }, + }, + { + displayName: 'AI API Key', + name: 'aiApiKey', + type: 'string', + default: '', + typeOptions: { + password: true, + }, + hint: 'API key for the AI parsing service', + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + autoparse: [true], + }, + }, + }, + { + displayName: 'Structure Definition (JSON)', + name: 'structure', + type: 'string', + default: '', + placeholder: '{"title": "Extract page title", "products": [{"name": "Product name", "price": "Price"}]}', + hint: 'Define the structure for AI parsing (JSON format)', + typeOptions: { + rows: 5, + }, + displayOptions: { + show: { + scrappeyOperations: ['requestBuilder'], + autoparse: [true], + }, + }, + }, +]; + +// ============================================ +// Export All 
Fields +// ============================================ + +export const allFields: INodeProperties[] = [ + ...sessionFields, + ...publicFields, + ...antibotFields, + ...captchaFields, + ...browserConfigFields, + ...responseOptionsFields, + ...interceptionFields, + ...AdvancedSettingsForBrowser, + ...aiParsingFields, + ...browserActionsFields, +]; diff --git a/nodes/Scrappey/methods.ts b/nodes/Scrappey/methods.ts index 437ee75..6d9a7d4 100644 --- a/nodes/Scrappey/methods.ts +++ b/nodes/Scrappey/methods.ts @@ -3,12 +3,20 @@ import { handleBody, HTTPRequest_Extract_Parameters } from './requestBodyBuilder import type { ScrappeyRequestBody } from './types'; import { genericHttpRequest } from './GenericFunctions'; +// ============================================ +// Request Builder Operation +// ============================================ + export const PostRequest = async function (this: IExecuteFunctions, itemIndex: number = 0) { const body = await handleBody(this, itemIndex); const response = await genericHttpRequest.call(this, 'POST', '', { body }); return response; }; +// ============================================ +// Auto-Retry Operations +// ============================================ + export const AutoRetryTypeBrowser = async function (this: IExecuteFunctions, itemIndex: number = 0) { const prev_HTTPRequest = await HTTPRequest_Extract_Parameters(this, itemIndex); @@ -47,7 +55,7 @@ export const AutoRetryTypeBrowser = async function (this: IExecuteFunctions, ite if (customProxyCountryBoolean) { const customProxyCountry = this.getNodeParameter('customProxyCountry', itemIndex, '') as string; if (customProxyCountry && customProxyCountry.trim() !== '') { - body.country = customProxyCountry; + body.proxyCountry = customProxyCountry; } } } else if (whichProxyToUse === 'proxyFromNode' && prev_HTTPRequest.processedProxy) { @@ -81,7 +89,7 @@ export const AutoRetryTypeRequest = async function (this: IExecuteFunctions, ite let body: ScrappeyRequestBody = { cmd: 
// ============================================
// Session Management Operations
// ============================================

/**
 * Builds the request body shared by `SessionCreate` and `WebSocketCreate`.
 *
 * Both operations send the same `session_ttl` / `headless` / `geoip` values and
 * resolve the proxy in exactly the same way; only the `cmd` differs. Keeping the
 * logic in one place prevents the two operations from drifting apart.
 *
 * Proxy resolution, driven by the `whichProxyToUse` parameter:
 * - `proxyFromCredentials`: uses `proxyUrl` from the `scrappeyApi` credentials, if set.
 * - `proxyFromScrappey`: sets the flag named by `proxyType`, optionally adds
 *   `proxyCountry`, and, when `custom_proxy` is enabled and the credentials carry
 *   a `proxyUrl`, sets `proxy` to that URL.
 * - any other value: no proxy fields are added.
 *
 * @param ctx - n8n execution context of the calling operation.
 * @param itemIndex - index of the input item whose parameters are read.
 * @param cmd - Scrappey command to place in the body.
 * @returns the request body, ready to be extended by the caller.
 */
const buildBrowserSessionBody = async (
	ctx: IExecuteFunctions,
	itemIndex: number,
	cmd: 'sessions.create' | 'websocket.create',
): Promise<Record<string, any>> => {
	const credentials = await ctx.getCredentials('scrappeyApi');

	const sessionTtl = ctx.getNodeParameter('sessionTtl', itemIndex, 180) as number;
	const headless = ctx.getNodeParameter('headless', itemIndex, 'true') as string;
	const geoip = ctx.getNodeParameter('geoip', itemIndex, 'false') as string;
	const whichProxyToUse = ctx.getNodeParameter(
		'whichProxyToUse',
		itemIndex,
		'proxyFromCredentials',
	) as string;

	const body: Record<string, any> = {
		cmd,
		session_ttl: sessionTtl,
		headless,
		geoip,
	};

	if (whichProxyToUse === 'proxyFromCredentials') {
		if (credentials?.proxyUrl) {
			body.proxy = credentials.proxyUrl as string;
		}
		return body;
	}

	if (whichProxyToUse !== 'proxyFromScrappey') {
		return body;
	}

	// The selected proxy type is sent as a boolean flag keyed by its own name.
	const proxyType = ctx.getNodeParameter('proxyType', itemIndex, '') as string;
	if (proxyType && proxyType.trim() !== '') {
		body[proxyType] = true;
	}

	const customProxyCountryBoolean = ctx.getNodeParameter(
		'customProxyCountryBoolean',
		itemIndex,
		false,
	) as boolean;
	if (customProxyCountryBoolean) {
		const customProxyCountry = ctx.getNodeParameter('customProxyCountry', itemIndex, '') as string;
		// Same guard as AutoRetryTypeBrowser: a whitespace-only value is not a country.
		if (customProxyCountry && customProxyCountry.trim() !== '') {
			body.proxyCountry = customProxyCountry;
		}
	}

	const customProxy = ctx.getNodeParameter('custom_proxy', itemIndex, false) as boolean;
	if (customProxy && credentials?.proxyUrl) {
		body.proxy = credentials.proxyUrl as string;
	}

	return body;
};

/**
 * Sends a `sessions.create` command.
 *
 * Reads `sessionTtl`, `headless`, `geoip` and the proxy parameters for the given
 * item; when the `sessionId` parameter is non-blank it is sent as `session`.
 *
 * @param itemIndex - index of the input item whose parameters are read (default 0).
 * @returns whatever `genericHttpRequest` resolves to for the POST request.
 */
export const SessionCreate = async function (this: IExecuteFunctions, itemIndex: number = 0) {
	const body = await buildBrowserSessionBody(this, itemIndex, 'sessions.create');

	// Add optional session ID
	const sessionId = this.getNodeParameter('sessionId', itemIndex, '') as string;
	if (sessionId && sessionId.trim() !== '') {
		body.session = sessionId;
	}

	const response = await genericHttpRequest.call(this, 'POST', '', { body });
	return response;
};

/**
 * Sends a `sessions.destroy` command for the session id read from the
 * `sessionToDestroy` parameter.
 *
 * @param itemIndex - index of the input item whose parameters are read (default 0).
 * @returns whatever `genericHttpRequest` resolves to for the POST request.
 */
export const SessionDestroy = async function (this: IExecuteFunctions, itemIndex: number = 0) {
	const sessionToDestroy = this.getNodeParameter('sessionToDestroy', itemIndex, '') as string;

	const body = {
		cmd: 'sessions.destroy',
		session: sessionToDestroy,
	};

	const response = await genericHttpRequest.call(this, 'POST', '', { body });
	return response;
};

/**
 * Sends a `sessions.list` command.
 *
 * The `userId` parameter is only included in the body when it is greater than 0;
 * the default of 0 therefore means "do not send a user id".
 *
 * @param itemIndex - index of the input item whose parameters are read (default 0).
 * @returns whatever `genericHttpRequest` resolves to for the POST request.
 */
export const SessionList = async function (this: IExecuteFunctions, itemIndex: number = 0) {
	const userId = this.getNodeParameter('userId', itemIndex, 0) as number;

	const body: any = {
		cmd: 'sessions.list',
	};

	if (userId > 0) {
		body.userId = userId;
	}

	const response = await genericHttpRequest.call(this, 'POST', '', { body });
	return response;
};

/**
 * Sends a `sessions.active` command for the session id read from the
 * `sessionToCheck` parameter.
 *
 * @param itemIndex - index of the input item whose parameters are read (default 0).
 * @returns whatever `genericHttpRequest` resolves to for the POST request.
 */
export const SessionActive = async function (this: IExecuteFunctions, itemIndex: number = 0) {
	const sessionToCheck = this.getNodeParameter('sessionToCheck', itemIndex, '') as string;

	const body = {
		cmd: 'sessions.active',
		session: sessionToCheck,
	};

	const response = await genericHttpRequest.call(this, 'POST', '', { body });
	return response;
};

/**
 * Sends a `websocket.create` command.
 *
 * Uses the same `sessionTtl`, `headless`, `geoip` and proxy parameters as
 * `SessionCreate`, but never sends a `session` id.
 *
 * @param itemIndex - index of the input item whose parameters are read (default 0).
 * @returns whatever `genericHttpRequest` resolves to for the POST request.
 */
export const WebSocketCreate = async function (this: IExecuteFunctions, itemIndex: number = 0) {
	const body = await buildBrowserSessionBody(this, itemIndex, 'websocket.create');

	const response = await genericHttpRequest.call(this, 'POST', '', { body });
	return response;
};
/**
 * Splits a comma-separated list into trimmed, non-empty entries.
 *
 * The callers obtain `value` through `getNodeParameter(...) as string`, which is
 * only a compile-time cast: at runtime the parameter may already be an array.
 * An array is therefore accepted and normalised the same way (entries trimmed,
 * blanks dropped) instead of failing on a missing `.trim()`.
 *
 * @param value - comma-separated string, an array of entries, or nothing.
 * @returns a new array of trimmed, non-empty strings; `[]` for blank input.
 */
const parseCommaSeparated = (value: string | readonly string[] | null | undefined): string[] => {
	if (value == null) return [];
	const parts: readonly unknown[] = Array.isArray(value) ? value : String(value).split(',');
	return parts.map((part) => String(part).trim()).filter((part) => part !== '');
};

/**
 * Parses a JSON string without throwing.
 *
 * @param value - JSON text. A value that is already an object or array is
 *   returned unchanged, because the `as string` casts at the call sites are not
 *   checked at runtime and the parameter may arrive pre-parsed.
 * @param defaultValue - returned when `value` is null/undefined, blank, not a
 *   string or object, or not valid JSON (default `null`).
 * @returns the parsed value, the object passed in, or `defaultValue`.
 */
const safeParseJson = (value: unknown, defaultValue: any = null): any => {
	if (value == null) return defaultValue;
	if (typeof value !== 'string') {
		// Already-structured input needs no parsing; other primitives are not JSON text.
		return typeof value === 'object' ? value : defaultValue;
	}
	if (value.trim() === '') return defaultValue;
	try {
		return JSON.parse(value);
	} catch {
		// Invalid JSON is reported to the caller through the default value.
		return defaultValue;
	}
};
const spsnspidChallenge = eFn.getNodeParameter('spsnspidChallenge', itemIndex, false) as boolean; + + // Captcha options + const alwaysLoad = eFn.getNodeParameter('alwaysLoad', itemIndex, []) as string[]; + const captchaAnswer = eFn.getNodeParameter('captchaAnswer', itemIndex, '') as string; + const captchaSuccessIntercept = eFn.getNodeParameter('captchaSuccessIntercept', itemIndex, '') as string; + + // Browser configuration + const browserType = eFn.getNodeParameter('browserType', itemIndex, 'firefox') as string; + const browserMinVersion = eFn.getNodeParameter('browserMinVersion', itemIndex, 0) as number; + const browserMaxVersion = eFn.getNodeParameter('browserMaxVersion', itemIndex, 0) as number; + const userAgent = eFn.getNodeParameter('userAgent', itemIndex, '') as string; + const locales = eFn.getNodeParameter('locales', itemIndex, '') as string; + const setLocale = eFn.getNodeParameter('setLocale', itemIndex, false) as boolean; + const forceUniqueFingerprint = eFn.getNodeParameter('forceUniqueFingerprint', itemIndex, false) as boolean; + const webrtcIpv4 = eFn.getNodeParameter('webrtcIpv4', itemIndex, '') as string; + const webrtcIpv6 = eFn.getNodeParameter('webrtcIpv6', itemIndex, '') as string; + + // Response options + const innerText = eFn.getNodeParameter('innerText', itemIndex, false) as boolean; + const includeImages = eFn.getNodeParameter('includeImages', itemIndex, false) as boolean; + const includeLinks = eFn.getNodeParameter('includeLinks', itemIndex, false) as boolean; + const screenshot = eFn.getNodeParameter('screenshot', itemIndex, false) as boolean; + const screenshotUpload = eFn.getNodeParameter('screenshotUpload', itemIndex, false) as boolean; + const screenshotWidth = eFn.getNodeParameter('screenshotWidth', itemIndex, 1280) as number; + const screenshotHeight = eFn.getNodeParameter('screenshotHeight', itemIndex, 1024) as number; + const pdf = eFn.getNodeParameter('pdf', itemIndex, false) as boolean; + const base64 = 
eFn.getNodeParameter('base64', itemIndex, false) as boolean; + + // Request interception + const abortOnDetection = eFn.getNodeParameter('abortOnDetection', itemIndex, '') as string; + const abortOnPostRequest = eFn.getNodeParameter('abortOnPostRequest', itemIndex, false) as boolean; + const waitForAbortOnDetection = eFn.getNodeParameter('waitForAbortOnDetection', itemIndex, false) as boolean; + const waitForAbortOnDetectionTimeout = eFn.getNodeParameter('waitForAbortOnDetectionTimeout', itemIndex, 45000) as number; + const blackListedDomains = eFn.getNodeParameter('blackListedDomains', itemIndex, '') as string; + const neverCacheDomains = eFn.getNodeParameter('neverCacheDomains', itemIndex, '') as string; + const dontLoadMainSite = eFn.getNodeParameter('dontLoadMainSite', itemIndex, false) as boolean; + const dontLoadFirstRequest = eFn.getNodeParameter('dontLoadFirstRequest', itemIndex, false) as boolean; + + // Advanced browser options + const fullPageLoad = eFn.getNodeParameter('fullPageLoad', itemIndex, false) as boolean; + const dontWaitOnPageLoad = eFn.getNodeParameter('dontWaitOnPageLoad', itemIndex, false) as boolean; + const waitForUrl = eFn.getNodeParameter('waitForUrl', itemIndex, '') as string; + const removeIframes = eFn.getNodeParameter('removeIframes', itemIndex, false) as boolean; + const blockCookieBanners = eFn.getNodeParameter('blockCookieBanners', itemIndex, false) as boolean; + const legacy = eFn.getNodeParameter('legacy', itemIndex, false) as boolean; + const websocketConnection = eFn.getNodeParameter('websocketConnection', itemIndex, false) as boolean; + const localStorage = eFn.getNodeParameter('localStorage', itemIndex, '') as string; + + // Browser Actions + const browserActions = eFn.getNodeParameter('browserActions', itemIndex, {}) as any; + + // Apply basic browser options if (antibot) body.automaticallySolveCaptchas = true; - if (addRandomMouseMovement) body.mouseMovements = true; - + if (forceMouseMovement) body.forceMouseMovement = 
true; if (recordVideoSession) body.video = true; - if (cssSelector && cssSelector.trim() !== '') body.cssSelector = cssSelector; - if (href && href.trim() !== '') body.customAttribute = href; - if (interceptXhrFetchRequest && interceptXhrFetchRequest.trim() !== '') - body.interceptFetchRequest = interceptXhrFetchRequest; + // Handle intercept fetch request (can be string or array) + if (interceptXhrFetchRequest && interceptXhrFetchRequest.trim() !== '') { + // Check if it's a JSON array + const parsed = safeParseJson(interceptXhrFetchRequest); + if (Array.isArray(parsed)) { + body.interceptFetchRequest = parsed; + } else { + body.interceptFetchRequest = interceptXhrFetchRequest; + } + } + + // Apply antibot options + if (cloudflareBypass) body.cloudflareBypass = true; + if (datadome) body.datadomeBypass = true; + if (datadomeDebug) body.datadomeDebug = true; + if (kasadaBypass) body.kasadaBypass = true; + if (disableAntiBot) body.disableAntiBot = true; + if (detectIncapsula) body.detectIncapsula = true; + if (spsnspidChallenge) body.spsnspidChallenge = true; + + // Apply captcha options + if (alwaysLoad && alwaysLoad.length > 0) body.alwaysLoad = alwaysLoad; + if (captchaAnswer && captchaAnswer.trim() !== '') body.captchaAnswer = captchaAnswer; + if (captchaSuccessIntercept && captchaSuccessIntercept.trim() !== '') body.captchaSuccessIntercept = captchaSuccessIntercept; + + // Apply browser configuration + if (browserType || browserMinVersion > 0 || browserMaxVersion > 0) { + const browserSpec: any = { name: browserType || 'firefox' }; + if (browserMinVersion > 0) browserSpec.minVersion = browserMinVersion; + if (browserMaxVersion > 0) browserSpec.maxVersion = browserMaxVersion; + body.browser = [browserSpec]; + } + if (userAgent && userAgent.trim() !== '') body.userAgent = userAgent; + if (locales && locales.trim() !== '') { + body.locales = parseCommaSeparated(locales); + } + if (setLocale) body.setLocale = true; + if (forceUniqueFingerprint) 
body.forceUniqueFingerprint = true; + if (webrtcIpv4 && webrtcIpv4.trim() !== '') body.webrtcIpv4 = webrtcIpv4; + if (webrtcIpv6 && webrtcIpv6.trim() !== '') body.webrtcIpv6 = webrtcIpv6; + + // Apply response options + if (innerText) body.innerText = true; + if (includeImages) body.includeImages = true; + if (includeLinks) body.includeLinks = true; + if (screenshot) { + body.screenshot = true; + if (screenshotUpload) body.screenshotUpload = true; + if (screenshotWidth !== 1280) body.screenshotWidth = screenshotWidth; + if (screenshotHeight !== 1024) body.screenshotHeight = screenshotHeight; + if (base64) body.base64 = true; + } + if (pdf) body.pdf = true; + + // Apply request interception options + if (abortOnDetection && abortOnDetection.trim() !== '') { + body.abortOnDetection = parseCommaSeparated(abortOnDetection); + } + if (abortOnPostRequest) body.abortOnPostRequest = true; + if (waitForAbortOnDetection) { + body.waitForAbortOnDetection = true; + if (waitForAbortOnDetectionTimeout !== 45000) { + body.waitForAbortOnDetectionTimeout = waitForAbortOnDetectionTimeout; + } + } + if (blackListedDomains && blackListedDomains.trim() !== '') { + body.blackListedDomains = parseCommaSeparated(blackListedDomains); + } + if (neverCacheDomains && neverCacheDomains.trim() !== '') { + body.neverCacheDomains = parseCommaSeparated(neverCacheDomains); + } + if (dontLoadMainSite) body.dontLoadMainSite = true; + if (dontLoadFirstRequest) body.dontLoadFirstRequest = true; + + // Apply advanced browser options + if (fullPageLoad) body.fullPageLoad = true; + if (dontWaitOnPageLoad) body.dontWaitOnPageLoad = true; + if (waitForUrl && waitForUrl.trim() !== '') body.waitForUrl = waitForUrl; + if (removeIframes) body.removeIframes = true; + if (blockCookieBanners) body.blockCookieBanners = true; + if (legacy) body.legacy = true; + if (websocketConnection) body.websocket = true; + if (localStorage && localStorage.trim() !== '') { + const parsedLocalStorage = safeParseJson(localStorage); 
+ if (parsedLocalStorage) { + body.localStorage = parsedLocalStorage; + } + } + + // Build browser actions array + const actionsArray = buildBrowserActionsArray(browserActions); + if (actionsArray.length > 0) { + body.browserActions = actionsArray; + } }; export const handleBody = async (eFn: IExecuteFunctions, itemIndex: number = 0) => { @@ -196,18 +358,34 @@ export const handleBody = async (eFn: IExecuteFunctions, itemIndex: number = 0) const jsonHeaders = eFn.getNodeParameter('jsonHeaders', itemIndex, '') as string; const customCookies = eFn.getNodeParameter('customCookies', itemIndex, {}) as Record; const customProxyCountry = eFn.getNodeParameter('customProxyCountry', itemIndex, '') as string; - const customProxyCountryBoolean = eFn.getNodeParameter( - 'customProxyCountryBoolean', - itemIndex, - false, - ) as boolean; + const customProxyCountryBoolean = eFn.getNodeParameter('customProxyCountryBoolean', itemIndex, false) as boolean; const customProxy = eFn.getNodeParameter('custom_proxy', itemIndex, false) as boolean; const whichProxyToUse = eFn.getNodeParameter('whichProxyToUse', itemIndex, 'proxyFromCredentials') as string; - const attempts = eFn.getNodeParameter('attempts', itemIndex, 3) as number; - const datadome = eFn.getNodeParameter('datadome', itemIndex, false) as boolean; + const attempts = eFn.getNodeParameter('attempts', itemIndex, 1) as number; const oneStringCookie = eFn.getNodeParameter('oneStringCookie', itemIndex, false) as boolean; const cookie = eFn.getNodeParameter('cookie', itemIndex, '') as string; + // New fields + const referer = eFn.getNodeParameter('referer', itemIndex, '') as string; + const closeAfterUse = eFn.getNodeParameter('closeAfterUse', itemIndex, false) as boolean; + const dontChangeProxy = eFn.getNodeParameter('dontChangeProxy', itemIndex, false) as boolean; + const cookiejar = eFn.getNodeParameter('cookiejar', itemIndex, '') as string; + const timeout = eFn.getNodeParameter('timeout', itemIndex, 60000) as number; + + // 
Response options + const onlyStatusCode = eFn.getNodeParameter('onlyStatusCode', itemIndex, false) as boolean; + const regex = eFn.getNodeParameter('regex', itemIndex, '') as string; + const filter = eFn.getNodeParameter('filter', itemIndex, []) as string[]; + const base64Response = eFn.getNodeParameter('base64Response', itemIndex, false) as boolean; + const binary = eFn.getNodeParameter('binary', itemIndex, false) as boolean; + const listAllRedirects = eFn.getNodeParameter('listAllRedirects', itemIndex, false) as boolean; + + // AI parsing options + const autoparse = eFn.getNodeParameter('autoparse', itemIndex, false) as boolean; + const model = eFn.getNodeParameter('model', itemIndex, '') as string; + const aiApiKey = eFn.getNodeParameter('aiApiKey', itemIndex, '') as string; + const structure = eFn.getNodeParameter('structure', itemIndex, '') as string; + if (url && url.trim() !== '') { // Process URL expressions - starts with '=' or contains {{ $json.key }} url = processUrlExpressions(url, eFn, itemIndex); @@ -220,10 +398,14 @@ export const handleBody = async (eFn: IExecuteFunctions, itemIndex: number = 0) } if (httpMethod && httpMethod.trim() !== '') body.cmd = httpMethod; + if (referer && referer.trim() !== '') body.referer = referer; - if (proxyType && proxyType.trim() !== '') body[proxyType] = true; + if (proxyType && proxyType.trim() !== '' && whichProxyToUse === 'proxyFromScrappey') { + body[proxyType] = true; + } if (userSession && userSession.trim() !== '') body.session = userSession; + if (closeAfterUse) body.closeAfterUse = true; if (httpMethod !== 'request.get') { if (bodyOrParams === 'body_used') { @@ -260,7 +442,7 @@ export const handleBody = async (eFn: IExecuteFunctions, itemIndex: number = 0) } catch (error) { throw new NodeOperationError(eFn.getNode(), 'Invalid JSON headers format', { description: `The provided JSON headers are not valid: ${error instanceof Error ? 
error.message : 'Unknown error'}`, - itemIndex // Include item index in error + itemIndex, }); } } @@ -293,7 +475,16 @@ export const handleBody = async (eFn: IExecuteFunctions, itemIndex: number = 0) } } + // Handle cookie jar + if (cookiejar && cookiejar.trim() !== '') { + const parsedCookiejar = safeParseJson(cookiejar); + if (parsedCookiejar && Array.isArray(parsedCookiejar)) { + body.cookiejar = parsedCookiejar; + } + } + if (customProxyCountryBoolean) body.proxyCountry = customProxyCountry; + if (dontChangeProxy) body.dontChangeProxy = true; // Handle proxy configuration if (whichProxyToUse === 'proxyFromCredentials' && credentials?.proxyUrl) { @@ -302,14 +493,51 @@ export const handleBody = async (eFn: IExecuteFunctions, itemIndex: number = 0) if (customProxy && credentials?.proxyUrl) { body.proxy = credentials.proxyUrl as string; } + } else if (whichProxyToUse === 'noProxy') { + body.noProxy = true; } - if (datadome && request_type === 'Browser') { - body.datadomeBypass = true; + // Handle attempts/retries + if (attempts > 1) { + body.attempts = attempts; + body.retries = attempts; } - body.attempts = attempts; + // Handle timeout + if (timeout !== 60000) { + body.timeout = timeout; + } + // Handle response options + if (onlyStatusCode) body.onlyStatusCode = true; + if (regex && regex.trim() !== '') { + // Check if it's a JSON array + const parsedRegex = safeParseJson(regex); + if (Array.isArray(parsedRegex)) { + body.regex = parsedRegex; + } else { + body.regex = regex; + } + } + if (filter && filter.length > 0) body.filter = filter; + if (base64Response) body.base64Response = true; + if (binary) body.binary = true; + if (listAllRedirects) body.listAllRedirects = true; + + // Handle AI parsing + if (autoparse) { + body.autoparse = true; + if (model && model.trim() !== '') body.model = model; + if (aiApiKey && aiApiKey.trim() !== '') body.api_key = aiApiKey; + if (structure && structure.trim() !== '') { + const parsedStructure = safeParseJson(structure); + 
if (parsedStructure) { + body.structure = parsedStructure; + } + } + } + + // Handle whitelisted domains from credentials if (credentials?.whitelistedDomains) { // Ensure whitelistedDomains is passed as an array const domains = Array.isArray(credentials.whitelistedDomains) @@ -321,7 +549,7 @@ export const handleBody = async (eFn: IExecuteFunctions, itemIndex: number = 0) body.whitelistedDomains = domains; } - return body; + return body as ScrappeyRequestBody; }; export const HTTPRequest_Extract_Parameters = async (eFn: IExecuteFunctions, itemIndex: number = 0) => { @@ -458,4 +686,4 @@ export const HTTPRequest_Extract_Parameters = async (eFn: IExecuteFunctions, ite headerParameters, bodyParameters, }; -}; \ No newline at end of file +}; diff --git a/nodes/Scrappey/types.ts b/nodes/Scrappey/types.ts index 2fd75cf..6bda813 100644 --- a/nodes/Scrappey/types.ts +++ b/nodes/Scrappey/types.ts @@ -1,3 +1,489 @@ +// ============================================ +// Scrappey API Types - Complete Type Definitions +// Based on API Documentation +// ============================================ + +// ============================================ +// Browser Action Types +// ============================================ + +export interface BrowserActionBase { + type: string; + when?: 'beforeload' | 'afterload'; + ignoreErrors?: boolean; + timeout?: number; +} + +export interface ClickAction extends BrowserActionBase { + type: 'click'; + cssSelector: string; + wait?: number; + waitForSelector?: string; + direct?: boolean; +} + +export interface TypeAction extends BrowserActionBase { + type: 'type'; + cssSelector: string; + text: string; + wait?: number; + direct?: boolean; +} + +export interface GotoAction extends BrowserActionBase { + type: 'goto'; + url: string; + wait?: number; +} + +export interface WaitAction extends BrowserActionBase { + type: 'wait'; + wait: number; +} + +export interface WaitForSelectorAction extends BrowserActionBase { + type: 'wait_for_selector'; + 
cssSelector: string; +} + +export interface WaitForFunctionAction extends BrowserActionBase { + type: 'wait_for_function'; + code: string; +} + +export interface WaitForLoadStateAction extends BrowserActionBase { + type: 'wait_for_load_state'; + waitForLoadState: 'domcontentloaded' | 'networkidle' | 'load'; +} + +export interface WaitForCookieAction extends BrowserActionBase { + type: 'wait_for_cookie'; + cookieName: string; + cookieValue?: string; + cookieDomain?: string; + pollIntervalMs?: number; +} + +export interface ExecuteJsAction extends BrowserActionBase { + type: 'execute_js'; + code: string; + dontReturnValue?: boolean; +} + +export interface ScrollAction extends BrowserActionBase { + type: 'scroll'; + cssSelector?: string; + repeat?: number; + delayMs?: number; +} + +export interface HoverAction extends BrowserActionBase { + type: 'hover'; + cssSelector: string; +} + +export interface KeyboardAction extends BrowserActionBase { + type: 'keyboard'; + value: 'tab' | 'enter' | 'space' | 'arrowdown' | 'arrowup' | 'arrowleft' | 'arrowright' | 'backspace' | 'clear'; + cssSelector?: string; + wait?: number; + waitForSelector?: string; +} + +export interface DropdownAction extends BrowserActionBase { + type: 'dropdown'; + cssSelector: string; + index?: number; + value?: string; + wait?: number; + waitForSelector?: string; +} + +export interface SwitchIframeAction extends BrowserActionBase { + type: 'switch_iframe'; + cssSelector: string; +} + +export interface SetViewportAction extends BrowserActionBase { + type: 'set_viewport'; + width?: number; + height?: number; + wait?: number; +} + +export interface IfAction extends BrowserActionBase { + type: 'if'; + condition: string; + then: BrowserAction[]; + or?: BrowserAction[]; +} + +export interface WhileAction extends BrowserActionBase { + type: 'while'; + condition: string; + then: BrowserAction[]; + maxAttempts?: number; +} + +export interface CaptchaData { + sitekey?: string; + action?: string; + pageAction?: 
string; + invisible?: boolean; + base64Image?: string; + cssSelector?: string; + reset?: boolean; + fast?: boolean; +} + +export type CaptchaType = + | 'turnstile' + | 'recaptcha' + | 'recaptchav2' + | 'recaptchav3' + | 'hcaptcha' + | 'hcaptcha_inside' + | 'hcaptcha_enterprise_inside' + | 'funcaptcha' + | 'perimeterx' + | 'mtcaptcha' + | 'mtcaptchaisolated' + | 'v4guard' + | 'custom' + | 'fingerprintjscom' + | 'fingerprintjs_curseforge'; + +export interface SolveCaptchaAction extends BrowserActionBase { + type: 'solve_captcha'; + captcha: CaptchaType; + captchaData?: CaptchaData; + websiteUrl?: string; + websiteKey?: string; + cssSelector?: string; + inputSelector?: string; + clickSelector?: string; + iframeSelector?: string; + coreName?: string; +} + +export interface DiscordLoginAction extends BrowserActionBase { + type: 'discord_login'; + token: string; + direct?: boolean; + wait?: number; +} + +export interface RemoveIframesAction extends BrowserActionBase { + type: 'remove_iframes'; +} + +export type BrowserAction = + | ClickAction + | TypeAction + | GotoAction + | WaitAction + | WaitForSelectorAction + | WaitForFunctionAction + | WaitForLoadStateAction + | WaitForCookieAction + | ExecuteJsAction + | ScrollAction + | HoverAction + | KeyboardAction + | DropdownAction + | SwitchIframeAction + | SetViewportAction + | IfAction + | WhileAction + | SolveCaptchaAction + | DiscordLoginAction + | RemoveIframesAction; + +// ============================================ +// Browser Configuration Types +// ============================================ + +export interface BrowserSpec { + name: 'firefox' | 'chrome' | 'safari'; + minVersion?: number; + maxVersion?: number; +} + +export interface DeviceSpec { + [key: string]: unknown; +} + +export interface OperatingSystemSpec { + [key: string]: unknown; +} + +// ============================================ +// Cookie Types +// ============================================ + +export interface CookieObject { + name: string; + 
value: string; + domain?: string; + path?: string; + expires?: number; + httpOnly?: boolean; + secure?: boolean; + sameSite?: 'Strict' | 'Lax' | 'None'; +} + +// ============================================ +// Main Request Body Interface +// ============================================ + +export interface ScrappeyRequestBody { + // Command and URL (Required) + cmd: string; + url?: string; + + // Request Settings + referer?: string; + postData?: string | Record; + requestType?: 'request'; + onlyStatusCode?: boolean; + method?: string; + + // Browser Configuration + browser?: BrowserSpec[]; + userAgent?: string; + device?: DeviceSpec; + operatingSystem?: OperatingSystemSpec; + locales?: string[]; + setLocale?: boolean; + headful?: boolean; + devTools?: boolean; + showBrowser?: boolean; + + // Proxy Settings + proxy?: string; + proxyCountry?: string; + noProxy?: boolean; + premiumProxy?: boolean; + mobileProxy?: boolean; + dontChangeProxy?: boolean; + + // Session Management + session?: string; + closeAfterUse?: boolean; + userId?: number; + MAX_SESSIONS_OPEN?: number; + session_ttl?: number; + + // Antibot Bypass Options + cloudflareBypass?: boolean; + datadomeBypass?: boolean; + datadomeDebug?: boolean; + kasadaBypass?: boolean; + disableAntiBot?: boolean; + detectIncapsula?: boolean; + spsnspidChallenge?: boolean; + + // Captcha Solving + automaticallySolveCaptchas?: boolean; + alwaysLoad?: string[]; + captchaAnswer?: string; + captchaSuccessIntercept?: string; + + // Browser Actions + browserActions?: BrowserAction[]; + waitForSpecificActionOnSite?: boolean; + mouseMovements?: boolean; + forceMouseMovement?: boolean; + + // Response Data Options + cssSelector?: string; + innerText?: boolean; + includeImages?: boolean; + includeLinks?: boolean; + regex?: string | string[]; + screenshot?: boolean; + screenshotUpload?: boolean; + screenshotWidth?: number; + screenshotHeight?: number; + video?: boolean; + base64?: boolean; + base64Response?: boolean; + binary?: 
boolean; + pdf?: boolean; + filter?: string[]; + + // Request Interception + interceptFetchRequest?: string | string[]; + abortOnDetection?: string[]; + abortOnPostRequest?: boolean; + waitForAbortOnDetection?: boolean; + waitForAbortOnDetectionTimeout?: number; + whitelistedDomains?: string[]; + blackListedDomains?: string[]; + neverCacheDomains?: string[]; + dontLoadMainSite?: boolean; + dontLoadFirstRequest?: boolean; + + // Advanced Options + customHeaders?: Record; + setCustomHeaders?: boolean; + cookies?: string; + cookiejar?: CookieObject[]; + localStorage?: Record; + javascriptReturn?: unknown[]; + autoparse?: boolean; + structure?: Record; + model?: string; + api_key?: string; + fullPageLoad?: boolean; + dontWaitOnPageLoad?: boolean; + waitForUrl?: string; + listAllRedirects?: boolean; + removeIframes?: boolean; + blockCookieBanners?: boolean; + legacy?: boolean; + websocket?: boolean; + forceUniqueFingerprint?: boolean; + webrtcIpv4?: string; + webrtcIpv6?: string; + retries?: number; + attempts?: number; + timeout?: number; + + // For PatchedChrome + noDriver?: boolean; + + // Custom attribute (href) + customAttribute?: string; + + // Dynamic properties for proxy types + [key: string]: string | boolean | number | unknown[] | Record | undefined; +} + +// ============================================ +// Session Request Types +// ============================================ + +export interface SessionCreateRequest { + cmd: 'sessions.create'; + session?: string; + proxy?: string; + userId?: number; + session_ttl?: number; + headless?: string; + geoip?: string; +} + +export interface SessionDestroyRequest { + cmd: 'sessions.destroy'; + session: string; +} + +export interface SessionListRequest { + cmd: 'sessions.list'; + userId?: number; +} + +export interface SessionActiveRequest { + cmd: 'sessions.active'; + session: string; +} + +export interface WebSocketCreateRequest { + cmd: 'websocket.create'; + userId?: number; + proxy?: string; + session_ttl?: 
number; + headless?: string; + geoip?: string; +} + +// ============================================ +// Response Types +// ============================================ + +export interface DetectedAntibotProviders { + providers: string[]; + confidence: Record; + primaryProvider?: string; +} + +export interface InterceptedRequest { + url: string; + method: string; + headers: Record; + body?: string; + response?: unknown; +} + +export interface SolutionResponse { + verified: boolean; + type?: 'browser' | 'request'; + response?: string; + statusCode?: number; + currentUrl?: string; + userAgent?: string; + cookies?: CookieObject[]; + cookieString?: string; + responseHeaders?: Record; + requestHeaders?: Record; + requestBody?: string; + method?: string; + ipInfo?: Record; + innerText?: string; + localStorageData?: Record; + screenshot?: string; + screenshotUrl?: string; + videoUrl?: string; + interceptFetchRequestResponse?: InterceptedRequest | InterceptedRequest[]; + javascriptReturn?: unknown[]; + base64Response?: string; + listAllRedirectsResponse?: string[]; + additionalCost?: number; + wsEndpoint?: string; + ws?: string; + detectedAntibotProviders?: DetectedAntibotProviders; + captchaSolveResult?: { + type: string; + status: string; + timeTaken: number; + }; + abortOnDetectionResponse?: Array<{ + url: string; + headers: Record; + payload?: string; + }>; +} + +export interface ScrappeyResponse { + solution: SolutionResponse; + timeElapsed: number; + data: 'success' | 'error'; + session?: string; + error?: string; + info?: string; + fingerprint?: Record; + context?: Record; +} + +export interface SessionListResponse { + sessions: Array<{ + session: string; + lastAccessed: number; + }>; + open: number; + limit: number; + timeElapsed: number; +} + +export interface SessionActiveResponse { + active: boolean; +} + +// ============================================ +// Legacy Types (for backward compatibility) +// ============================================ + export 
interface HTTPRequest_Body { cmd: string; method: string; @@ -9,19 +495,3 @@ export interface HTTPRequest_Body { customHeaders: { [key: string]: string }; proxy: string; } - -export interface ScrappeyRequestBody { - cmd: string; - url: string; - datadomeBypass?: boolean; - retries?: number; - mouseMovements?: boolean; - automaticallySolveCaptchas?: boolean; - customHeaders?: any; - proxy?: string; - bodyParameters?: any; - postData?: string; - proxyCountry?: string; - // For proxy types and other dynamic properties - [key: string]: string | boolean | number | any | undefined; -}