From 47939edde5a0c62b782c10fa4964339098c05b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Wed, 4 Feb 2026 17:05:45 +0100 Subject: [PATCH 1/5] feat: Zod-based Configuration extending Crawlee's new approach Refactors the Configuration class to use Zod-based field definitions, extending Crawlee's new Configuration class cleanly without monkey patching. Key changes: - Uses `crawleeConfigFields` spread with Apify-specific overrides and additions - Each field defines schema and env var aliases in one place - Supports multiple env var aliases per field (e.g., ACTOR_ID, APIFY_ACTOR_ID) - Removes all monkey patching of CoreConfiguration - Adds zod as direct dependency Example field definition: ```ts actorId: field(z.string().optional(), { env: ['ACTOR_ID', 'APIFY_ACTOR_ID'], }), ``` Requires: apify/crawlee#3387 Co-Authored-By: Claude Opus 4.5 --- packages/apify/package.json | 3 +- packages/apify/src/configuration.ts | 409 ++++++++++++++++------------ 2 files changed, 235 insertions(+), 177 deletions(-) diff --git a/packages/apify/package.json b/packages/apify/package.json index f3ddcf0be1..668f802f3f 100644 --- a/packages/apify/package.json +++ b/packages/apify/package.json @@ -62,6 +62,7 @@ "ow": "^2.0.0", "semver": "^7.7.2", "tslib": "^2.8.1", - "ws": "^8.18.2" + "ws": "^8.18.2", + "zod": "^3.24.0" } } diff --git a/packages/apify/src/configuration.ts b/packages/apify/src/configuration.ts index b8dfacd42c..0156872571 100644 --- a/packages/apify/src/configuration.ts +++ b/packages/apify/src/configuration.ts @@ -1,45 +1,223 @@ -import type { ConfigurationOptions as CoreConfigurationOptions } from '@crawlee/core'; -import { Configuration as CoreConfiguration } from '@crawlee/core'; +import { + coerceBoolean, + Configuration as CrawleeConfiguration, + crawleeConfigFields, + field, + type FieldDefinitions, + type InferInputOptions, + type InferOutputOptions, +} from '@crawlee/core'; +import { z } from 'zod'; import type { META_ORIGINS } from 
'@apify/consts'; -import { - ACTOR_ENV_VARS, - APIFY_ENV_VARS, - LOCAL_ACTOR_ENV_VARS, - LOCAL_APIFY_ENV_VARS, -} from '@apify/consts'; - -export interface ConfigurationOptions extends CoreConfigurationOptions { - metamorphAfterSleepMillis?: number; - actorEventsWsUrl?: string; - token?: string; - actorId?: string; - actorRunId?: string; - actorTaskId?: string; - apiBaseUrl?: string; - // apiBaseUrl is the internal API URL, accessible only within the platform(private network), - // while apiPublicBaseUrl is the public API URL, available externally(through internet). - apiPublicBaseUrl?: string; - containerPort?: number; - containerUrl?: string; - proxyHostname?: string; - proxyPassword?: string; - proxyPort?: number; - proxyStatusUrl?: string; - /** - * @deprecated use `containerPort` instead - */ - standbyPort?: number; - standbyUrl?: string; - isAtHome?: boolean; - userId?: string; - inputSecretsPrivateKeyPassphrase?: string; - inputSecretsPrivateKeyFile?: string; - maxTotalChargeUsd?: number; - metaOrigin?: (typeof META_ORIGINS)[keyof typeof META_ORIGINS]; - testPayPerEvent?: boolean; - useChargingLogDataset?: boolean; -} + +// ============================================================================ +// Apify Configuration Field Definitions +// ============================================================================ + +/** + * Field definitions for Apify SDK Configuration. + * Extends Crawlee's configuration with Apify-specific fields. 
+ */ +export const apifyConfigFields = { + ...crawleeConfigFields, + + // Override storage IDs to also check ACTOR_* and APIFY_* env vars + defaultDatasetId: field(z.string().default('default'), { + env: [ + 'ACTOR_DEFAULT_DATASET_ID', + 'APIFY_DEFAULT_DATASET_ID', + 'CRAWLEE_DEFAULT_DATASET_ID', + ], + }), + defaultKeyValueStoreId: field(z.string().default('default'), { + env: [ + 'ACTOR_DEFAULT_KEY_VALUE_STORE_ID', + 'APIFY_DEFAULT_KEY_VALUE_STORE_ID', + 'CRAWLEE_DEFAULT_KEY_VALUE_STORE_ID', + ], + }), + defaultRequestQueueId: field(z.string().default('default'), { + env: [ + 'ACTOR_DEFAULT_REQUEST_QUEUE_ID', + 'APIFY_DEFAULT_REQUEST_QUEUE_ID', + 'CRAWLEE_DEFAULT_REQUEST_QUEUE_ID', + ], + }), + + // Override inputKey to also check ACTOR_INPUT_KEY + inputKey: field(z.string().default('INPUT'), { + env: ['ACTOR_INPUT_KEY', 'APIFY_INPUT_KEY', 'CRAWLEE_INPUT_KEY'], + }), + + // Override memoryMbytes to also check ACTOR_MEMORY_MBYTES + memoryMbytes: field(z.coerce.number().optional(), { + env: [ + 'ACTOR_MEMORY_MBYTES', + 'APIFY_MEMORY_MBYTES', + 'CRAWLEE_MEMORY_MBYTES', + ], + }), + + // Override persistStateIntervalMillis + persistStateIntervalMillis: field(z.coerce.number().default(60_000), { + env: [ + 'APIFY_PERSIST_STATE_INTERVAL_MILLIS', + 'APIFY_TEST_PERSIST_INTERVAL_MILLIS', + 'CRAWLEE_PERSIST_STATE_INTERVAL_MILLIS', + ], + }), + + // Override browser-related fields to also check APIFY_* env vars + headless: field(coerceBoolean.default(true), { + env: ['APIFY_HEADLESS', 'CRAWLEE_HEADLESS'], + }), + xvfb: field(coerceBoolean.default(false), { + env: ['APIFY_XVFB', 'CRAWLEE_XVFB'], + }), + chromeExecutablePath: field(z.string().optional(), { + env: ['APIFY_CHROME_EXECUTABLE_PATH', 'CRAWLEE_CHROME_EXECUTABLE_PATH'], + }), + defaultBrowserPath: field(z.string().optional(), { + env: ['APIFY_DEFAULT_BROWSER_PATH', 'CRAWLEE_DEFAULT_BROWSER_PATH'], + }), + disableBrowserSandbox: field(coerceBoolean.optional(), { + env: [ + 'APIFY_DISABLE_BROWSER_SANDBOX', + 
'CRAWLEE_DISABLE_BROWSER_SANDBOX', + ], + }), + + // Override other crawlee fields with APIFY_* aliases + availableMemoryRatio: field(z.coerce.number().default(0.25), { + env: ['APIFY_AVAILABLE_MEMORY_RATIO', 'CRAWLEE_AVAILABLE_MEMORY_RATIO'], + }), + purgeOnStart: field(coerceBoolean.default(true), { + env: ['APIFY_PURGE_ON_START', 'CRAWLEE_PURGE_ON_START'], + }), + + // ========================================================================= + // Apify-specific fields + // ========================================================================= + + // Authentication + token: field(z.string().optional(), { + env: 'APIFY_TOKEN', + }), + + // Actor identification + actorId: field(z.string().optional(), { + env: ['ACTOR_ID', 'APIFY_ACTOR_ID'], + }), + actorRunId: field(z.string().optional(), { + env: ['ACTOR_RUN_ID', 'APIFY_ACTOR_RUN_ID'], + }), + actorTaskId: field(z.string().optional(), { + env: ['ACTOR_TASK_ID', 'APIFY_ACTOR_TASK_ID'], + }), + + // API URLs + apiBaseUrl: field(z.string().default('https://api.apify.com'), { + env: 'APIFY_API_BASE_URL', + }), + apiPublicBaseUrl: field(z.string().default('https://api.apify.com'), { + env: 'APIFY_API_PUBLIC_BASE_URL', + }), + + // Actor events + actorEventsWsUrl: field(z.string().optional(), { + env: ['ACTOR_EVENTS_WEBSOCKET_URL', 'APIFY_ACTOR_EVENTS_WS_URL'], + }), + + // Container/web server + containerPort: field(z.coerce.number().default(4321), { + env: ['ACTOR_WEB_SERVER_PORT', 'APIFY_CONTAINER_PORT'], + }), + containerUrl: field(z.string().default('http://localhost:4321'), { + env: ['ACTOR_WEB_SERVER_URL', 'APIFY_CONTAINER_URL'], + }), + + // Standby (deprecated in favor of containerPort/containerUrl) + /** @deprecated use `containerPort` instead */ + standbyPort: field(z.coerce.number().default(4321), { + env: 'ACTOR_STANDBY_PORT', + }), + standbyUrl: field(z.string().optional(), { + env: 'ACTOR_STANDBY_URL', + }), + + // Proxy + proxyHostname: field(z.string().default('proxy.apify.com'), { + env: 
'APIFY_PROXY_HOSTNAME', + }), + proxyPassword: field(z.string().optional(), { + env: 'APIFY_PROXY_PASSWORD', + }), + proxyPort: field(z.coerce.number().default(8000), { + env: 'APIFY_PROXY_PORT', + }), + proxyStatusUrl: field(z.string().default('http://proxy.apify.com'), { + env: 'APIFY_PROXY_STATUS_URL', + }), + + // Platform detection + isAtHome: field(coerceBoolean.default(false), { + env: 'APIFY_IS_AT_HOME', + }), + + // User + userId: field(z.string().optional(), { + env: 'APIFY_USER_ID', + }), + + // Input secrets + inputSecretsPrivateKeyFile: field(z.string().optional(), { + env: 'APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE', + }), + inputSecretsPrivateKeyPassphrase: field(z.string().optional(), { + env: 'APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE', + }), + + // Metamorph + metamorphAfterSleepMillis: field(z.coerce.number().default(300_000), { + env: 'APIFY_METAMORPH_AFTER_SLEEP_MILLIS', + }), + + // Pay per event + maxTotalChargeUsd: field(z.coerce.number().optional(), { + env: 'ACTOR_MAX_TOTAL_CHARGE_USD', + }), + testPayPerEvent: field(coerceBoolean.default(false), { + env: 'ACTOR_TEST_PAY_PER_EVENT', + }), + useChargingLogDataset: field(coerceBoolean.default(false), { + env: 'ACTOR_USE_CHARGING_LOG_DATASET', + }), + + // Meta origin + metaOrigin: field( + z.custom<(typeof META_ORIGINS)[keyof typeof META_ORIGINS]>().optional(), + { + env: 'APIFY_META_ORIGIN', + }, + ), +} as const; + +export type ApifyConfigFields = typeof apifyConfigFields; + +// ============================================================================ +// Configuration Options Types +// ============================================================================ + +/** Input options for Configuration constructor (all fields optional) */ +export type ConfigurationOptions = InferInputOptions<ApifyConfigFields>; + +/** Output options from Configuration.get() (respects defaults) */ +export type ConfigurationValues = InferOutputOptions<ApifyConfigFields>; + +// 
============================================================================ +// Configuration Class +// ============================================================================ /** * `Configuration` is a value object holding the SDK configuration. We can use it in two ways: @@ -112,138 +290,28 @@ export interface ConfigurationOptions extends CoreConfigurationOptions { * `chromeExecutablePath` | `APIFY_CHROME_EXECUTABLE_PATH` | - * `defaultBrowserPath` | `APIFY_DEFAULT_BROWSER_PATH` | - */ -export class Configuration extends CoreConfiguration { - /** @inheritDoc */ - // eslint-disable-next-line no-use-before-define -- Self-reference - static override globalConfig?: Configuration; - - // maps environment variables to config keys (e.g. `APIFY_MEMORY_MBYTES` to `memoryMbytes`) - protected static override ENV_MAP = { - // regular crawlee env vars are also supported - ...CoreConfiguration.ENV_MAP, - - // support crawlee env vars prefixed with `APIFY_` too - APIFY_AVAILABLE_MEMORY_RATIO: 'availableMemoryRatio', - APIFY_PURGE_ON_START: 'purgeOnStart', - APIFY_MEMORY_MBYTES: 'memoryMbytes', - APIFY_DEFAULT_DATASET_ID: 'defaultDatasetId', - APIFY_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId', - APIFY_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId', - APIFY_INPUT_KEY: 'inputKey', - APIFY_PERSIST_STATE_INTERVAL_MILLIS: 'persistStateIntervalMillis', - APIFY_HEADLESS: 'headless', - APIFY_XVFB: 'xvfb', - APIFY_CHROME_EXECUTABLE_PATH: 'chromeExecutablePath', - APIFY_DEFAULT_BROWSER_PATH: 'defaultBrowserPath', - APIFY_DISABLE_BROWSER_SANDBOX: 'disableBrowserSandbox', - - // as well as apify specific ones - APIFY_TOKEN: 'token', - APIFY_METAMORPH_AFTER_SLEEP_MILLIS: 'metamorphAfterSleepMillis', - APIFY_TEST_PERSIST_INTERVAL_MILLIS: 'persistStateIntervalMillis', // for BC, seems to be unused - APIFY_ACTOR_EVENTS_WS_URL: 'actorEventsWsUrl', - APIFY_ACTOR_ID: 'actorId', - APIFY_API_BASE_URL: 'apiBaseUrl', - APIFY_API_PUBLIC_BASE_URL: 'apiPublicBaseUrl', - 
APIFY_IS_AT_HOME: 'isAtHome', - APIFY_ACTOR_RUN_ID: 'actorRunId', - APIFY_ACTOR_TASK_ID: 'actorTaskId', - APIFY_CONTAINER_PORT: 'containerPort', - APIFY_CONTAINER_URL: 'containerUrl', - APIFY_USER_ID: 'userId', - APIFY_PROXY_HOSTNAME: 'proxyHostname', - APIFY_PROXY_PASSWORD: 'proxyPassword', - APIFY_PROXY_STATUS_URL: 'proxyStatusUrl', - APIFY_PROXY_PORT: 'proxyPort', - APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE: 'inputSecretsPrivateKeyFile', - APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE: - 'inputSecretsPrivateKeyPassphrase', - APIFY_META_ORIGIN: 'metaOrigin', - - // Actor env vars - ACTOR_DEFAULT_DATASET_ID: 'defaultDatasetId', - ACTOR_DEFAULT_KEY_VALUE_STORE_ID: 'defaultKeyValueStoreId', - ACTOR_DEFAULT_REQUEST_QUEUE_ID: 'defaultRequestQueueId', - ACTOR_EVENTS_WEBSOCKET_URL: 'actorEventsWsUrl', - ACTOR_ID: 'actorId', - ACTOR_INPUT_KEY: 'inputKey', - ACTOR_MEMORY_MBYTES: 'memoryMbytes', - ACTOR_RUN_ID: 'actorRunId', - ACTOR_STANDBY_PORT: 'standbyPort', - ACTOR_STANDBY_URL: 'standbyUrl', - ACTOR_TASK_ID: 'actorTaskId', - ACTOR_WEB_SERVER_PORT: 'containerPort', - ACTOR_WEB_SERVER_URL: 'containerUrl', - ACTOR_MAX_TOTAL_CHARGE_USD: 'maxTotalChargeUsd', - ACTOR_TEST_PAY_PER_EVENT: 'testPayPerEvent', - ACTOR_USE_CHARGING_LOG_DATASET: 'useChargingLogDataset', - }; - - protected static override INTEGER_VARS = [ - ...CoreConfiguration.INTEGER_VARS, - 'proxyPort', - 'containerPort', - 'metamorphAfterSleepMillis', - 'maxTotalChargeUsd', - ]; - - protected static override BOOLEAN_VARS = [ - ...CoreConfiguration.BOOLEAN_VARS, - 'isAtHome', - 'testPayPerEvent', - 'useChargingLogDataset', - ]; - - protected static override DEFAULTS = { - ...CoreConfiguration.DEFAULTS, - defaultKeyValueStoreId: - LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_KEY_VALUE_STORE_ID], - defaultDatasetId: - LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_DATASET_ID], - defaultRequestQueueId: - LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.DEFAULT_REQUEST_QUEUE_ID], - inputKey: 'INPUT', - apiBaseUrl: 'https://api.apify.com', - 
apiPublicBaseUrl: 'https://api.apify.com', - proxyStatusUrl: 'http://proxy.apify.com', - proxyHostname: LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME], - proxyPort: +LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT], - containerPort: +LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_PORT], - containerUrl: LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.WEB_SERVER_URL], - standbyPort: +LOCAL_ACTOR_ENV_VARS[ACTOR_ENV_VARS.STANDBY_PORT], - metamorphAfterSleepMillis: 300e3, - persistStateIntervalMillis: 60e3, // This value is mentioned in jsdoc in `events.js`, if you update it here, update it there too. - testPayPerEvent: false, - useChargingLogDataset: false, - }; - - /** - * @inheritDoc - */ - override get< - T extends keyof ConfigurationOptions, - U extends ConfigurationOptions[T], - >(key: T, defaultValue?: U): U { - return super.get(key as keyof CoreConfigurationOptions, defaultValue); - } +export class Configuration extends CrawleeConfiguration< + ApifyConfigFields, + ConfigurationOptions, + ConfigurationValues +> { + static override fields: FieldDefinitions = apifyConfigFields; - /** - * @inheritDoc - */ - override set(key: keyof ConfigurationOptions, value?: any) { - super.set(key as keyof CoreConfigurationOptions, value); - } + /** @internal */ + // eslint-disable-next-line no-use-before-define + static override globalConfig?: Configuration; /** - * @inheritDoc + * Returns the global configuration instance. It will respect the environment variables. 
*/ static override getGlobalConfig(): Configuration { - if (Configuration.storage.getStore()) { - return Configuration.storage.getStore() as Configuration; + if (CrawleeConfiguration.storage.getStore()) { + return CrawleeConfiguration.storage.getStore() as Configuration; } + Configuration.globalConfig ??= new Configuration(); - return Configuration.globalConfig as Configuration; + return Configuration.globalConfig; } /** @@ -251,17 +319,6 @@ export class Configuration extends CoreConfiguration { * if we want to change them, we need to first reset the global state. Used mainly for testing purposes. */ static override resetGlobalState(): void { - delete this.globalConfig; + delete Configuration.globalConfig; } } - -// monkey patch the core class so it respects the new options too -CoreConfiguration.getGlobalConfig = Configuration.getGlobalConfig; -// @ts-expect-error protected property -CoreConfiguration.ENV_MAP = Configuration.ENV_MAP; -// @ts-expect-error protected property -CoreConfiguration.INTEGER_VARS = Configuration.INTEGER_VARS; -// @ts-expect-error protected property -CoreConfiguration.BOOLEAN_VARS = Configuration.BOOLEAN_VARS; -// @ts-expect-error protected property -CoreConfiguration.DEFAULTS = Configuration.DEFAULTS; From 9a035ab87f611fa5794892bd96da880ec6768800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Fri, 6 Feb 2026 12:28:15 +0100 Subject: [PATCH 2/5] refactor: use extendField to avoid repeating CRAWLEE_* env vars Uses the new `extendField()` helper from Crawlee to extend base fields with additional env var mappings without repeating the CRAWLEE_* ones. 
Co-Authored-By: Claude Opus 4.5 --- packages/apify/src/configuration.ts | 125 +++++++++++++++------------- 1 file changed, 66 insertions(+), 59 deletions(-) diff --git a/packages/apify/src/configuration.ts b/packages/apify/src/configuration.ts index 0156872571..bb501bff25 100644 --- a/packages/apify/src/configuration.ts +++ b/packages/apify/src/configuration.ts @@ -2,6 +2,7 @@ import { coerceBoolean, Configuration as CrawleeConfiguration, crawleeConfigFields, + extendField, field, type FieldDefinitions, type InferInputOptions, @@ -18,82 +19,89 @@ import type { META_ORIGINS } from '@apify/consts'; /** * Field definitions for Apify SDK Configuration. * Extends Crawlee's configuration with Apify-specific fields. + * + * Uses `extendField` to add ACTOR_* and APIFY_* env var aliases + * while preserving the base CRAWLEE_* env vars from crawleeConfigFields. */ export const apifyConfigFields = { ...crawleeConfigFields, // Override storage IDs to also check ACTOR_* and APIFY_* env vars - defaultDatasetId: field(z.string().default('default'), { - env: [ - 'ACTOR_DEFAULT_DATASET_ID', - 'APIFY_DEFAULT_DATASET_ID', - 'CRAWLEE_DEFAULT_DATASET_ID', - ], - }), - defaultKeyValueStoreId: field(z.string().default('default'), { - env: [ - 'ACTOR_DEFAULT_KEY_VALUE_STORE_ID', - 'APIFY_DEFAULT_KEY_VALUE_STORE_ID', - 'CRAWLEE_DEFAULT_KEY_VALUE_STORE_ID', - ], - }), - defaultRequestQueueId: field(z.string().default('default'), { - env: [ - 'ACTOR_DEFAULT_REQUEST_QUEUE_ID', - 'APIFY_DEFAULT_REQUEST_QUEUE_ID', - 'CRAWLEE_DEFAULT_REQUEST_QUEUE_ID', - ], + defaultDatasetId: extendField(crawleeConfigFields.defaultDatasetId, { + env: ['ACTOR_DEFAULT_DATASET_ID', 'APIFY_DEFAULT_DATASET_ID'], }), + defaultKeyValueStoreId: extendField( + crawleeConfigFields.defaultKeyValueStoreId, + { + env: [ + 'ACTOR_DEFAULT_KEY_VALUE_STORE_ID', + 'APIFY_DEFAULT_KEY_VALUE_STORE_ID', + ], + }, + ), + defaultRequestQueueId: extendField( + crawleeConfigFields.defaultRequestQueueId, + { + env: [ + 
'ACTOR_DEFAULT_REQUEST_QUEUE_ID', + 'APIFY_DEFAULT_REQUEST_QUEUE_ID', + ], + }, + ), - // Override inputKey to also check ACTOR_INPUT_KEY - inputKey: field(z.string().default('INPUT'), { - env: ['ACTOR_INPUT_KEY', 'APIFY_INPUT_KEY', 'CRAWLEE_INPUT_KEY'], + // Override inputKey to also check ACTOR_INPUT_KEY and APIFY_INPUT_KEY + inputKey: extendField(crawleeConfigFields.inputKey, { + env: ['ACTOR_INPUT_KEY', 'APIFY_INPUT_KEY'], }), - // Override memoryMbytes to also check ACTOR_MEMORY_MBYTES - memoryMbytes: field(z.coerce.number().optional(), { - env: [ - 'ACTOR_MEMORY_MBYTES', - 'APIFY_MEMORY_MBYTES', - 'CRAWLEE_MEMORY_MBYTES', - ], + // Override memoryMbytes to also check ACTOR_MEMORY_MBYTES and APIFY_MEMORY_MBYTES + memoryMbytes: extendField(crawleeConfigFields.memoryMbytes, { + env: ['ACTOR_MEMORY_MBYTES', 'APIFY_MEMORY_MBYTES'], }), - // Override persistStateIntervalMillis - persistStateIntervalMillis: field(z.coerce.number().default(60_000), { - env: [ - 'APIFY_PERSIST_STATE_INTERVAL_MILLIS', - 'APIFY_TEST_PERSIST_INTERVAL_MILLIS', - 'CRAWLEE_PERSIST_STATE_INTERVAL_MILLIS', - ], - }), + // Override persistStateIntervalMillis with APIFY_* aliases + persistStateIntervalMillis: extendField( + crawleeConfigFields.persistStateIntervalMillis, + { + env: [ + 'APIFY_PERSIST_STATE_INTERVAL_MILLIS', + 'APIFY_TEST_PERSIST_INTERVAL_MILLIS', + ], + }, + ), // Override browser-related fields to also check APIFY_* env vars - headless: field(coerceBoolean.default(true), { - env: ['APIFY_HEADLESS', 'CRAWLEE_HEADLESS'], + headless: extendField(crawleeConfigFields.headless, { + env: 'APIFY_HEADLESS', }), - xvfb: field(coerceBoolean.default(false), { - env: ['APIFY_XVFB', 'CRAWLEE_XVFB'], + xvfb: extendField(crawleeConfigFields.xvfb, { + env: 'APIFY_XVFB', }), - chromeExecutablePath: field(z.string().optional(), { - env: ['APIFY_CHROME_EXECUTABLE_PATH', 'CRAWLEE_CHROME_EXECUTABLE_PATH'], - }), - defaultBrowserPath: field(z.string().optional(), { - env: 
['APIFY_DEFAULT_BROWSER_PATH', 'CRAWLEE_DEFAULT_BROWSER_PATH'], - }), - disableBrowserSandbox: field(coerceBoolean.optional(), { - env: [ - 'APIFY_DISABLE_BROWSER_SANDBOX', - 'CRAWLEE_DISABLE_BROWSER_SANDBOX', - ], + chromeExecutablePath: extendField( + crawleeConfigFields.chromeExecutablePath, + { + env: 'APIFY_CHROME_EXECUTABLE_PATH', + }, + ), + defaultBrowserPath: extendField(crawleeConfigFields.defaultBrowserPath, { + env: 'APIFY_DEFAULT_BROWSER_PATH', }), + disableBrowserSandbox: extendField( + crawleeConfigFields.disableBrowserSandbox, + { + env: 'APIFY_DISABLE_BROWSER_SANDBOX', + }, + ), // Override other crawlee fields with APIFY_* aliases - availableMemoryRatio: field(z.coerce.number().default(0.25), { - env: ['APIFY_AVAILABLE_MEMORY_RATIO', 'CRAWLEE_AVAILABLE_MEMORY_RATIO'], - }), - purgeOnStart: field(coerceBoolean.default(true), { - env: ['APIFY_PURGE_ON_START', 'CRAWLEE_PURGE_ON_START'], + availableMemoryRatio: extendField( + crawleeConfigFields.availableMemoryRatio, + { + env: 'APIFY_AVAILABLE_MEMORY_RATIO', + }, + ), + purgeOnStart: extendField(crawleeConfigFields.purgeOnStart, { + env: 'APIFY_PURGE_ON_START', }), // ========================================================================= @@ -309,7 +317,6 @@ export class Configuration extends CrawleeConfiguration< return CrawleeConfiguration.storage.getStore() as Configuration; } - Configuration.globalConfig ??= new Configuration(); return Configuration.globalConfig; } From 7b9a746bb6e2b70a033851823409be21da59b99d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Fri, 6 Feb 2026 12:35:28 +0100 Subject: [PATCH 3/5] refactor: use Configuration.extendField static method Updates to use CrawleeConfiguration.extendField() static method instead of standalone extendField import. 
Co-Authored-By: Claude Opus 4.5 --- packages/apify/src/configuration.ts | 43 ++++++++++++++++++----------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/packages/apify/src/configuration.ts b/packages/apify/src/configuration.ts index bb501bff25..315e5152de 100644 --- a/packages/apify/src/configuration.ts +++ b/packages/apify/src/configuration.ts @@ -2,7 +2,6 @@ import { coerceBoolean, Configuration as CrawleeConfiguration, crawleeConfigFields, - extendField, field, type FieldDefinitions, type InferInputOptions, @@ -27,9 +26,12 @@ export const apifyConfigFields = { ...crawleeConfigFields, // Override storage IDs to also check ACTOR_* and APIFY_* env vars - defaultDatasetId: extendField(crawleeConfigFields.defaultDatasetId, { - env: ['ACTOR_DEFAULT_DATASET_ID', 'APIFY_DEFAULT_DATASET_ID'], - }), + defaultDatasetId: CrawleeConfiguration.extendField( + crawleeConfigFields.defaultDatasetId, + { + env: ['ACTOR_DEFAULT_DATASET_ID', 'APIFY_DEFAULT_DATASET_ID'], + }, + ), defaultKeyValueStoreId: extendField( crawleeConfigFields.defaultKeyValueStoreId, { @@ -50,14 +52,17 @@ export const apifyConfigFields = { ), // Override inputKey to also check ACTOR_INPUT_KEY and APIFY_INPUT_KEY - inputKey: extendField(crawleeConfigFields.inputKey, { + inputKey: CrawleeConfiguration.extendField(crawleeConfigFields.inputKey, { env: ['ACTOR_INPUT_KEY', 'APIFY_INPUT_KEY'], }), // Override memoryMbytes to also check ACTOR_MEMORY_MBYTES and APIFY_MEMORY_MBYTES - memoryMbytes: extendField(crawleeConfigFields.memoryMbytes, { - env: ['ACTOR_MEMORY_MBYTES', 'APIFY_MEMORY_MBYTES'], - }), + memoryMbytes: CrawleeConfiguration.extendField( + crawleeConfigFields.memoryMbytes, + { + env: ['ACTOR_MEMORY_MBYTES', 'APIFY_MEMORY_MBYTES'], + }, + ), // Override persistStateIntervalMillis with APIFY_* aliases persistStateIntervalMillis: extendField( @@ -71,10 +76,10 @@ export const apifyConfigFields = { ), // Override browser-related fields to also check APIFY_* env vars - headless: 
extendField(crawleeConfigFields.headless, { + headless: CrawleeConfiguration.extendField(crawleeConfigFields.headless, { env: 'APIFY_HEADLESS', }), - xvfb: extendField(crawleeConfigFields.xvfb, { + xvfb: CrawleeConfiguration.extendField(crawleeConfigFields.xvfb, { env: 'APIFY_XVFB', }), chromeExecutablePath: extendField( @@ -83,9 +88,12 @@ export const apifyConfigFields = { env: 'APIFY_CHROME_EXECUTABLE_PATH', }, ), - defaultBrowserPath: extendField(crawleeConfigFields.defaultBrowserPath, { - env: 'APIFY_DEFAULT_BROWSER_PATH', - }), + defaultBrowserPath: CrawleeConfiguration.extendField( + crawleeConfigFields.defaultBrowserPath, + { + env: 'APIFY_DEFAULT_BROWSER_PATH', + }, + ), disableBrowserSandbox: extendField( crawleeConfigFields.disableBrowserSandbox, { @@ -100,9 +108,12 @@ export const apifyConfigFields = { env: 'APIFY_AVAILABLE_MEMORY_RATIO', }, ), - purgeOnStart: extendField(crawleeConfigFields.purgeOnStart, { - env: 'APIFY_PURGE_ON_START', - }), + purgeOnStart: CrawleeConfiguration.extendField( + crawleeConfigFields.purgeOnStart, + { + env: 'APIFY_PURGE_ON_START', + }, + ), // ========================================================================= // Apify-specific fields From 6ba172e179aa0523c5d01b4ffea0b5866888e59a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Fri, 6 Feb 2026 12:36:48 +0100 Subject: [PATCH 4/5] fix: use CrawleeConfiguration.extendField for all fields Co-Authored-By: Claude Opus 4.5 --- packages/apify/src/configuration.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/apify/src/configuration.ts b/packages/apify/src/configuration.ts index 315e5152de..da90d4be79 100644 --- a/packages/apify/src/configuration.ts +++ b/packages/apify/src/configuration.ts @@ -32,7 +32,7 @@ export const apifyConfigFields = { env: ['ACTOR_DEFAULT_DATASET_ID', 'APIFY_DEFAULT_DATASET_ID'], }, ), - defaultKeyValueStoreId: extendField( + defaultKeyValueStoreId: CrawleeConfiguration.extendField( 
crawleeConfigFields.defaultKeyValueStoreId, { env: [ @@ -41,7 +41,7 @@ export const apifyConfigFields = { ], }, ), - defaultRequestQueueId: extendField( + defaultRequestQueueId: CrawleeConfiguration.extendField( crawleeConfigFields.defaultRequestQueueId, { env: [ @@ -65,7 +65,7 @@ export const apifyConfigFields = { ), // Override persistStateIntervalMillis with APIFY_* aliases - persistStateIntervalMillis: extendField( + persistStateIntervalMillis: CrawleeConfiguration.extendField( crawleeConfigFields.persistStateIntervalMillis, { env: [ @@ -82,7 +82,7 @@ export const apifyConfigFields = { xvfb: CrawleeConfiguration.extendField(crawleeConfigFields.xvfb, { env: 'APIFY_XVFB', }), - chromeExecutablePath: extendField( + chromeExecutablePath: CrawleeConfiguration.extendField( crawleeConfigFields.chromeExecutablePath, { env: 'APIFY_CHROME_EXECUTABLE_PATH', @@ -94,7 +94,7 @@ export const apifyConfigFields = { env: 'APIFY_DEFAULT_BROWSER_PATH', }, ), - disableBrowserSandbox: extendField( + disableBrowserSandbox: CrawleeConfiguration.extendField( crawleeConfigFields.disableBrowserSandbox, { env: 'APIFY_DISABLE_BROWSER_SANDBOX', @@ -102,7 +102,7 @@ export const apifyConfigFields = { ), // Override other crawlee fields with APIFY_* aliases - availableMemoryRatio: extendField( + availableMemoryRatio: CrawleeConfiguration.extendField( crawleeConfigFields.availableMemoryRatio, { env: 'APIFY_AVAILABLE_MEMORY_RATIO', From 4241875ca6603ce5e7049b64f046133f3bb39f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Fri, 6 Feb 2026 12:57:55 +0100 Subject: [PATCH 5/5] chore: bump zod to v4 Co-Authored-By: Claude Opus 4.5 --- packages/apify/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/apify/package.json b/packages/apify/package.json index 668f802f3f..99f8179b3e 100644 --- a/packages/apify/package.json +++ b/packages/apify/package.json @@ -63,6 +63,6 @@ "semver": "^7.7.2", "tslib": "^2.8.1", "ws": "^8.18.2", - "zod": "^3.24.0" 
+ "zod": "^4.0.0" } }