apify
diff --git a/‎docs/guides/configuration.mdx‎
Lines changed: 23 additions & 21 deletions b/‎docs/guides/configuration.mdx‎
Lines changed: 23 additions & 21 deletions
diff --git a/‎docs/guides/parallel-scraping/parallel-scraper.mjs‎
Lines changed: 3 additions & 5 deletions b/‎docs/guides/parallel-scraping/parallel-scraper.mjs‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎docs/guides/parallel-scraping/parallel-scraping.mdx‎
Lines changed: 5 additions & 8 deletions b/‎docs/guides/parallel-scraping/parallel-scraping.mdx‎
Lines changed: 5 additions & 8 deletions
diff --git a/‎docs/upgrading/upgrading_v4.md‎
Lines changed: 36 additions & 0 deletions b/‎docs/upgrading/upgrading_v4.md‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎packages/browser-crawler/src/internals/browser-launcher.ts‎
Lines changed: 3 additions & 3 deletions b/‎packages/browser-crawler/src/internals/browser-launcher.ts‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎packages/core/package.json‎
Lines changed: 2 additions & 2 deletions b/‎packages/core/package.json‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎packages/core/src/autoscaling/snapshotter.ts‎
Lines changed: 3 additions & 5 deletions b/‎packages/core/src/autoscaling/snapshotter.ts‎
Lines changed: 3 additions & 5 deletions
@@ -15,13 +15,13 @@ There are three ways of changing the configuration parameters:
 - using the `Configuration` class
 
 You can also combine all of the above, but keep in mind that the precedence of these three options is as follows:
-***`crawlee.json`*** < ***constructor options*** < ***environment variables***.
+***constructor options*** > ***environment variables*** > ***`crawlee.json`***.
 
-`crawlee.json` is a baseline. The options provided in the `Configuration` constructor will override the options provided in the JSON. Environment variables will override both.
+Constructor options have the highest priority. Environment variables override `crawlee.json`. The JSON file serves as a baseline.
 
 ## `crawlee.json`
 
-The first option you could use for configuring Crawlee is `crawlee.json` file. The only thing you need to do is specify the <ApiLink to="core/interface/ConfigurationOptions">`ConfigurationOptions`</ApiLink> in the file, place the file in the root of your project, and Crawlee will use provided options as global configuration.
+The first option you could use for configuring Crawlee is the `crawlee.json` file. The only thing you need to do is specify the configuration options in the file and place the file in the root of your project; Crawlee will then use the provided options as the global configuration. See the <ApiLink to="core/class/Configuration">`Configuration`</ApiLink> class for the full list of supported options.
 
 ```json title="crawlee.json"
 {
@@ -133,24 +133,28 @@ the autoscaling feature will only use up to 2048 MB of memory.
 
 ## Configuration class
 
-The last option to adjust Crawlee configuration is to use the <ApiLink to="core/class/Configuration">`Configuration`</ApiLink> class in the code.
+The last option to adjust Crawlee configuration is to use the <ApiLink to="core/class/Configuration">`Configuration`</ApiLink> class in the code. Configuration is immutable — values are set via the constructor and cannot be changed afterwards.
 
 ### Global Configuration
 
-By default, there is a global singleton instance of `Configuration` class, it is used by the crawlers and some other classes that depend on a configurable behavior. In most cases you don't need to adjust any options there, but if needed - you can get access to it via <ApiLink to="core/class/Configuration#getGlobalConfig">`Configuration.getGlobalConfig()`</ApiLink> function. Now you can easily <ApiLink to="core/class/Configuration#get">`get`</ApiLink> and <ApiLink to="core/class/Configuration#set">`set`</ApiLink> the <ApiLink to="core/interface/ConfigurationOptions">`ConfigurationOptions`</ApiLink>.
+By default, there is a global singleton instance of the `Configuration` class, which is used by the crawlers and some other classes that depend on configurable behavior. In most cases you don't need to adjust any options there, but if needed, you can access it via <ApiLink to="core/class/Configuration#getGlobalConfig">`Configuration.getGlobalConfig()`</ApiLink>, which delegates to the global <ApiLink to="core/class/ServiceLocator">`serviceLocator`</ApiLink> — the single source of truth for Crawlee's shared services (for example the configuration, event manager, storage client, and logger). You can also reach the same instance directly via `serviceLocator.getConfiguration()`, or swap the configuration globally with `serviceLocator.setConfiguration(...)` before any crawler is created. Configuration values are accessible directly as properties on the instance.
 
 ```js
 import { CheerioCrawler, Configuration, sleep } from 'crawlee';
 
 // Get the global configuration
 const config = Configuration.getGlobalConfig();
-// Set the 'persistStateIntervalMillis' option
-// of global configuration to 10 seconds
-config.set('persistStateIntervalMillis', 10_000);
+// Access configuration values directly as properties
+console.log(config.persistStateIntervalMillis);
 
-// Note, that we are not passing the configuration to the crawler
-// as it's using the global configuration
-const crawler = new CheerioCrawler();
+// To use custom configuration values, create a new Configuration instance
+const configuration = new Configuration({
+    // Set the 'persistStateIntervalMillis' option to 10 seconds
+    persistStateIntervalMillis: 10_000,
+});
+
+// Pass the configuration to the crawler
+const crawler = new CheerioCrawler({ configuration });
 
 crawler.router.addDefaultHandler(async ({ request }) => {
     // For the first request we wait for 5 seconds,
@@ -170,15 +174,13 @@ crawler.router.addDefaultHandler(async ({ request }) => {
 await crawler.run(['https://www.example.com/1']);
 ```
 
-This is pretty much the same example we used for showing `crawlee.json` usage,
-but now we're using the global configuration, which is the only difference.
-If you run this example - you will find the `SDK_CRAWLER_STATISTICS` file in default Key-Value store as before,
-which would show the same number of finishes requests (one) and the same crawler runtime (~10 seconds).
-This confirms that provided parameters worked: the state was persisted after 10 seconds, as it was set in the global configuration.
+If you run this example, you will find the `SDK_CRAWLER_STATISTICS` file in the default Key-Value store,
+which will show the same number of finished requests (one) and the same crawler runtime (~10 seconds) as the `crawlee.json` example.
+This confirms that the provided parameters worked: the state was persisted after 10 seconds, as set in the configuration.
 
 :::note
 
-After running the same example with commented two lines of code related to `Configuration` there will be
+After running the same example without the custom configuration, there will be
 no `SDK_CRAWLER_STATISTICS` file stored in the default Key-Value store:
 as we did not change the `persistStateIntervalMillis`, Crawlee used the default value of 60 seconds,
 and the crawler was forcefully aborted after ~15 seconds of run time before it persisted the state for the first time.
@@ -187,19 +189,19 @@ and the crawler was forcefully aborted after ~15 seconds of run time before it p
 
 ### Custom configuration
 
-Alternatively, you can create a custom configuration. In this case you need to pass it to the class that is going to use it, e.g. to the crawler. Let's adjust the previous example:
+You can create a custom configuration and pass it to the crawler via the `configuration` option:
 
 ```js
 import { CheerioCrawler, Configuration, sleep } from 'crawlee';
 
 // Create new configuration
-const config = new Configuration({
+const configuration = new Configuration({
     // Set the 'persistStateIntervalMillis' option to 10 seconds
     persistStateIntervalMillis: 10_000,
 });
 
-// Now we need to pass the configuration to the crawler
-const crawler = new CheerioCrawler({}, config);
+// Pass the configuration to the crawler
+const crawler = new CheerioCrawler({ configuration });
 
 crawler.router.addDefaultHandler(async ({ request }) => {
     // for the first request we wait for 5 seconds,
 
@@ -73,15 +73,13 @@ if (!process.env.IN_WORKER_THREAD) {
     // or a configuration option. This is just for show 😈
     workerLogger.setLevel(log.LEVELS.DEBUG);
 
-    // Disable the automatic purge on start
-    // This is needed when running locally, as otherwise multiple processes will try to clear the default storage (and that will cause clashes)
-    Configuration.set('purgeOnStart', false);
-
     // Get the request queue
     const requestQueue = await getOrInitQueue(false);
 
-    // Configure crawlee to store the worker-specific data in a separate directory (needs to be done AFTER the queue is initialized when running locally)
+    // Disable the automatic purge on start and configure crawlee to store the worker-specific data in a separate directory
+    // (needs to be done AFTER the queue is initialized when running locally)
     const config = new Configuration({
+        purgeOnStart: false,
         storageClientOptions: {
             localDataDirectory: `./storage/worker-${process.env.WORKER_INDEX}`,
         },
 
@@ -132,22 +132,19 @@ We use this to ensure the parent process stays alive until all the worker proces
 
 There are two steps we want to do for the worker processes:
 
-- ensure the default storages do **not** get purged on start, as otherwise we'd lose the queue we prepared
 - get the queue that supports locking from the same location as the parent process
-- initialize a special storage for worker processes so they do not collide with each other
+- ensure the default storages do **not** get purged on start, as otherwise we'd lose the queue we prepared, and initialize a special storage for worker processes so they do not collide with each other
 
 In order, that's what these lines do:
 
 ```javascript title="src/parallel-scraper.mjs"
-// Disable the automatic purge on start (step 1)
-// This is needed when running locally, as otherwise multiple processes will try to clear the default storage (and that will cause clashes)
-Configuration.set('purgeOnStart', false);
-
-// Get the request queue from the parent process (step 2)
+// Get the request queue from the parent process (step 1)
 const requestQueue = await getOrInitQueue(false);
 
-// Configure crawlee to store the worker-specific data in a separate directory (needs to be done AFTER the queue is initialized when running locally) (step 3)
+// Disable the automatic purge on start and configure crawlee to store the worker-specific data
+// in a separate directory (needs to be done AFTER the queue is initialized when running locally) (step 2)
 const config = new Configuration({
+    purgeOnStart: false,
     storageClientOptions: {
         localDataDirectory: `./storage/worker-${process.env.WORKER_INDEX}`,
     },
 
@@ -55,6 +55,7 @@ The crawler following options are removed:
 - `FileDownloadOptions.streamHandler` - streaming should now be handled directly in the `requestHandler` instead
 - `playwrightUtils.registerUtilsToContext` and `puppeteerUtils.registerUtilsToContext` - this is now added to the context via `ContextPipeline` composition
 - `puppeteerUtils.blockResources` and `puppeteerUtils.cacheResponses` (deprecated)
+- `Configuration.systemInfoV2` / `CRAWLEE_SYSTEM_INFO_V2` environment variable — the v2 behavior is now the default (see [Available resource detection](#available-resource-detection))
 
 ### The protected `BasicCrawler.crawlingContexts` map is removed
 
@@ -154,6 +155,40 @@ The `KeyValueStore.getPublicUrl` method is now asynchronous and reads the public
 
 The `preNavigationHooks` option in `HttpCrawler` subclasses no longer accepts the `gotOptions` object as a second parameter. Modify the `crawlingContext` fields (e.g. `.request`) directly instead.
 
+## Configuration class redesign
+
+The `Configuration` class has been redesigned for v4. The main changes are:
+
+### Direct property access replaces `get()` and `set()`
+
+**Before:**
+```ts
+const config = Configuration.getGlobalConfig();
+config.set('persistStateIntervalMillis', 10_000);
+const headless = config.get('headless');
+```
+
+**After:**
+```ts
+// Configuration is now immutable — set options via the constructor
+const config = new Configuration({ persistStateIntervalMillis: 10_000 });
+const headless = config.headless;
+```
+
+The `get()` and `set()` methods are removed. Access config values directly as properties.
+Configuration instances are immutable — attempting to assign a property throws a `TypeError`.
+
+### Constructor options now take precedence over environment variables
+
+**New priority order (highest to lowest):**
+1. Constructor options
+2. Environment variables
+3. `crawlee.json`
+4. Schema defaults
+
+Previously, environment variables always won. Now `new Configuration({ headless: false })`
+works even when `CRAWLEE_HEADLESS=true` is set.
+
 ## Service management moved from `Configuration` to `ServiceLocator`
 
 The service management functionality has been extracted from `Configuration` into a new `ServiceLocator` class, following the pattern established in Crawlee for Python.
@@ -166,6 +201,7 @@ The following methods and properties have been removed from `Configuration`:
 - `Configuration.getEventManager()` - moved to `ServiceLocator.getEventManager()`
 - `Configuration.useStorageClient()` - use `ServiceLocator.setStorageClient()` instead
 - `Configuration.useEventManager()` - use `ServiceLocator.setEventManager()` instead
+- `Configuration.resetGlobalState()` - use `serviceLocator.reset()` instead
 - `Configuration.storageManagers` - moved to `ServiceLocator.storageManagers`
 
 The `EventManager` and `LocalEventManager` constructors now accept an options object for configuring event intervals (e.g. `persistStateIntervalMillis`, `systemInfoIntervalMillis`). You can also use the new `LocalEventManager.fromConfig()` factory method to create an instance with intervals derived from a `Configuration` object.
 
@@ -189,7 +189,7 @@ export abstract class BrowserLauncher<
             ...this.launchOptions,
         };
 
-        if (this.config.get('disableBrowserSandbox')) {
+        if (this.config.disableBrowserSandbox) {
             launchOptions.args.push('--no-sandbox');
         }
 
@@ -209,11 +209,11 @@ export abstract class BrowserLauncher<
     }
 
     protected _getDefaultHeadlessOption(): boolean {
-        return this.config.get('headless')! && !this.config.get('xvfb', false);
+        return this.config.headless && !this.config.xvfb;
     }
 
     protected _getChromeExecutablePath(): string {
-        return this.config.get('chromeExecutablePath', this._getTypicalChromeExecutablePath());
+        return this.config.chromeExecutablePath ?? this._getTypicalChromeExecutablePath();
     }
 
     /**
 
@@ -59,14 +59,14 @@
         "@sapphire/async-queue": "^1.5.5",
         "@vladfrangu/async_event_emitter": "^2.4.6",
         "csv-stringify": "^6.5.2",
-        "fs-extra": "^11.3.0",
         "json5": "^2.2.3",
         "minimatch": "^10.0.1",
         "ow": "^2.0.0",
         "stream-json": "^1.9.1",
         "tldts": "^7.0.6",
         "tough-cookie": "^6.0.0",
         "tslib": "^2.8.1",
-        "type-fest": "^4.41.0"
+        "type-fest": "^4.41.0",
+        "zod": "^3.24.0 || ^4.0.0"
     }
 }
@@ -171,21 +171,19 @@ export class Snapshotter {
      * Starts capturing snapshots at configured intervals.
      */
     async start(): Promise<void> {
-        const memoryMbytes = serviceLocator.getConfiguration().get('memoryMbytes', 0);
+        const memoryMbytes = serviceLocator.getConfiguration().memoryMbytes ?? 0;
 
         if (memoryMbytes > 0) {
             this.maxMemoryBytes = memoryMbytes * 1024 * 1024;
         } else {
-            const containerized = serviceLocator.getConfiguration().get('containerized', await isContainerized());
+            const containerized = serviceLocator.getConfiguration().containerized ?? (await isContainerized());
             const memInfo = await getMemoryInfo({
                 containerized,
                 logger: serviceLocator.getLogger(),
             });
             const totalBytes = memInfo.totalBytes;
 
-            this.maxMemoryBytes = Math.ceil(
-                totalBytes * serviceLocator.getConfiguration().get('availableMemoryRatio')!,
-            );
+            this.maxMemoryBytes = Math.ceil(totalBytes * serviceLocator.getConfiguration().availableMemoryRatio);
             this.log.debug(
                 `Setting max memory of this run to ${Math.round(this.maxMemoryBytes / 1024 / 1024)} MB. ` +
                     'Use the CRAWLEE_MEMORY_MBYTES or CRAWLEE_AVAILABLE_MEMORY_RATIO environment variable to override it.',
Original file line number	Diff line number	Diff line change
`@@ -189,7 +189,7 @@ export abstract class BrowserLauncher<`
`189`	`189`	`...this.launchOptions,`
`190`	`190`	`};`
`191`	`191`
`192`		`- if (this.config.get('disableBrowserSandbox')) {`
	`192`	`+ if (this.config.disableBrowserSandbox) {`
`193`	`193`	`launchOptions.args.push('--no-sandbox');`
`194`	`194`	`}`
`195`	`195`
`@@ -209,11 +209,11 @@ export abstract class BrowserLauncher<`
`209`	`209`	`}`
`210`	`210`
`211`	`211`	`protected _getDefaultHeadlessOption(): boolean {`
`212`		`- return this.config.get('headless')! && !this.config.get('xvfb', false);`
	`212`	`+ return this.config.headless && !this.config.xvfb;`
`213`	`213`	`}`
`214`	`214`
`215`	`215`	`protected _getChromeExecutablePath(): string {`
`216`		`- return this.config.get('chromeExecutablePath', this._getTypicalChromeExecutablePath());`
	`216`	`+ return this.config.chromeExecutablePath ?? this._getTypicalChromeExecutablePath();`
`217`	`217`	`}`
`218`	`218`
`219`	`219`	`/**`