Skip to content

Commit 0809bd3

Browse files
authored
feat: Support for Apify "multiple datasets" feature (#571)
1 parent 47c58d9 commit 0809bd3

13 files changed

Lines changed: 636 additions & 31 deletions

File tree

package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
"*": "prettier --write --ignore-unknown"
7777
},
7878
"dependencies": {
79-
"@apify/consts": "^2.47.1",
79+
"@apify/consts": "^2.51.0",
8080
"@apify/input_secrets": "^1.2.0",
8181
"@apify/log": "^2.4.3",
8282
"@apify/timeout": "^0.3.0",

src/actor.ts

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import {
1414
EventType,
1515
purgeDefaultStorages,
1616
RequestQueue,
17-
StorageManager,
1817
} from '@crawlee/core';
1918
import type {
2019
Awaitable,
@@ -65,6 +64,12 @@ import {
6564
import { PlatformEventManager } from './platform_event_manager.js';
6665
import type { ProxyConfigurationOptions } from './proxy_configuration.js';
6766
import { ProxyConfiguration } from './proxy_configuration.js';
67+
import type {
68+
OpenStorageOptions,
69+
StorageIdentifier,
70+
StorageIdentifierWithoutAlias,
71+
} from './storage.js';
72+
import { openStorage } from './storage.js';
6873
import { checkCrawleeVersion, getSystemInfo } from './utils.js';
6974

7075
export interface InitOptions {
@@ -360,15 +365,6 @@ export interface RebootOptions {
360365
customAfterSleepMillis?: number;
361366
}
362367

363-
export interface OpenStorageOptions {
364-
/**
365-
* If set to `true` then the cloud storage is used even if the `CRAWLEE_STORAGE_DIR`
366-
* environment variable is set. This way it is possible to combine local and cloud storage.
367-
* @default false
368-
*/
369-
forceCloud?: boolean;
370-
}
371-
372368
export { ClientActorRun as ActorRun };
373369

374370
/**
@@ -429,6 +425,13 @@ export class Actor<Data extends Dictionary = Dictionary> {
429425

430426
private chargingManager: ChargingManager;
431427

428+
/**
429+
* Tracks which aliased storages have been purged during this session,
430+
* so we only purge them once (on first open) when running locally.
431+
* @internal
432+
*/
433+
purgedStorageAliases = new Set<string>();
434+
432435
constructor(options: ConfigurationOptions = {}) {
433436
// use default configuration object if nothing overridden (it falls back to env vars)
434437
this.config =
@@ -1122,16 +1125,17 @@ export class Actor<Data extends Dictionary = Dictionary> {
11221125
* For more details and code examples, see the {@apilink Dataset} class.
11231126
*
11241127
* @param [datasetIdOrName]
1125-
* ID or name of the dataset to be opened. If `null` or `undefined`,
1128+
* ID, name, or alias of the dataset to be opened. If `null` or `undefined`,
11261129
* the function returns the default dataset associated with the Actor run.
1130+
* You can also pass `{ alias: 'name' }` to open a dataset defined in the Actor's schema storages,
1131+
* `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
11271132
* @param [options]
11281133
* @ignore
11291134
*/
11301135
async openDataset(
1131-
datasetIdOrName?: string | null,
1136+
datasetIdOrName?: StorageIdentifier | null,
11321137
options: OpenStorageOptions = {},
11331138
): Promise<Dataset<Data>> {
1134-
ow(datasetIdOrName, ow.optional.string);
11351139
ow(
11361140
options,
11371141
ow.object.exactShape({
@@ -1313,14 +1317,14 @@ export class Actor<Data extends Dictionary = Dictionary> {
13131317
* @param [storeIdOrName]
13141318
* ID or name of the key-value store to be opened. If `null` or `undefined`,
13151319
* the function returns the default key-value store associated with the Actor run.
1320+
* You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
13161321
* @param [options]
13171322
* @ignore
13181323
*/
13191324
async openKeyValueStore(
1320-
storeIdOrName?: string | null,
1325+
storeIdOrName?: StorageIdentifierWithoutAlias | null,
13211326
options: OpenStorageOptions = {},
13221327
): Promise<KeyValueStore> {
1323-
ow(storeIdOrName, ow.optional.string);
13241328
ow(
13251329
options,
13261330
ow.object.exactShape({
@@ -1347,14 +1351,14 @@ export class Actor<Data extends Dictionary = Dictionary> {
13471351
* @param [queueIdOrName]
13481352
* ID or name of the request queue to be opened. If `null` or `undefined`,
13491353
* the function returns the default request queue associated with the Actor run.
1354+
* You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
13501355
* @param [options]
13511356
* @ignore
13521357
*/
13531358
async openRequestQueue(
1354-
queueIdOrName?: string | null,
1359+
queueIdOrName?: StorageIdentifierWithoutAlias | null,
13551360
options: OpenStorageOptions = {},
13561361
): Promise<RequestQueue> {
1357-
ow(queueIdOrName, ow.optional.string);
13581362
ow(
13591363
options,
13601364
ow.object.exactShape({
@@ -2023,12 +2027,14 @@ export class Actor<Data extends Dictionary = Dictionary> {
20232027
* For more details and code examples, see the {@apilink Dataset} class.
20242028
*
20252029
* @param [datasetIdOrName]
2026-
* ID or name of the dataset to be opened. If `null` or `undefined`,
2030+
* ID, name, or alias of the dataset to be opened. If `null` or `undefined`,
20272031
* the function returns the default dataset associated with the Actor run.
2032+
* You can also pass `{ alias: 'name' }` to open a dataset defined in the Actor's schema storages,
2033+
* `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
20282034
* @param [options]
20292035
*/
20302036
static async openDataset<Data extends Dictionary = Dictionary>(
2031-
datasetIdOrName?: string | null,
2037+
datasetIdOrName?: StorageIdentifier | null,
20322038
options: OpenStorageOptions = {},
20332039
): Promise<Dataset<Data>> {
20342040
return Actor.getDefaultInstance().openDataset(datasetIdOrName, options);
@@ -2158,10 +2164,11 @@ export class Actor<Data extends Dictionary = Dictionary> {
21582164
* @param [storeIdOrName]
21592165
* ID or name of the key-value store to be opened. If `null` or `undefined`,
21602166
* the function returns the default key-value store associated with the Actor run.
2167+
* You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
21612168
* @param [options]
21622169
*/
21632170
static async openKeyValueStore(
2164-
storeIdOrName?: string | null,
2171+
storeIdOrName?: StorageIdentifierWithoutAlias | null,
21652172
options: OpenStorageOptions = {},
21662173
): Promise<KeyValueStore> {
21672174
return Actor.getDefaultInstance().openKeyValueStore(
@@ -2184,10 +2191,11 @@ export class Actor<Data extends Dictionary = Dictionary> {
21842191
* @param [queueIdOrName]
21852192
* ID or name of the request queue to be opened. If `null` or `undefined`,
21862193
* the function returns the default request queue associated with the Actor run.
2194+
* You can also pass `{ id: 'abc' }` to open by explicit ID, or `{ name: 'abc' }` to open by explicit name.
21872195
* @param [options]
21882196
*/
21892197
static async openRequestQueue(
2190-
queueIdOrName?: string | null,
2198+
queueIdOrName?: StorageIdentifierWithoutAlias | null,
21912199
options: OpenStorageOptions = {},
21922200
): Promise<RequestQueue> {
21932201
return Actor.getDefaultInstance().openRequestQueue(
@@ -2366,16 +2374,14 @@ export class Actor<Data extends Dictionary = Dictionary> {
23662374

23672375
private async _openStorage<T extends IStorage>(
23682376
storageClass: Constructor<T>,
2369-
id?: string,
2377+
identifier?: StorageIdentifier | null,
23702378
options: OpenStorageOptions = {},
23712379
) {
2372-
const client = options.forceCloud ? this.apifyClient : undefined;
2373-
return StorageManager.openStorage<T>(
2374-
storageClass,
2375-
id,
2376-
client,
2377-
this.config,
2378-
);
2380+
return openStorage<T>(storageClass, identifier, {
2381+
config: this.config,
2382+
client: options.forceCloud ? this.apifyClient : undefined,
2383+
purgedStorageAliases: this.purgedStorageAliases,
2384+
});
23792385
}
23802386

23812387
private _ensureActorInit(methodCalled: string) {

src/configuration.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export interface ConfigurationOptions extends CoreConfigurationOptions {
4141
useChargingLogDataset?: boolean;
4242
actorPricingInfo?: string;
4343
chargedEventCounts?: string;
44+
actorStoragesJson?: string;
4445
}
4546

4647
/**
@@ -185,6 +186,7 @@ export class Configuration extends CoreConfiguration {
185186
ACTOR_USE_CHARGING_LOG_DATASET: 'useChargingLogDataset',
186187
APIFY_ACTOR_PRICING_INFO: 'actorPricingInfo',
187188
APIFY_CHARGED_ACTOR_EVENT_COUNTS: 'chargedEventCounts',
189+
ACTOR_STORAGES_JSON: 'actorStoragesJson',
188190
};
189191

190192
protected static override INTEGER_VARS = [

src/index.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
export * from './actor.js';
2+
export type {
3+
OpenStorageOptions,
4+
StorageAlias,
5+
StorageId,
6+
StorageName,
7+
StorageIdentifier,
8+
StorageIdentifierWithoutAlias,
9+
} from './storage.js';
210
export {
311
ChargeOptions,
412
ChargeResult,

0 commit comments

Comments
 (0)