Skip to content

Commit 8dff93a

Browse files
authored
feat!: make storage purging default, add --resurrect (#729)
BREAKING CHANGE: Purging is now enabled by default, bringing the `run` command in sync with Crawlee v3+ behavior (meaning you no longer need to provide `--purge` when starting your Actors). Use the `--resurrect` flag (or `--no-purge`) to keep the storage folder intact. The `purge-queue`, `purge-dataset` and `purge-key-value-store` flags have been removed, and the logic of all three was combined into the `purge` flag.
1 parent a9452d7 commit 8dff93a

7 files changed

Lines changed: 31 additions & 50 deletions

File tree

.github/workflows/check.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
strategy:
1919
fail-fast: false
2020
matrix:
21-
os: [ubuntu-latest, windows-latest]
21+
os: [ubuntu-latest, windows-2025]
2222
node-version: [18, 20, 22, 24]
2323

2424
steps:
@@ -60,7 +60,7 @@ jobs:
6060
strategy:
6161
fail-fast: false
6262
matrix:
63-
os: [ubuntu-latest, windows-latest]
63+
os: [ubuntu-latest, windows-2025]
6464
python-version: ["3.9", "3.10", "3.11", "3.12"]
6565
runs-on: ${{ matrix.os }}
6666

.github/workflows/cucumber.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
os: [ubuntu-latest, windows-latest]
19+
os: [ubuntu-latest, windows-2025]
2020
# We only test LTS for now
21-
node-version: [20]
21+
node-version: [22]
2222

2323
runs-on: ${{ matrix.os }}
2424

src/commands/_register.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,5 @@ export const actorCommands = [
6666
ActorGetInputCommand,
6767
ActorChargeCommand,
6868
HelpCommand,
69+
UpgradeCommand,
6970
] as const satisfies (typeof BuiltApifyCommand)[];

src/commands/run.ts

Lines changed: 15 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -60,29 +60,25 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
6060
purge: Flags.boolean({
6161
char: 'p',
6262
description:
63-
'Shortcut that combines the --purge-queue, --purge-dataset and --purge-key-value-store options.',
63+
'Whether to purge the default request queue, dataset and key-value store before the run starts.\nFor crawlee projects, this is the default behavior, and the flag is optional.\nUse `--no-purge` to keep the storage folder intact.',
6464
required: false,
65+
default: true,
66+
exclusive: ['resurrect'],
6567
}),
66-
'purge-queue': Flags.boolean({
67-
description: 'Deletes the local directory containing the default request queue before the run starts.',
68-
required: false,
69-
}),
70-
'purge-dataset': Flags.boolean({
71-
description: 'Deletes the local directory containing the default dataset before the run starts.',
72-
required: false,
73-
}),
74-
'purge-key-value-store': Flags.boolean({
68+
resurrect: Flags.boolean({
7569
description:
76-
'Deletes all records from the default key-value store in the local directory before the run starts, except for the "INPUT" key.',
70+
'Whether to keep the default request queue, dataset and key-value store before the run starts.',
7771
required: false,
72+
default: false,
73+
exclusive: ['purge'],
7874
}),
7975
entrypoint: Flags.string({
8076
description: [
8177
'Optional entrypoint for running with injected environment variables.',
8278
'\n',
8379
'For Python, it is the module name, or a path to a file.',
8480
'\n',
85-
'For node.js, it is the npm script name, or a path to a JS/MJS file.',
81+
'For Node.js, it is the npm script name, or a path to a JS/MJS file.',
8682
'You can also pass in a directory name, provided that directory contains an "index.js" file.',
8783
].join(' '),
8884
required: false,
@@ -215,42 +211,25 @@ export class RunCommand extends ApifyCommand<typeof RunCommand> {
215211

216212
let CRAWLEE_PURGE_ON_START = '0';
217213

214+
// Mark resurrect as a special case of --no-purge
215+
if (this.flags.resurrect) {
216+
this.flags.purge = false;
217+
}
218+
218219
// Purge stores
219-
// TODO: this needs to be cleaned up heavily - ideally logic should be in the project analyzers
220220
if (this.flags.purge) {
221221
CRAWLEE_PURGE_ON_START = '1';
222222

223223
if (crawleeVersion.isNone()) {
224224
await Promise.all([purgeDefaultQueue(), purgeDefaultKeyValueStore(), purgeDefaultDataset()]);
225225
info({ message: 'All default local stores were purged.' });
226226
}
227-
228-
// This might not be needed for python and scrapy projects
229-
// if (type === ProjectLanguage.Python || type === ProjectLanguage.Scrapy) {
230-
// await Promise.all([purgeDefaultQueue(), purgeDefaultKeyValueStore(), purgeDefaultDataset()]);
231-
// info({ message: 'All default local stores were purged.' });
232-
// }
233-
}
234-
235-
// TODO: deprecate these flags
236-
if (this.flags.purgeQueue && !this.flags.purge) {
237-
await purgeDefaultQueue();
238-
info({ message: 'Default local request queue was purged.' });
239-
}
240-
241-
if (this.flags.purgeDataset && !this.flags.purge) {
242-
await purgeDefaultDataset();
243-
info({ message: 'Default local dataset was purged.' });
244-
}
245-
246-
if (this.flags.purgeKeyValueStore && !this.flags.purge) {
247-
await purgeDefaultKeyValueStore();
248-
info({ message: 'Default local key-value store was purged.' });
249227
}
250228

251229
if (!this.flags.purge) {
252230
const isStorageEmpty = await checkIfStorageIsEmpty();
253-
if (!isStorageEmpty) {
231+
232+
if (!isStorageEmpty && !this.flags.resurrect) {
254233
warning({
255234
message:
256235
'The storage directory contains a previous state, the Actor will continue where it left off. ' +

src/lib/command-framework/apify-command.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ export abstract class ApifyCommand<T extends typeof BuiltApifyCommand = typeof B
312312
});
313313

314314
return;
315+
} else if (typeof builderData.hasDefault !== 'undefined') {
316+
this.flags[camelCasedName] = builderData.hasDefault;
315317
}
316318
}
317319
}
@@ -452,7 +454,7 @@ export abstract class ApifyCommand<T extends typeof BuiltApifyCommand = typeof B
452454

453455
// yargs handles "no-" flags by negating the flag, so we need to handle that differently if we register a flag with a "no-" prefix
454456
if (flagKey.startsWith('no-')) {
455-
finalYargs = internalBuilderData.builder(finalYargs, flagKey.slice(3), [], true);
457+
finalYargs = internalBuilderData.builder(finalYargs, flagKey.slice(3));
456458
} else {
457459
finalYargs = internalBuilderData.builder(finalYargs, flagKey);
458460
}

src/lib/command-framework/flags.ts

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ export interface TaggedFlagBuilder<
4242
HasDefault = false,
4343
> {
4444
flagTag: Tag;
45-
builder: (args: Argv, objectName: string, extraArgs?: string[], invertDefaultIfSet?: boolean) => Argv;
45+
builder: (args: Argv, objectName: string, extraArgs?: string[]) => Argv;
4646
choicesType: ChoicesType;
4747
required: Required;
4848
hasDefault: HasDefault;
@@ -67,7 +67,7 @@ function stringFlag<const Choices extends string[], const T extends StringFlagOp
6767
): TaggedFlagBuilder<'string', Choices, T['default'] extends string ? true : T['required'], T['default']> {
6868
return {
6969
flagTag: 'string',
70-
builder: (args, objectName, extraAliases, invertDefaultIfSet = false) => {
70+
builder: (args, objectName, extraAliases) => {
7171
const allAliases = new Set([...(options.aliases ?? []), ...(extraAliases ?? [])]);
7272

7373
if (options.char) {
@@ -82,7 +82,6 @@ function stringFlag<const Choices extends string[], const T extends StringFlagOp
8282
alias: [...allAliases].map((alias) => kebabCaseString(camelCaseToKebabCase(alias))),
8383
hidden: options.hidden ?? false,
8484
conflicts: options.exclusive,
85-
default: invertDefaultIfSet ? !options.default : options.default,
8685
choices: options.choices,
8786
string: true,
8887
// we only require something be passed in if we don't have a default or read from stdin
@@ -107,7 +106,7 @@ function booleanFlag<const T extends BooleanFlagOptions>(
107106
): TaggedFlagBuilder<'boolean', never, T['default'] extends boolean ? true : T['required'], T['default']> {
108107
return {
109108
flagTag: 'boolean',
110-
builder: (args, objectName, extraAliases, invertDefaultIfSet = false) => {
109+
builder: (args, objectName, extraAliases) => {
111110
const allAliases = new Set([...(options.aliases ?? []), ...(extraAliases ?? [])]);
112111

113112
if (options.char) {
@@ -122,7 +121,6 @@ function booleanFlag<const T extends BooleanFlagOptions>(
122121
alias: [...allAliases].map((alias) => kebabCaseString(camelCaseToKebabCase(alias))),
123122
hidden: options.hidden ?? false,
124123
conflicts: options.exclusive,
125-
default: invertDefaultIfSet ? !options.default : options.default,
126124
boolean: true,
127125
});
128126
},
@@ -144,7 +142,7 @@ function integerFlag<const T extends IntegerFlagOptions>(
144142
): TaggedFlagBuilder<'integer', never, T['default'] extends number ? true : T['required'], T['default']> {
145143
return {
146144
flagTag: 'integer',
147-
builder: (args, objectName, extraAliases, invertDefaultIfSet = false) => {
145+
builder: (args, objectName, extraAliases) => {
148146
const allAliases = new Set([...(options.aliases ?? []), ...(extraAliases ?? [])]);
149147

150148
if (options.char) {
@@ -159,7 +157,6 @@ function integerFlag<const T extends IntegerFlagOptions>(
159157
alias: [...allAliases].map((alias) => kebabCaseString(camelCaseToKebabCase(alias))),
160158
hidden: options.hidden ?? false,
161159
conflicts: options.exclusive,
162-
default: invertDefaultIfSet ? !options.default : options.default,
163160
choices: options.choices,
164161
string: true,
165162
nargs: 1,

vitest.config.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
// eslint-disable-next-line import/extensions
12
import { defineConfig } from 'vitest/config';
23

34
const isWindows = process.platform === 'win32';
5+
const multiplierFactor = isWindows ? 4 : 1;
46

57
export default defineConfig({
68
esbuild: {
@@ -10,8 +12,8 @@ export default defineConfig({
1012
test: {
1113
globals: true,
1214
restoreMocks: true,
13-
testTimeout: 60_000 * (isWindows ? 2 : 1),
14-
hookTimeout: 60_000 * (isWindows ? 2 : 1),
15+
testTimeout: 60_000 * multiplierFactor,
16+
hookTimeout: 60_000 * multiplierFactor,
1517
include: ['**/*.{test,spec}.?(c|m)[jt]s?(x)'],
1618
passWithNoTests: true,
1719
silent: !process.env.NO_SILENT_TESTS,

0 commit comments

Comments
 (0)