Skip to content

Commit 313feae

Browse files
l2ysho and vladfrangu authored
fix: validate all schemas in actor.json (#1071)
Example output:

```
Info: Validating input schema at .actor/input_schema.json
Success: Input schema is valid.
Info: Validating Dataset schema at .actor/dataset_schema.json
Success: Dataset schema is valid.
Info: Validating Output schema at .actor/output_schema.json
Success: Output schema is valid.
Info: Validating Key-Value Store schema at .actor/key_value_store_schema.json
Success: Key-Value Store schema is valid.
```

Closes #1036

---------

Co-authored-by: Vlad Frangu <me@vladfrangu.dev>
1 parent ef4d375 commit 313feae

7 files changed

Lines changed: 447 additions & 41 deletions

File tree

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
"@apify/actor-templates": "^0.1.5",
7070
"@apify/consts": "^2.36.0",
7171
"@apify/input_schema": "^3.17.0",
72+
"@apify/json_schemas": "^0.13.0",
7273
"@apify/utilities": "^2.18.0",
7374
"@crawlee/memory-storage": "^3.12.0",
7475
"@inquirer/core": "^11.0.0",

src/commands/_register.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import { RunsIndexCommand } from './runs/_index.js';
3333
import { SecretsIndexCommand } from './secrets/_index.js';
3434
import { TasksIndexCommand } from './task/_index.js';
3535
import { TelemetryIndexCommand } from './telemetry/_index.js';
36-
import { ValidateInputSchemaCommand } from './validate-schema.js';
36+
import { ValidateSchemaCommand } from './validate-schema.js';
3737

3838
export const apifyCommands = [
3939
// namespaces
@@ -64,7 +64,7 @@ export const apifyCommands = [
6464
TopLevelPullCommand,
6565
ToplevelPushCommand,
6666
RunCommand,
67-
ValidateInputSchemaCommand,
67+
ValidateSchemaCommand,
6868
HelpCommand,
6969

7070
// test commands

src/commands/validate-schema.ts

Lines changed: 121 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,33 @@
11
import process from 'node:process';
22

3+
import { validateInputSchema } from '@apify/input_schema';
4+
35
import { ApifyCommand } from '../lib/command-framework/apify-command.js';
46
import { Args } from '../lib/command-framework/args.js';
5-
import { LOCAL_CONFIG_PATH } from '../lib/consts.js';
6-
import { readAndValidateInputSchema } from '../lib/input_schema.js';
7-
import { success } from '../lib/outputs.js';
7+
import { CommandExitCodes, LOCAL_CONFIG_PATH } from '../lib/consts.js';
8+
import {
9+
readAndValidateInputSchema,
10+
readInputSchema,
11+
readStorageSchema,
12+
validateDatasetSchema,
13+
validateKvsSchema,
14+
validateOutputSchema,
15+
} from '../lib/input_schema.js';
16+
import { error, info, success } from '../lib/outputs.js';
17+
import { Ajv2019 } from '../lib/utils.js';
818

9-
export class ValidateInputSchemaCommand extends ApifyCommand<typeof ValidateInputSchemaCommand> {
19+
export class ValidateSchemaCommand extends ApifyCommand<typeof ValidateSchemaCommand> {
1020
static override name = 'validate-schema' as const;
1121

12-
static override description = `Validates Actor input schema from one of these locations (in priority order):
13-
1. Object in '${LOCAL_CONFIG_PATH}' under "input" key
14-
2. JSON file path in '${LOCAL_CONFIG_PATH}' "input" key
15-
3. .actor/INPUT_SCHEMA.json
16-
4. INPUT_SCHEMA.json
22+
static override description = `Validates Actor schemas.
23+
24+
When a path argument is provided, validates only the input schema at that path.
1725
18-
Optionally specify custom schema path to validate.`;
26+
When no path is provided, validates all schemas found in '${LOCAL_CONFIG_PATH}':
27+
- Input schema (from "input" key or default locations)
28+
- Dataset schema (from "storages.dataset")
29+
- Output schema (from "output")
30+
- Key-Value Store schema (from "storages.keyValueStore")`;
1931

2032
static override group = 'Local Actor Development';
2133

@@ -35,22 +47,115 @@ Optionally specify custom schema path to validate.`;
3547
static override args = {
3648
path: Args.string({
3749
required: false,
38-
description: 'Optional path to your INPUT_SCHEMA.json file. If not provided ./INPUT_SCHEMA.json is used.',
50+
description: `Optional path to your INPUT_SCHEMA.json file. If not provided, validates all schemas in '${LOCAL_CONFIG_PATH}'.`,
3951
}),
4052
};
4153

4254
static override hiddenAliases = ['vis'];
4355

4456
async run() {
57+
if (this.args.path) {
58+
await this.validateInputSchemaAtPath(this.args.path);
59+
return;
60+
}
61+
62+
await this.validateAllSchemas();
63+
}
64+
65+
private async validateInputSchemaAtPath(forcePath: string) {
4566
await readAndValidateInputSchema({
46-
forcePath: this.args.path,
67+
forcePath,
4768
cwd: process.cwd(),
48-
getMessage: (path) =>
49-
path
50-
? `Validating input schema at ${path}`
51-
: `Validating input schema embedded in '${LOCAL_CONFIG_PATH}'`,
69+
getMessage: (path) => `Validating input schema at ${path ?? forcePath}`,
5270
});
5371

5472
success({ message: 'Input schema is valid.' });
5573
}
74+
75+
private async validateAllSchemas() {
76+
const cwd = process.cwd();
77+
let foundAny = false;
78+
let hasErrors = false;
79+
80+
// Input schema — not using readAndValidateInputSchema here because it throws
81+
// when no schema is found; in the all-schemas scan, a missing input schema
82+
// should be silently skipped, not treated as an error.
83+
try {
84+
const { inputSchema, inputSchemaPath } = await readInputSchema({ cwd, throwOnMissing: true });
85+
86+
if (inputSchema) {
87+
foundAny = true;
88+
89+
const location = inputSchemaPath ? `at ${inputSchemaPath}` : `embedded in '${LOCAL_CONFIG_PATH}'`;
90+
info({ message: `Validating input schema ${location}` });
91+
92+
const validator = new Ajv2019({ strict: false });
93+
validateInputSchema(validator, inputSchema);
94+
success({ message: 'Input schema is valid.' });
95+
}
96+
} catch (err) {
97+
foundAny = true;
98+
hasErrors = true;
99+
error({ message: (err as Error).message });
100+
}
101+
102+
// Storage schemas (Dataset, Output, Key-Value Store)
103+
const storageSchemas = [
104+
{
105+
label: 'Dataset',
106+
read: () => readStorageSchema({ cwd, key: 'dataset', label: 'Dataset', throwOnMissing: true }),
107+
validate: validateDatasetSchema,
108+
},
109+
{
110+
label: 'Output',
111+
read: () =>
112+
readStorageSchema({
113+
cwd,
114+
key: 'output',
115+
label: 'Output',
116+
getRef: (config) => config?.output,
117+
throwOnMissing: true,
118+
}),
119+
validate: validateOutputSchema,
120+
},
121+
{
122+
label: 'Key-Value Store',
123+
read: () =>
124+
readStorageSchema({ cwd, key: 'keyValueStore', label: 'Key-Value Store', throwOnMissing: true }),
125+
validate: validateKvsSchema,
126+
},
127+
];
128+
129+
for (const { label, read, validate } of storageSchemas) {
130+
try {
131+
const result = read();
132+
133+
if (result) {
134+
foundAny = true;
135+
136+
const location = result.schemaPath
137+
? `at ${result.schemaPath}`
138+
: `embedded in '${LOCAL_CONFIG_PATH}'`;
139+
info({ message: `Validating ${label} schema ${location}` });
140+
141+
validate(result.schema);
142+
success({ message: `${label} schema is valid.` });
143+
}
144+
} catch (err) {
145+
foundAny = true;
146+
hasErrors = true;
147+
error({ message: (err as Error).message });
148+
}
149+
}
150+
151+
if (!foundAny) {
152+
throw new Error(
153+
`No schemas found. Make sure '${LOCAL_CONFIG_PATH}' exists and defines at least one schema.`,
154+
);
155+
}
156+
157+
if (hasErrors) {
158+
process.exitCode = CommandExitCodes.InvalidInput;
159+
}
160+
}
56161
}

src/lib/input_schema.ts

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
import { existsSync, writeFileSync } from 'node:fs';
22
import { join } from 'node:path';
33

4+
import type { Ajv, ErrorObject } from 'ajv';
45
import { cloneDeep } from 'es-toolkit';
56

67
import { KEY_VALUE_STORE_KEYS } from '@apify/consts';
78
import { validateInputSchema } from '@apify/input_schema';
9+
import {
10+
getDatasetSchemaValidator,
11+
getKeyValueStoreSchemaValidator,
12+
getOutputSchemaValidator,
13+
} from '@apify/json_schemas';
814

915
import { ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_PATH } from './consts.js';
1016
import { info, warning } from './outputs.js';
@@ -24,7 +30,15 @@ const DEFAULT_INPUT_SCHEMA_PATHS = [
2430
* In such a case, path would be set to the location
2531
* where the input schema would be expected to be found (and e.g. can be created there).
2632
*/
27-
export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string; cwd: string }) => {
33+
export const readInputSchema = async ({
34+
forcePath,
35+
cwd,
36+
throwOnMissing = false,
37+
}: {
38+
forcePath?: string;
39+
cwd: string;
40+
throwOnMissing?: boolean;
41+
}) => {
2842
if (forcePath) {
2943
return {
3044
inputSchema: getJsonFileContent(forcePath),
@@ -34,7 +48,7 @@ export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string;
3448

3549
const localConfig = getLocalConfig(cwd);
3650

37-
if (typeof localConfig?.input === 'object') {
51+
if (typeof localConfig?.input === 'object' && localConfig.input !== null) {
3852
return {
3953
inputSchema: localConfig.input as Record<string, unknown>,
4054
inputSchemaPath: null,
@@ -43,8 +57,25 @@ export const readInputSchema = async ({ forcePath, cwd }: { forcePath?: string;
4357

4458
if (typeof localConfig?.input === 'string') {
4559
const fullPath = join(cwd, ACTOR_SPECIFICATION_FOLDER, localConfig.input);
60+
const schema = getJsonFileContent(fullPath);
61+
62+
if (!schema) {
63+
if (throwOnMissing) {
64+
throw new Error(`Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`);
65+
}
66+
67+
warning({
68+
message: `Input schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`,
69+
});
70+
71+
return {
72+
inputSchema: null,
73+
inputSchemaPath: fullPath,
74+
};
75+
}
76+
4677
return {
47-
inputSchema: getJsonFileContent(fullPath),
78+
inputSchema: schema,
4879
inputSchemaPath: fullPath,
4980
};
5081
}
@@ -115,11 +146,13 @@ export const readStorageSchema = ({
115146
key,
116147
label,
117148
getRef,
149+
throwOnMissing = false,
118150
}: {
119151
cwd: string;
120152
key: string;
121153
label: string;
122154
getRef?: (config: ReturnType<typeof getLocalConfig>) => unknown;
155+
throwOnMissing?: boolean;
123156
}): { schema: Record<string, unknown>; schemaPath: string | null } | null => {
124157
const localConfig = getLocalConfig(cwd);
125158

@@ -137,6 +170,12 @@ export const readStorageSchema = ({
137170
const schema = getJsonFileContent(fullPath);
138171

139172
if (!schema) {
173+
if (throwOnMissing) {
174+
throw new Error(
175+
`${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`,
176+
);
177+
}
178+
140179
warning({
141180
message: `${label} schema file not found at ${fullPath} (referenced in '${LOCAL_CONFIG_PATH}').`,
142181
});
@@ -255,8 +294,40 @@ export const getDefaultsFromInputSchema = (inputSchema: any) => {
255294
return defaults;
256295
};
257296

297+
function formatSchemaValidationErrors(errors: ErrorObject[], schemaName: string): string {
298+
const details = errors
299+
.map((err) => {
300+
const path = err.instancePath ? ` at ${err.instancePath}` : '';
301+
return ` - ${err.message}${path}`;
302+
})
303+
.join('\n');
304+
305+
return `${schemaName} schema is not valid:\n${details}`;
306+
}
307+
308+
export function validateDatasetSchema(schema: Record<string, unknown>): void {
309+
const validate = getDatasetSchemaValidator();
310+
if (!validate(schema)) {
311+
throw new Error(formatSchemaValidationErrors(validate.errors!, 'Dataset'));
312+
}
313+
}
314+
315+
export function validateOutputSchema(schema: Record<string, unknown>): void {
316+
const validate = getOutputSchemaValidator();
317+
if (!validate(schema)) {
318+
throw new Error(formatSchemaValidationErrors(validate.errors!, 'Output'));
319+
}
320+
}
321+
322+
export function validateKvsSchema(schema: Record<string, unknown>): void {
323+
const validate = getKeyValueStoreSchemaValidator();
324+
if (!validate(schema)) {
325+
throw new Error(formatSchemaValidationErrors(validate.errors!, 'Key-Value Store'));
326+
}
327+
}
328+
258329
// Lots of code copied from @apify-packages/actor, this really should be moved to the shared input_schema package
259-
export const getAjvValidator = (inputSchema: any, ajvInstance: import('ajv').Ajv) => {
330+
export const getAjvValidator = (inputSchema: any, ajvInstance: Ajv) => {
260331
const copyOfSchema = cloneDeep(inputSchema);
261332
copyOfSchema.required = [];
262333

0 commit comments

Comments
 (0)