Skip to content

Commit 5f3c7c5

Browse files
feat(bin): don't test on nonfunctional changes in json files (#56)
* feat(bin): don't test on nonfunctional changes in json files * test: add test for reordering (passes) * refactor: cleaner logic flow, and use 'function' and 'cosmetic' everywhere * fix should build & test tests and smaller touches
1 parent f32cef4 commit 5f3c7c5

9 files changed

Lines changed: 700 additions & 160 deletions

File tree

bin/diff-changes.ts

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
import { isCosmeticOnlyJsonSchemaChange } from './diff-json-schema.js';
2+
import type { ActorConfig, Commit } from './types.js';
3+
4+
interface ShouldBuildAndTestOptions {
5+
filepathsChanged: string[];
6+
actorConfigs: ActorConfig[];
7+
isLatest?: boolean;
8+
commits: Commit[];
9+
}
10+
11+
export const maybeParseActorFolder = (lowercaseFilePath: string): { isActorFolder: true, actorName: string } | { isActorFolder: false } => {
12+
const match = lowercaseFilePath.match(/^(?:standalone-)?actors\/([^/]+)\/.+/);
13+
if (match) {
14+
// Some usernames weirdly use underscores, e.g. google_maps_email_extractor_standby-contact-details-scraper so we only need replace the last one
15+
return { isActorFolder: true, actorName: match[1].replace(/_(?=[^_]*$)/, '/') };
16+
}
17+
return { isActorFolder: false };
18+
}
19+
20+
/**
21+
* Also works for folders
22+
*/
23+
const isIgnoredTopLevelFile = (lowercaseFilePath: string) => {
24+
// On top level, we should only have dev-only readme and .actor/ is just for apify push CLI (real Actor configs are in /actors)
25+
const IGNORED_TOP_LEVEL_FILES = ['.vscode/', '.gitignore', 'readme.md', '.husky/', '.eslintrc', 'eslint.config.mjs', '.prettierrc', '.editorconfig', '.actor/'];
26+
// Strip out deprecated /code and /shared folders, treat them as top-level code
27+
const sanitizedLowercaseFilePath = lowercaseFilePath.replace(/^code\//, '').replace(/^shared\//, '');
28+
29+
return IGNORED_TOP_LEVEL_FILES.some((ignoredFile) => sanitizedLowercaseFilePath.startsWith(ignoredFile));
30+
};
31+
32+
type FileChange =
33+
{ impact: 'ignored' } |
34+
// Only things that influence how the Actor looks - e.g. README and CHANGELOG files, schema titles, descriptions, reordering, etc. We only need to rebuild on release
35+
{ impact: 'cosmetic', includes: 'all-actors' | ActorConfig } |
36+
// Influences how the Actor works - we need to run tests
37+
{
38+
impact: 'functional', includes: 'all-actors' | ActorConfig
39+
};
40+
41+
const classifyFileChange = (lowercaseFilePath: string, actorConfigs: ActorConfig[], commits: Commit[]): FileChange => {
42+
if (isIgnoredTopLevelFile(lowercaseFilePath)) {
43+
return { impact: 'ignored' };
44+
}
45+
46+
if (lowercaseFilePath.endsWith('changelog.md')) {
47+
return { impact: 'cosmetic', includes: 'all-actors' };
48+
}
49+
50+
const actorFolderInfo = maybeParseActorFolder(lowercaseFilePath);
51+
if (actorFolderInfo.isActorFolder) {
52+
const actorConfigChanged = actorConfigs.find(({ actorName }) => actorName.toLowerCase() === actorFolderInfo.actorName);
53+
// This is some super weird case that happened once in the past but I don't remember the context anymore
54+
if (actorConfigChanged === undefined) {
55+
console.error('SHOULD NEVER HAPPEN: changes was found in an actor folder which no longer exists in the current commit, skipping this file', {
56+
actorName: actorFolderInfo.actorName,
57+
lowercaseFilePath,
58+
});
59+
return { impact: 'ignored' };
60+
}
61+
if (lowercaseFilePath.endsWith('readme.md')) {
62+
return { impact: 'cosmetic', includes: actorConfigChanged };
63+
}
64+
if (lowercaseFilePath.endsWith('.json') && isCosmeticOnlyJsonSchemaChange(commits, lowercaseFilePath)) {
65+
return { impact: 'cosmetic', includes: actorConfigChanged };
66+
}
67+
68+
return { impact: 'functional', includes: actorConfigChanged };
69+
}
70+
71+
// For any other files, we assume they can interact with the code
72+
return { impact: 'functional', includes: 'all-actors' };
73+
}
74+
75+
export const getChangedActors = (
76+
{ filepathsChanged, actorConfigs, isLatest = false, commits }: ShouldBuildAndTestOptions,
77+
): ActorConfig[] => {
78+
// folder -> ActorConfig
79+
const actorsChangedMap = new Map<string, ActorConfig>();
80+
81+
const actorConfigsWithoutStandalone = actorConfigs.filter(({ isStandalone }) => !isStandalone);
82+
83+
const lowercaseFiles = filepathsChanged.map((file) => file.toLowerCase());
84+
85+
for (const lowercaseFilePath of lowercaseFiles) {
86+
const fileChange = classifyFileChange(lowercaseFilePath, actorConfigs, commits);
87+
if (fileChange.impact === 'ignored') {
88+
continue;
89+
}
90+
91+
if (fileChange.impact === 'cosmetic' && !isLatest) {
92+
continue;
93+
}
94+
95+
if (fileChange.includes !== 'all-actors') {
96+
actorsChangedMap.set(fileChange.includes.folder, fileChange.includes);
97+
} else if (fileChange.includes === 'all-actors') {
98+
// Standalone Actors are handled always via specific actors change, not all-actors
99+
for (const actorConfig of actorConfigsWithoutStandalone) {
100+
actorsChangedMap.set(actorConfig.folder, actorConfig);
101+
}
102+
}
103+
}
104+
105+
const actorsChanged = Array.from(actorsChangedMap.values());
106+
107+
// All below here is just for logging
108+
const ignoredFilesChanged = lowercaseFiles.filter((file) => classifyFileChange(file, actorConfigs, commits).impact === 'ignored');
109+
console.error(`[DIFF]: Ignored files (don't trigger test or build): ${ignoredFilesChanged.join(', ')}`);
110+
111+
const cosmeticFilesChanged = lowercaseFiles.filter((file) => classifyFileChange(file, actorConfigs, commits).impact === 'cosmetic');
112+
console.error(`[DIFF]: Cosmetic files (should only trigger release build): ${cosmeticFilesChanged.join(', ')}`);
113+
114+
const functionalFilesChanged = lowercaseFiles.filter((file) => classifyFileChange(file, actorConfigs, commits).impact === 'functional');
115+
console.error(`[DIFF]: Functional files (trigger test & release build): ${functionalFilesChanged.join(', ')}`);
116+
117+
if (actorsChanged.length > 0) {
118+
const miniactors = actorsChanged.filter((config) => !config.isStandalone).map((config) => config.actorName);
119+
const standaloneActors = actorsChanged.filter((config) => config.isStandalone).map((config) => config.actorName);
120+
console.error(`[DIFF]: MiniActors to be built and tested: ${miniactors.join(', ')}`);
121+
console.error(`[DIFF]: Standalone Actors to be built and tested: ${standaloneActors.join(', ')}`);
122+
} else {
123+
console.error(`[DIFF]: No relevant files changed, skipping builds and tests`);
124+
}
125+
126+
return actorsChanged;
127+
};

bin/diff-json-schema.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import type { Commit } from './types.js';
2+
import { spawnCommandInGhWorkspace } from './utils.js';
3+
4+
const COSMETIC_JSON_FIELD_NAMES = new Set([
5+
'title', 'description', 'example', 'enumTitles', 'sectionCaption', 'sectionDescription',
6+
]);
7+
8+
const isPlainObject = (val: unknown): val is Record<string, unknown> =>
9+
typeof val === 'object' && val !== null && !Array.isArray(val);
10+
11+
const isCosmeticObjectChange = (oldVal: unknown, newVal: unknown, currentKey?: string): boolean => {
12+
// If the key itself is cosmetic, any change under it is fine
13+
if (currentKey && COSMETIC_JSON_FIELD_NAMES.has(currentKey)) return true;
14+
if (JSON.stringify(oldVal) === JSON.stringify(newVal)) return true;
15+
if (isPlainObject(oldVal) && isPlainObject(newVal)) {
16+
const allKeys = new Set([...Object.keys(oldVal), ...Object.keys(newVal)]);
17+
return [...allKeys].every((key) => isCosmeticObjectChange(oldVal[key], newVal[key], key));
18+
}
19+
return false;
20+
};
21+
22+
/**
23+
* Returns true if the two JSON strings differ only in cosmetic fields
24+
* (title, description, example, enumTitles, sectionCaption, sectionDescription).
25+
*/
26+
export const isCosmeticOnlyJsonSchemaChange = (commits: Commit[], changedFilepath: string): boolean => {
27+
// TODO: validate this is the right commit range
28+
const oldRef = `${commits[0].sha}~`;
29+
const newRef = commits[commits.length - 1].sha;
30+
let oldJson: unknown;
31+
let newJson: unknown;
32+
try {
33+
const oldContent = spawnCommandInGhWorkspace(`git show ${oldRef}:${changedFilepath}`);
34+
const newContent = spawnCommandInGhWorkspace(`git show ${newRef}:${changedFilepath}`);
35+
36+
oldJson = JSON.parse(oldContent);
37+
newJson = JSON.parse(newContent);
38+
} catch {
39+
console.error(`Failed to get or parse JSON content for ${changedFilepath} at refs ${oldRef} and ${newRef}, maybe it is new file or deleted? Treating it as a non-cosmetic change.`);
40+
return false;
41+
}
42+
return isCosmeticObjectChange(oldJson, newJson);
43+
};

bin/git.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Commit, Config } from './types.js';
1+
import type { Commit, Config } from './types.js';
22
import { spawnCommandInGhWorkspace } from './utils.js';
33

44
export const GIT_FORMAT_SEPARATOR = '»¦«';
@@ -8,11 +8,13 @@ const GIT_LOG_FORMAT = ['%H', '%aN<%aE>', '%aD', '%s'].join(GIT_FORMAT_SEPARATOR
88
* Gets the list of changed files between the given commits (inclusive).
99
*/
1010
export const getChangedFiles = (commits: Commit[]) => {
11-
const changedFiles = spawnCommandInGhWorkspace(
11+
const changedFilesString = spawnCommandInGhWorkspace(
1212
`git diff --name-only ${commits[0].sha}~..${commits[commits.length - 1].sha}`,
1313
);
1414

15-
return changedFiles.split('\n');
15+
const changedFiles = changedFilesString.split('\n');
16+
console.error(`Changed files (up to 50): ${changedFiles.slice(0, 50).join(', ')}`);
17+
return changedFiles;
1618
};
1719

1820
/**
@@ -29,8 +31,15 @@ export const getCommits = ({ sourceBranch, targetBranch, baseCommit: baseCommitS
2931
const baseCommitIndex = commits.findIndex((commit) => commit.sha === baseCommitSha);
3032

3133
const hasBaseCommit = baseCommitIndex !== -1;
32-
if (hasBaseCommit) return commits.slice(baseCommitIndex + 1);
34+
if (hasBaseCommit) {
35+
const commitsUpToBaseCommit = commits.slice(baseCommitIndex + 1);
36+
console.error(`Found base commit ${baseCommitSha} at index ${baseCommitIndex}, returning ${commitsUpToBaseCommit.length} commits after it`);
37+
console.error(`Commits being returned: ${commitsUpToBaseCommit.map((c) => c.sha).join(', ')}`);
38+
return commitsUpToBaseCommit;
39+
}
3340

41+
console.error(`Base commit ${baseCommitSha} not found in the commit range, returning all ${commits.length} commits`);
42+
console.error(`Commits being returned: ${commits.map((c) => c.sha).join(', ')}`);
3443
return commits;
3544
};
3645

bin/main.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
#!/usr/bin/env node
22

3-
import process from 'process';
3+
import process from 'node:process';
4+
45
import yargs, { type Argv } from 'yargs';
56
import { hideBin } from 'yargs/helpers';
67

7-
import { getRepoActors, getChangedActors, spawnCommandInGhWorkspace, setCwd } from './utils.js';
88
import { runBuilds } from './build.js';
9+
import { getChangedActors } from './diff-changes.js';
910
import { getChangedFiles, getCommits } from './git.js';
1011
import { getPushData } from './github.js';
1112
import { notifyToSlack } from './slack.js';
1213
import { reportTestResults } from './test-report.js';
14+
import { getRepoActors, setCwd,spawnCommandInGhWorkspace } from './utils.js';
1315

1416
/**
1517
* Middlewares to be run before every command execution
@@ -69,7 +71,7 @@ await yargs()
6971
const commits = getCommits(args);
7072
const changedFiles = getChangedFiles(commits);
7173
const actorConfigs = await getRepoActors();
72-
const { actorsChanged } = getChangedActors({ filepathsChanged: changedFiles, actorConfigs, isLatest: false });
74+
const actorsChanged = getChangedActors({ filepathsChanged: changedFiles, actorConfigs, isLatest: false, commits });
7375
console.log(JSON.stringify(actorsChanged));
7476
})
7577
.command(
@@ -93,9 +95,10 @@ await yargs()
9395
const commits = getCommits(args);
9496
const changedFiles = getChangedFiles(commits);
9597
const actorConfigs = await getRepoActors();
96-
const { actorsChanged } = getChangedActors({
98+
const actorsChanged = getChangedActors({
9799
filepathsChanged: changedFiles,
98100
actorConfigs,
101+
commits,
99102
});
100103
// https://github.com/apify-store/google-maps#:actors/lukaskrivka_google-maps-with-contact-details
101104
// git@github.com:apify-store/google-maps#:actors/lukaskrivka_google-maps-with-contact-details
@@ -128,10 +131,11 @@ await yargs()
128131
);
129132
const isLatest = true;
130133
const actorConfigs = await getRepoActors();
131-
const { actorsChanged } = getChangedActors({
134+
const actorsChanged = getChangedActors({
132135
filepathsChanged: changedFiles,
133136
actorConfigs,
134137
isLatest,
138+
commits,
135139
});
136140
const { dryRun, reportSlackChannel, releaseSlackChannel } = args;
137141
const builds = await runBuilds({

bin/utils.ts

Lines changed: 2 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import fs from 'node:fs/promises';
21
import { spawnSync } from 'node:child_process';
2+
import fs from 'node:fs/promises';
3+
34
import type {
45
ActorConfig,
5-
Commit,
66
GitHubEvent,
77
} from './types.js';
88

@@ -92,120 +92,3 @@ export const getHeadCommitSha = (githubEvent: GitHubEvent) => {
9292
? githubEvent.pull_request.head.sha
9393
: githubEvent.head_commit.id;
9494
};
95-
96-
export interface GetChangedActorsResult {
97-
actorsChanged: ActorConfig[];
98-
codeChanged: boolean;
99-
}
100-
101-
interface ShouldBuildAndTestOptions {
102-
filepathsChanged: string[];
103-
actorConfigs: ActorConfig[];
104-
// Just for logging
105-
isLatest?: boolean;
106-
}
107-
108-
/**
109-
* Also works for folders
110-
*/
111-
const isIgnoredTopLevelFile = (lowercaseFilePath: string) => {
112-
// On top level, we should only have dev-only readme and .actor/ is just for apify push CLI (real Actor configs are in /actors)
113-
const IGNORED_TOP_LEVEL_FILES = ['.vscode/', '.gitignore', 'readme.md', '.husky/', '.eslintrc', '.editorconfig', '.actor/'];
114-
// Strip out deprecated /code and /shared folders, treat them as top-level code
115-
const sanitizedLowercaseFilePath = lowercaseFilePath.replace(/^code\//, '').replace(/^shared\//, '');
116-
117-
return IGNORED_TOP_LEVEL_FILES.some((ignoredFile) => sanitizedLowercaseFilePath.startsWith(ignoredFile));
118-
};
119-
120-
const isLatestBuildOnlyFile = (lowercaseFilePath: string) => {
121-
if (lowercaseFilePath.endsWith('changelog.md')) {
122-
return true;
123-
}
124-
125-
// Either in /actors or /standalone-actors, we need to rebuild readme but we don't rebuild top-level dev-only readme
126-
if ((lowercaseFilePath.startsWith('actors/') || lowercaseFilePath.startsWith('standalone-actors/')) && lowercaseFilePath.endsWith('readme.md')) {
127-
return true;
128-
}
129-
130-
return false;
131-
};
132-
133-
/**
134-
* Latest and devel are the same except that for latest we also rebuild with README and CHANGELOG files
135-
*/
136-
export const getChangedActors = (
137-
{ filepathsChanged, actorConfigs, isLatest = false }: ShouldBuildAndTestOptions,
138-
): GetChangedActorsResult => {
139-
let codeChanged = false;
140-
// folder -> ActorConfig
141-
const actorsChangedMap = new Map<string, ActorConfig>();
142-
143-
const actorConfigsWithoutStandalone = actorConfigs.filter(({ isStandalone }) => !isStandalone);
144-
145-
const lowercaseFiles = filepathsChanged.map((file) => file.toLowerCase());
146-
147-
for (const lowercaseFilePath of lowercaseFiles) {
148-
if (isIgnoredTopLevelFile(lowercaseFilePath)) {
149-
continue;
150-
}
151-
// First we check for specific actors that have configs in /actors or standalone actors in /standalone-actors
152-
// This matches both actors/username_actorName and standalone-actors/username_actorName
153-
const changedActorConfigMatch = lowercaseFilePath.match(/^(?:standalone-)?actors\/([^/]+)\/.+/);
154-
if (changedActorConfigMatch) {
155-
const sanitizedActorName = changedActorConfigMatch[1].replace('_', '/');
156-
const actorConfigChanged = actorConfigs.find(({ actorName }) => actorName.toLowerCase() === sanitizedActorName);
157-
if (actorConfigChanged === undefined) {
158-
console.warn('changes was found in an actor folder which no longer exists in the current commit', {
159-
actorName: sanitizedActorName,
160-
actorFolderName: changedActorConfigMatch[1],
161-
});
162-
continue;
163-
}
164-
165-
console.error(`actorConfigChanged ${actorConfigChanged.actorName}: sanitizedActorName ${sanitizedActorName} ${lowercaseFilePath} `);
166-
// These can be nested at various folders inside the actor folder
167-
if (isLatest || !isLatestBuildOnlyFile(lowercaseFilePath)) {
168-
// We assume other files will are either actor.json or input_schema.json and those needs to be tested
169-
// TODO: Check what changed in schema, we don't need to test description changes
170-
actorsChangedMap.set(actorConfigChanged.folder, actorConfigChanged);
171-
}
172-
continue;
173-
}
174-
175-
// We check top level files (formerly in /code and /shared folders) that are shared among all non-standalone Actors
176-
// Standalone actors are always handled separately by name via changedActorConfigMatch
177-
if (isLatest || !isLatestBuildOnlyFile(lowercaseFilePath)) {
178-
codeChanged = !isLatest; // NOTE: code is changed only in PR
179-
for (const actorConfig of actorConfigsWithoutStandalone) {
180-
actorsChangedMap.set(actorConfig.folder, actorConfig);
181-
}
182-
}
183-
}
184-
185-
const actorsChanged = Array.from(actorsChangedMap.values());
186-
187-
// All below here is just for logging
188-
const ignoredFilesChanged = lowercaseFiles.filter((file) => isIgnoredTopLevelFile(file));
189-
console.error(`[DIFF]: Top level files changed that we ignore (don't trigger test or build): ${ignoredFilesChanged.join(', ')}`);
190-
191-
const onlyLatestFilesChanged = lowercaseFiles.filter((file) => isLatestBuildOnlyFile(file));
192-
console.error(`[DIFF]: Files changed that only trigger latest build: ${onlyLatestFilesChanged.join(', ')}`);
193-
194-
if (!isLatest && codeChanged) {
195-
console.error(`[DIFF]: All non-standalone Actors need to be built and tested (changes in top-level code)`);
196-
}
197-
198-
if (actorsChanged.length > 0) {
199-
const miniactors = actorsChanged.filter((config) => !config.isStandalone).map((config) => config.actorName);
200-
const standaloneActors = actorsChanged.filter((config) => config.isStandalone).map((config) => config.actorName);
201-
console.error(`[DIFF]: MiniActors to be built and tested: ${miniactors.join(', ')}`);
202-
console.error(`[DIFF]: Standalone Actors to be built and tested: ${standaloneActors.join(', ')}`);
203-
} else {
204-
console.error(`[DIFF]: No relevant files changed, skipping builds and tests`);
205-
}
206-
207-
return {
208-
actorsChanged,
209-
codeChanged,
210-
};
211-
};

0 commit comments

Comments
 (0)