Skip to content

Commit 5d1fe06

Browse files
feat: add --user-agent flag for caller-identified telemetry (#1102)
## Summary

- Adds an opt-in caller identifier (`--user-agent` flag / `APIFY_CLI_USER_AGENT` env var) so skills, plugins, and integrations that wrap the CLI can be distinguished from direct human usage in telemetry.
- The flag wins over the env var. The value is sanitized (ASCII control chars stripped, capped at 256 chars) and attached to telemetry as `userAgent`.
- Scoped to the public `apify` entrypoint only; the `actor` entrypoint (which runs inside Actor Docker images) rejects it.

## Test plan

- [x] `yarn lint`
- [x] `yarn build`
- [x] `yarn test:local` — 21 new unit tests for `resolveUserAgentForTelemetry`, flag registration, and end-to-end telemetry wiring all pass. (Two pre-existing Python fixture failures are unrelated.)
- [x] Reviewer: confirm `properties.userAgent` coexistence with `context.userAgent` in Segment payloads is acceptable (different objects, distinct meaning).

Closes: #1100
1 parent 3111d16 commit 5d1fe06

4 files changed

Lines changed: 308 additions & 3 deletions

File tree

src/lib/command-framework/apify-command.ts

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/* eslint-disable max-classes-per-file */
22

3+
import process from 'node:process';
34
import type { parseArgs, ParseArgsConfig, ParseArgsOptionDescriptor } from 'node:util';
45

56
import type { Awaitable } from '@crawlee/types';
@@ -144,6 +145,39 @@ const jsonFlagDefinition = {
144145
multiple: false,
145146
} as const satisfies ParseArgsOptionDescriptor;
146147

148+
const userAgentFlagDefinition = {
149+
type: 'string',
150+
multiple: false,
151+
} as const satisfies ParseArgsOptionDescriptor;
152+
153+
export const USER_AGENT_FLAG_NAME = 'user-agent';
154+
export const USER_AGENT_ENV_VAR = 'APIFY_CLI_USER_AGENT';
155+
export const USER_AGENT_MAX_LENGTH = 256;
156+
// Scope the caller-id flag to the public `apify` entrypoint. The `actor` entrypoint
157+
// runs inside Actor Docker images where caller-identification is not meaningful.
158+
const USER_AGENT_SUPPORTED_ENTRYPOINTS = new Set(['apify']);
159+
160+
/**
 * Normalizes a caller-supplied user-agent string before it is attached to telemetry.
 *
 * Steps, in order:
 * 1. Non-string input yields `undefined`.
 * 2. ASCII control characters (0x00-0x1F and 0x7F) are removed.
 * 3. Surrounding whitespace is trimmed; an empty result yields `undefined`.
 * 4. The value is capped at `USER_AGENT_MAX_LENGTH` UTF-16 code units. The cut never
 *    lands in the middle of a surrogate pair, and whitespace exposed at the new end
 *    is trimmed again.
 *
 * @param value Raw value taken from the flag or the environment.
 * @returns The cleaned value, or `undefined` when nothing usable remains.
 */
function sanitizeUserAgentValue(value: string | undefined): string | undefined {
	if (typeof value !== 'string') {
		return undefined;
	}

	// Strip ASCII control chars (0x00-0x1F and 0x7F) to keep telemetry payloads clean.
	// eslint-disable-next-line no-control-regex
	const cleaned = value.replace(/[\u0000-\u001f\u007f]/g, '').trim();
	if (!cleaned) {
		return undefined;
	}

	if (cleaned.length <= USER_AGENT_MAX_LENGTH) {
		return cleaned;
	}

	// `slice` counts UTF-16 code units. If the last unit we would keep is a high
	// surrogate, its low half lies beyond the cap (or it was already unpaired), so
	// keeping it would emit an ill-formed string. Drop it instead.
	let end = USER_AGENT_MAX_LENGTH;
	const lastKeptUnit = cleaned.charCodeAt(end - 1);
	if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
		end -= 1;
	}

	// Truncation can expose interior whitespace at the end; keep the "trimmed" guarantee.
	const capped = cleaned.slice(0, end).trimEnd();
	return capped || undefined;
}
173+
174+
/**
 * Picks the caller identifier to report in telemetry.
 *
 * Each candidate is passed through `sanitizeUserAgentValue`. The flag value takes
 * precedence; the environment value is only consulted when the flag value
 * sanitizes to `undefined` (missing, empty, or whitespace/control characters only).
 *
 * @param flagValue Raw value of the `--user-agent` flag, if provided.
 * @param envValue Raw value of the user-agent environment variable, if set.
 * @returns The sanitized identifier, or `undefined` when neither source yields one.
 */
export function resolveUserAgentForTelemetry(
	flagValue: string | undefined,
	envValue: string | undefined,
): string | undefined {
	const fromFlag = sanitizeUserAgentValue(flagValue);
	if (fromFlag !== undefined) {
		return fromFlag;
	}

	return sanitizeUserAgentValue(envValue);
}
180+
147181
export const commandRegistry = new Map<string, typeof BuiltApifyCommand>();
148182

149183
type ParseResult = ReturnType<typeof parseArgs<ReturnType<ApifyCommand['_buildParseArgsOption']>>>;
@@ -281,6 +315,17 @@ export abstract class ApifyCommand<T extends typeof BuiltApifyCommand = typeof B
281315
this.ctor.printHelp();
282316
}
283317

318+
if (USER_AGENT_SUPPORTED_ENTRYPOINTS.has(this.entrypoint)) {
319+
const rawUserAgentFlag = rawFlags[USER_AGENT_FLAG_NAME];
320+
const resolvedUserAgent = resolveUserAgentForTelemetry(
321+
typeof rawUserAgentFlag === 'string' ? rawUserAgentFlag : undefined,
322+
process.env[USER_AGENT_ENV_VAR],
323+
);
324+
if (resolvedUserAgent) {
325+
this.telemetryData.userAgent = resolvedUserAgent;
326+
}
327+
}
328+
284329
// Cheating a bit here with the types, but its fine
285330

286331
this.args = {} as any;
@@ -675,14 +720,20 @@ export abstract class ApifyCommand<T extends typeof BuiltApifyCommand = typeof B
675720
}
676721

677722
protected _buildParseArgsOption() {
723+
const baseOptions: Record<string, ParseArgsOptionDescriptor> = {
724+
help: helpFlagDefinition,
725+
};
726+
727+
if (USER_AGENT_SUPPORTED_ENTRYPOINTS.has(this.entrypoint)) {
728+
baseOptions[USER_AGENT_FLAG_NAME] = userAgentFlagDefinition;
729+
}
730+
678731
const object = {
679732
allowNegative: true,
680733
allowPositionals: true,
681734
strict: true,
682735
tokens: true,
683-
options: {
684-
help: helpFlagDefinition,
685-
} as {
736+
options: baseOptions as {
686737
help: typeof helpFlagDefinition;
687738
json: typeof jsonFlagDefinition;
688739
[k: string]: ParseArgsOptionDescriptor;

src/lib/hooks/telemetry/trackEvent.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export interface TrackEventMap {
4242
exitCode?: number;
4343
durationMs?: number;
4444
aiAgent?: string;
45+
userAgent?: string;
4546
isCi?: boolean;
4647
ciProvider?: string;
4748
isInteractive?: boolean;
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import { runCli } from '../__helpers__/run-cli.js';
2+
3+
describe('[e2e] --user-agent flag', () => {
4+
it('accepts --user-agent on any command without erroring', async () => {
5+
const result = await runCli('apify', ['help', '--user-agent', 'test-caller/1.0.0']);
6+
expect(result.exitCode, `stderr: ${result.stderr}`).toBe(0);
7+
});
8+
9+
it('accepts APIFY_CLI_USER_AGENT env var on any command without erroring', async () => {
10+
const result = await runCli('apify', ['help'], {
11+
env: { APIFY_CLI_USER_AGENT: 'test-caller/env-1.0.0' },
12+
});
13+
expect(result.exitCode, `stderr: ${result.stderr}`).toBe(0);
14+
});
15+
16+
it('rejects --user-agent without a value (string flag)', async () => {
17+
// Passing --user-agent with no value should surface a parseArgs error,
18+
// since the flag is declared as a string type.
19+
const result = await runCli('apify', ['help', '--user-agent']);
20+
expect(result.exitCode).not.toBe(0);
21+
});
22+
23+
it('rejects --user-agent under the actor entrypoint', async () => {
24+
// The flag is scoped to the public apify entrypoint only — the actor
25+
// entrypoint runs inside Actor Docker images where caller-id is meaningless.
26+
const result = await runCli('actor', ['help', '--user-agent', 'foo']);
27+
expect(result.exitCode).not.toBe(0);
28+
});
29+
});
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
/* eslint-disable max-classes-per-file */
2+
import { parseArgs } from 'node:util';
3+
4+
import {
5+
ApifyCommand,
6+
type BuiltApifyCommand as _BuiltApifyCommand,
7+
resolveUserAgentForTelemetry,
8+
USER_AGENT_ENV_VAR,
9+
USER_AGENT_FLAG_NAME,
10+
USER_AGENT_MAX_LENGTH,
11+
} from '../../../../src/lib/command-framework/apify-command.js';
12+
import { Flags } from '../../../../src/lib/command-framework/flags.js';
13+
14+
const BuiltApifyCommand = ApifyCommand as typeof _BuiltApifyCommand;
15+
16+
class NoOpCommand extends BuiltApifyCommand {
17+
static override name = 'noop' as const;
18+
static override description = 'Does nothing.';
19+
async run() {
20+
// no-op
21+
}
22+
}
23+
24+
class CommandWithFlags extends BuiltApifyCommand {
25+
static override name = 'with-flags' as const;
26+
static override description = 'Has flags.';
27+
static override flags = {
28+
foo: Flags.string({ description: 'foo flag' }),
29+
};
30+
31+
async run() {
32+
// no-op
33+
}
34+
}
35+
36+
describe('resolveUserAgentForTelemetry()', () => {
37+
test('flag value wins over env value', () => {
38+
expect(resolveUserAgentForTelemetry('flag', 'env')).toBe('flag');
39+
});
40+
41+
test('env value used as fallback', () => {
42+
expect(resolveUserAgentForTelemetry(undefined, 'env')).toBe('env');
43+
});
44+
45+
test('undefined when neither set', () => {
46+
expect(resolveUserAgentForTelemetry(undefined, undefined)).toBeUndefined();
47+
});
48+
49+
test('empty/whitespace flag falls through to env', () => {
50+
expect(resolveUserAgentForTelemetry(' ', 'env')).toBe('env');
51+
expect(resolveUserAgentForTelemetry('', 'env')).toBe('env');
52+
});
53+
54+
test('empty/whitespace env returns undefined when flag also empty', () => {
55+
expect(resolveUserAgentForTelemetry('', '')).toBeUndefined();
56+
expect(resolveUserAgentForTelemetry(undefined, ' ')).toBeUndefined();
57+
});
58+
59+
test('trims surrounding whitespace on returned value', () => {
60+
expect(resolveUserAgentForTelemetry(' foo ', undefined)).toBe('foo');
61+
expect(resolveUserAgentForTelemetry(undefined, ' bar ')).toBe('bar');
62+
});
63+
64+
test('strips ASCII control characters silently', () => {
65+
expect(resolveUserAgentForTelemetry('plugin\u0000/1.0\u0007', undefined)).toBe('plugin/1.0');
66+
expect(resolveUserAgentForTelemetry('line\nbreak\rthing\ttab', undefined)).toBe('linebreakthingtab');
67+
expect(resolveUserAgentForTelemetry('\u007f\u0001only-control\u001f', undefined)).toBe('only-control');
68+
});
69+
70+
test('returns undefined when input is only control characters', () => {
71+
expect(resolveUserAgentForTelemetry('\u0000\u0001\u001f', undefined)).toBeUndefined();
72+
});
73+
74+
test('caps length at USER_AGENT_MAX_LENGTH', () => {
75+
const long = 'x'.repeat(USER_AGENT_MAX_LENGTH + 100);
76+
const resolved = resolveUserAgentForTelemetry(long, undefined);
77+
expect(resolved).toBeDefined();
78+
expect(resolved!.length).toBe(USER_AGENT_MAX_LENGTH);
79+
});
80+
81+
test('does not touch values at or below cap', () => {
82+
const exact = 'y'.repeat(USER_AGENT_MAX_LENGTH);
83+
expect(resolveUserAgentForTelemetry(exact, undefined)).toBe(exact);
84+
});
85+
});
86+
87+
describe('--user-agent flag registration', () => {
88+
test('is parseable on commands with no declared flags', () => {
89+
const instance = new NoOpCommand('apify', NoOpCommand.name, NoOpCommand.name);
90+
// eslint-disable-next-line dot-notation
91+
const parserOptions = instance['_buildParseArgsOption']();
92+
93+
expect(parserOptions.options).toHaveProperty(USER_AGENT_FLAG_NAME);
94+
95+
const parsed = parseArgs({
96+
...parserOptions,
97+
args: ['--user-agent', 'apify-agent-skills/ultimate-scraper-1.3.0'],
98+
});
99+
100+
expect(parsed.values[USER_AGENT_FLAG_NAME]).toBe('apify-agent-skills/ultimate-scraper-1.3.0');
101+
});
102+
103+
test('is parseable alongside command-specific flags', () => {
104+
const instance = new CommandWithFlags('apify', CommandWithFlags.name, CommandWithFlags.name);
105+
// eslint-disable-next-line dot-notation
106+
const parserOptions = instance['_buildParseArgsOption']();
107+
108+
const parsed = parseArgs({
109+
...parserOptions,
110+
args: ['--foo', 'hello', '--user-agent', 'my-plugin/1.0.0'],
111+
});
112+
113+
// command-declared string flags use multiple:true, so node returns an array.
114+
expect(parsed.values.foo).toEqual(['hello']);
115+
expect(parsed.values[USER_AGENT_FLAG_NAME]).toBe('my-plugin/1.0.0');
116+
});
117+
118+
test('is optional — parser does not fail when omitted', () => {
119+
const instance = new NoOpCommand('apify', NoOpCommand.name, NoOpCommand.name);
120+
// eslint-disable-next-line dot-notation
121+
const parserOptions = instance['_buildParseArgsOption']();
122+
123+
const parsed = parseArgs({
124+
...parserOptions,
125+
args: [],
126+
});
127+
128+
expect(parsed.values[USER_AGENT_FLAG_NAME]).toBeUndefined();
129+
});
130+
131+
test('treats --user-agent after `--` separator as a positional, not a flag', () => {
132+
const instance = new NoOpCommand('apify', NoOpCommand.name, NoOpCommand.name);
133+
// eslint-disable-next-line dot-notation
134+
const parserOptions = instance['_buildParseArgsOption']();
135+
136+
const parsed = parseArgs({
137+
...parserOptions,
138+
args: ['--', '--user-agent', 'forwarded-value'],
139+
});
140+
141+
expect(parsed.values[USER_AGENT_FLAG_NAME]).toBeUndefined();
142+
expect(parsed.positionals).toEqual(['--user-agent', 'forwarded-value']);
143+
});
144+
145+
test('is NOT registered under non-apify entrypoints (e.g. actor)', () => {
146+
const instance = new NoOpCommand('actor', NoOpCommand.name, NoOpCommand.name);
147+
// eslint-disable-next-line dot-notation
148+
const parserOptions = instance['_buildParseArgsOption']();
149+
150+
expect(parserOptions.options).not.toHaveProperty(USER_AGENT_FLAG_NAME);
151+
152+
// parseArgs with strict:true should now reject --user-agent on this entrypoint.
153+
expect(() =>
154+
parseArgs({
155+
...parserOptions,
156+
args: ['--user-agent', 'foo'],
157+
}),
158+
).toThrow();
159+
});
160+
});
161+
162+
describe('--user-agent end-to-end telemetry wiring', () => {
163+
const originalEnv = process.env[USER_AGENT_ENV_VAR];
164+
165+
afterEach(() => {
166+
if (originalEnv === undefined) {
167+
delete process.env[USER_AGENT_ENV_VAR];
168+
} else {
169+
process.env[USER_AGENT_ENV_VAR] = originalEnv;
170+
}
171+
});
172+
173+
async function runWith(
174+
args: string[],
175+
entrypoint = 'apify',
176+
): Promise<_BuiltApifyCommand & { telemetryData: Record<string, unknown> }> {
177+
const instance = new NoOpCommand(entrypoint, NoOpCommand.name, NoOpCommand.name);
178+
// eslint-disable-next-line dot-notation
179+
instance['skipTelemetry'] = true;
180+
// eslint-disable-next-line dot-notation
181+
const parserOptions = instance['_buildParseArgsOption']();
182+
const parsed = parseArgs({ ...parserOptions, args });
183+
// eslint-disable-next-line dot-notation
184+
await instance['_run'](parsed);
185+
return instance as unknown as _BuiltApifyCommand & { telemetryData: Record<string, unknown> };
186+
}
187+
188+
test('populates telemetryData.userAgent when flag passed', async () => {
189+
delete process.env[USER_AGENT_ENV_VAR];
190+
const instance = await runWith(['--user-agent', 'skills/scraper-1.0']);
191+
expect(instance.telemetryData.userAgent).toBe('skills/scraper-1.0');
192+
});
193+
194+
test('falls back to APIFY_CLI_USER_AGENT env var', async () => {
195+
process.env[USER_AGENT_ENV_VAR] = 'env-caller/2.0';
196+
const instance = await runWith([]);
197+
expect(instance.telemetryData.userAgent).toBe('env-caller/2.0');
198+
});
199+
200+
test('flag overrides env var', async () => {
201+
process.env[USER_AGENT_ENV_VAR] = 'env-caller/2.0';
202+
const instance = await runWith(['--user-agent', 'flag-caller/3.0']);
203+
expect(instance.telemetryData.userAgent).toBe('flag-caller/3.0');
204+
});
205+
206+
test('leaves userAgent unset when neither flag nor env var provided', async () => {
207+
delete process.env[USER_AGENT_ENV_VAR];
208+
const instance = await runWith([]);
209+
expect(instance.telemetryData.userAgent).toBeUndefined();
210+
});
211+
212+
test('sanitizes oversized flag input before emitting', async () => {
213+
delete process.env[USER_AGENT_ENV_VAR];
214+
const bigValue = 'a'.repeat(USER_AGENT_MAX_LENGTH + 50);
215+
const instance = await runWith(['--user-agent', bigValue]);
216+
expect((instance.telemetryData.userAgent as string).length).toBe(USER_AGENT_MAX_LENGTH);
217+
});
218+
219+
test('ignores env var entirely when running under actor entrypoint', async () => {
220+
process.env[USER_AGENT_ENV_VAR] = 'env-caller/2.0';
221+
const instance = await runWith([], 'actor');
222+
expect(instance.telemetryData.userAgent).toBeUndefined();
223+
});
224+
});

0 commit comments

Comments
 (0)