Skip to content

Commit b1b8d6d

Browse files
authored
feat: structured logging + opt-in debug mode (Phase 7) (#22)
Closes #13. Part of plan #15. ## Motivation Phase 4's dogfood failures took three iterations and a full revert to diagnose because the ephemeral EC2 instance terminates before anyone can look at cloud-init logs. Structured action-side logs are the cheap half of the diagnostic story (see #20 for the AWS-side upload- to-S3 trap that completes it). ## New module: src/log.js JSON-shaped events via core.info / core.warning / core.error: log.info('run_instance', { ami_id: 'ami-0x', instance_type: 't3.medium', subnet_id: 'subnet-aaa', sg_id: 'sg-bbb', label: 'abcde', }); // -> ::notice ::{"step":"run_instance","mode":"start",...} Features: - Lazy require of ./config so validation-phase log calls don't crash when config's try/catch hasn't finished running new Config(). - sanitize() redacts values under a small secret-key allowlist before emitting (githubToken, github-token, token, aws-access-key-id, aws-secret-access-key, GPG_PRIVATE_KEY, password). - log.debug(...) is a no-op unless config.input.debug === 'true'. ## New input: debug action.yml declares 'debug: false' as default. Consumers opt in by setting debug: 'true' in their workflow; the action then emits the extra diagnostic traffic (full DescribeImages response, wait_for_runner poll details, input echo) without changing default output. src/config.js reads the input with a 'false' fallback so old callers that don't set it continue to work. ## Call sites instrumented src/aws.js: - wait_for_instance (start + end + error) - describe_images (request + selection + full-list under debug) - run_instance (request + result + error) - associate_address (attempt + warn on failure) - terminate_instance (start + end + error) src/gh.js: - gh_registration_token (attempt + success + error) - remove_runner (attempt + success + skip + error) - wait_for_runner (final outcome + per-poll details under debug) src/index.js: - start/stop bracketed by core.startGroup/endGroup so the Actions log collapses cleanly. - Top-level 'fatal' log on uncaught failure. - start_inputs / stop_inputs logged under debug (sanitized). ## Tests tests/log.test.js — 7 new cases: - info / warn / error emit through the right core.* channels with step + mode + fields baked into the JSON payload. - debug emits nothing when config.input.debug !== 'true'. - debug emits when config.input.debug === 'true'. - sanitize redacts recursively. - Payload-level redaction works (info with a {githubToken: …} field). tests/config.test.js — 2 new cases for the debug input (default + explicit override). Total: 23 -> 30 tests. ## Unblocks Phase 4.b (non-root retry, #20) now has visibility into what the bootstrap is doing. Combined with the S3-upload trap on the AWS side (also scoped in #20), any future bootstrap regression is diagnosable from outside the instance. Signed-off-by: yuriyryabikov <22548029+kurok@users.noreply.github.com>
1 parent 249efbd commit b1b8d6d

9 files changed

Lines changed: 431 additions & 18 deletions

File tree

action.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ inputs:
7070
IAM Role Name to attach to the created EC2 instance.
7171
This requires additional permissions on the AWS role used to launch instances.
7272
required: false
73+
debug:
74+
description: >-
75+
When 'true', the action emits extra diagnostic output to the
76+
Actions run log: input parameters (secrets redacted), AWS SDK
77+
response metadata, runner-registration poll details. Leave at
78+
'false' for normal operation. Set 'true' when troubleshooting
79+
bootstrap failures.
80+
required: false
81+
default: 'false'
7382
aws-resource-tags:
7483
description: >-
7584
Tags to attach to the launched EC2 instance and volume.

dist/index.js

Lines changed: 165 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -87854,6 +87854,7 @@ const {
8785487854
} = __nccwpck_require__(5193);
8785587855
const core = __nccwpck_require__(7484);
8785687856
const config = __nccwpck_require__(1283);
87857+
const log = __nccwpck_require__(7223);
8785787858
const { sortByCreationDate } = __nccwpck_require__(5804);
8785887859

8785987860
// EC2Client reads region + credentials from the environment (set by
@@ -87865,13 +87866,17 @@ function ec2Client() {
8786587866
}
8786687867

8786787868
async function waitForInstanceRunning(ec2InstanceId) {
87869+
const start = Date.now();
87870+
log.info('wait_for_instance', { instance_id: ec2InstanceId });
8786887871
try {
8786987872
await waitUntilInstanceRunning(
8787087873
{ client: ec2Client(), maxWaitTime: 300 },
8787187874
{ InstanceIds: [ec2InstanceId] },
8787287875
);
87876+
log.info('wait_for_instance', { instance_id: ec2InstanceId, elapsed_ms: Date.now() - start });
8787387877
core.info(`AWS EC2 instance ${ec2InstanceId} is up and running`);
8787487878
} catch (error) {
87879+
log.error('wait_for_instance', { instance_id: ec2InstanceId, error: error.name, message: error.message });
8787587880
core.error(`AWS EC2 instance ${ec2InstanceId} initialization error`);
8787687881
throw error;
8787787882
}
@@ -87892,12 +87897,17 @@ async function resolveImageId(client) {
8789287897
amiParams.Owners = [config.input.ec2ImageOwner];
8789387898
}
8789487899

87900+
log.info('describe_images', { owner: config.input.ec2ImageOwner || null, filters: config.input.ec2ImageFilters });
8789587901
const result = await client.send(new DescribeImagesCommand(amiParams));
8789687902
if (!result.Images || result.Images.length === 0) {
87903+
log.error('describe_images', { match_count: 0 });
8789787904
throw new Error('Unable to find AMI using passed filter');
8789887905
}
8789987906
sortByCreationDate(result);
87900-
return result.Images[0].ImageId;
87907+
const picked = result.Images[0].ImageId;
87908+
log.info('describe_images', { match_count: result.Images.length, selected_ami: picked });
87909+
log.debug('describe_images_all', { images: result.Images.map(i => ({ id: i.ImageId, name: i.Name, created: i.CreationDate })) });
87910+
return picked;
8790187911
}
8790287912

8790387913
async function startEc2Instance(label, githubRegistrationToken) {
@@ -87934,11 +87944,22 @@ async function startEc2Instance(label, githubRegistrationToken) {
8793487944
};
8793587945

8793687946
let ec2InstanceId;
87947+
const runStart = Date.now();
87948+
log.info('run_instance', {
87949+
ami_id: config.input.ec2ImageId,
87950+
instance_type: config.input.ec2InstanceType,
87951+
subnet_id: config.input.subnetId,
87952+
sg_id: config.input.securityGroupId,
87953+
iam_role: config.input.iamRoleName || null,
87954+
label,
87955+
});
8793787956
try {
8793887957
const result = await client.send(new RunInstancesCommand(params));
8793987958
ec2InstanceId = result.Instances[0].InstanceId;
87959+
log.info('run_instance', { instance_id: ec2InstanceId, elapsed_ms: Date.now() - runStart });
8794087960
core.info(`AWS EC2 instance ${ec2InstanceId} is started`);
8794187961
} catch (error) {
87962+
log.error('run_instance', { error: error.name, message: error.message });
8794287963
core.error('AWS EC2 instance starting error');
8794387964
throw error;
8794487965
}
@@ -87947,11 +87968,13 @@ async function startEc2Instance(label, githubRegistrationToken) {
8794787968
await waitForInstanceRunning(ec2InstanceId);
8794887969

8794987970
try {
87971+
log.info('associate_address', { allocation_id: config.input.eipAllocationId, instance_id: ec2InstanceId });
8795087972
await client.send(new AssociateAddressCommand({
8795187973
AllocationId: config.input.eipAllocationId,
8795287974
InstanceId: ec2InstanceId,
8795387975
}));
8795487976
} catch (error) {
87977+
log.warn('associate_address', { allocation_id: config.input.eipAllocationId, instance_id: ec2InstanceId, error: error.name, message: error.message });
8795587978
core.warning(`Elastic IP association error, trying to proceed w/o EIP: ${error.message}`);
8795687979
}
8795787980
}
@@ -87962,12 +87985,16 @@ async function startEc2Instance(label, githubRegistrationToken) {
8796287985
async function terminateEc2Instance() {
8796387986
const client = ec2Client();
8796487987

87988+
const start = Date.now();
87989+
log.info('terminate_instance', { instance_id: config.input.ec2InstanceId });
8796587990
try {
8796687991
await client.send(new TerminateInstancesCommand({
8796787992
InstanceIds: [config.input.ec2InstanceId],
8796887993
}));
87994+
log.info('terminate_instance', { instance_id: config.input.ec2InstanceId, elapsed_ms: Date.now() - start });
8796987995
core.info(`AWS EC2 instance ${config.input.ec2InstanceId} is terminated`);
8797087996
} catch (error) {
87997+
log.error('terminate_instance', { instance_id: config.input.ec2InstanceId, error: error.name, message: error.message });
8797187998
core.error(`AWS EC2 instance ${config.input.ec2InstanceId} termination error`);
8797287999
throw error;
8797388000
}
@@ -88003,6 +88030,7 @@ class Config {
8800388030
label: core.getInput('label'),
8800488031
ec2InstanceId: core.getInput('ec2-instance-id'),
8800588032
iamRoleName: core.getInput('iam-role-name'),
88033+
debug: core.getInput('debug') || 'false',
8800688034
};
8800788035

8800888036
const tags = JSON.parse(core.getInput('aws-resource-tags'));
@@ -88069,6 +88097,7 @@ const core = __nccwpck_require__(7484);
8806988097
const github = __nccwpck_require__(3228);
8807088098
const _ = __nccwpck_require__(9975);
8807188099
const config = __nccwpck_require__(1283);
88100+
const log = __nccwpck_require__(7223);
8807288101

8807388102
// use the unique label to find the runner
8807488103
// as we don't have the runner's id, it's not possible to get it in any other way
@@ -88087,12 +88116,16 @@ async function getRunner(label) {
8808788116
// get GitHub Registration Token for registering a self-hosted runner
8808888117
async function getRegistrationToken() {
8808988118
const octokit = github.getOctokit(config.input.githubToken);
88119+
const start = Date.now();
88120+
log.info('gh_registration_token', { ...config.githubContext });
8809088121

8809188122
try {
8809288123
const response = await octokit.request('POST /repos/{owner}/{repo}/actions/runners/registration-token', config.githubContext);
88124+
log.info('gh_registration_token', { ...config.githubContext, elapsed_ms: Date.now() - start });
8809388125
core.info('GitHub Registration Token is received');
8809488126
return response.data.token;
8809588127
} catch (error) {
88128+
log.error('gh_registration_token', { error: error.name, message: error.message, status: error.status });
8809688129
core.error('GitHub Registration Token receiving error');
8809788130
throw error;
8809888131
}
@@ -88104,15 +88137,20 @@ async function removeRunner() {
8810488137

8810588138
// skip the runner removal process if the runner is not found
8810688139
if (!runner) {
88140+
log.info('remove_runner', { label: config.input.label, skipped: true, reason: 'not_found' });
8810788141
core.info(`GitHub self-hosted runner with label ${config.input.label} is not found, so the removal is skipped`);
8810888142
return;
8810988143
}
8811088144

88145+
const start = Date.now();
88146+
log.info('remove_runner', { runner_id: runner.id, label: config.input.label });
8811188147
try {
8811288148
await octokit.request('DELETE /repos/{owner}/{repo}/actions/runners/{runner_id}', _.merge(config.githubContext, { runner_id: runner.id }));
88149+
log.info('remove_runner', { runner_id: runner.id, label: config.input.label, elapsed_ms: Date.now() - start });
8811388150
core.info(`GitHub self-hosted runner ${runner.name} is removed`);
8811488151
return;
8811588152
} catch (error) {
88153+
log.error('remove_runner', { runner_id: runner.id, label: config.input.label, error: error.name, message: error.message });
8811688154
core.error('GitHub self-hosted runner removal error');
8811788155
throw error;
8811888156
}
@@ -88131,14 +88169,17 @@ async function waitForRunnerRegistered(label) {
8813188169
return new Promise((resolve, reject) => {
8813288170
const interval = setInterval(async () => {
8813388171
const runner = await getRunner(label);
88172+
log.debug('wait_for_runner_poll', { label, elapsed_s: waitSeconds, found: !!runner, status: runner ? runner.status : null });
8813488173

8813588174
if (waitSeconds > timeoutMinutes * 60) {
88175+
log.error('wait_for_runner', { label, timeout_minutes: timeoutMinutes });
8813688176
core.error('GitHub self-hosted runner registration error');
8813788177
clearInterval(interval);
8813888178
reject(`A timeout of ${timeoutMinutes} minutes is exceeded. Your AWS EC2 instance was not able to register itself in GitHub as a new self-hosted runner.`);
8813988179
}
8814088180

8814188181
if (runner && runner.status === 'online') {
88182+
log.info('wait_for_runner', { label, runner_id: runner.id, elapsed_s: waitSeconds });
8814288183
core.info(`GitHub self-hosted runner ${runner.name} is registered and ready to use`);
8814388184
clearInterval(interval);
8814488185
resolve();
@@ -88157,6 +88198,105 @@ module.exports = {
8815788198
};
8815888199

8815988200

88201+
/***/ }),
88202+
88203+
/***/ 7223:
88204+
/***/ ((module, __unused_webpack_exports, __nccwpck_require__) => {
88205+
88206+
const core = __nccwpck_require__(7484);
88207+
88208+
// Structured logger. Every notable lifecycle event emits a JSON-shaped
88209+
// line via core.info so the Actions run summary can be scraped or
88210+
// eyeballed without parsing free-form text. When config.input.debug is
88211+
// true the debug() helper also emits, which gives consumers a way to
88212+
// get verbose diagnostics without changing default output.
88213+
//
88214+
// Gotchas:
88215+
// - We defer `require('./config')` until first use because log.js is
88216+
// loaded transitively from src/index.js before config validation
88217+
// completes; importing at top-level would short-circuit on any
88218+
// config error.
88219+
// - sanitize() redacts values under known secret keys. Always pass
88220+
// raw fields through sanitize() before logging an object that may
88221+
// contain user input (tokens, credentials, etc.).
88222+
88223+
const SECRET_KEYS = new Set([
88224+
'githubToken',
88225+
'github-token',
88226+
'token',
88227+
'aws-access-key-id',
88228+
'aws-secret-access-key',
88229+
'GPG_PRIVATE_KEY',
88230+
'password',
88231+
]);
88232+
88233+
function sanitize(fields) {
88234+
if (!fields || typeof fields !== 'object') return fields;
88235+
const out = Array.isArray(fields) ? [] : {};
88236+
for (const [k, v] of Object.entries(fields)) {
88237+
if (SECRET_KEYS.has(k)) {
88238+
out[k] = '***';
88239+
} else if (v && typeof v === 'object') {
88240+
out[k] = sanitize(v);
88241+
} else {
88242+
out[k] = v;
88243+
}
88244+
}
88245+
return out;
88246+
}
88247+
88248+
function emit(level, step, fields) {
88249+
const payload = {
88250+
step,
88251+
mode: (() => {
88252+
// best-effort mode lookup; log.js may be required before Config
88253+
// finishes its constructor, in which case config.input is undefined.
88254+
try {
88255+
const config = __nccwpck_require__(1283);
88256+
return config && config.input ? config.input.mode : undefined;
88257+
} catch (_e) {
88258+
return undefined;
88259+
}
88260+
})(),
88261+
...(fields ? sanitize(fields) : {}),
88262+
};
88263+
const line = JSON.stringify(payload);
88264+
switch (level) {
88265+
case 'warning':
88266+
core.warning(line);
88267+
break;
88268+
case 'error':
88269+
core.error(line);
88270+
break;
88271+
default:
88272+
core.info(line);
88273+
}
88274+
}
88275+
88276+
function info(step, fields) { emit('info', step, fields); }
88277+
function warn(step, fields) { emit('warning', step, fields); }
88278+
function err(step, fields) { emit('error', step, fields); }
88279+
88280+
function debug(step, fields) {
88281+
try {
88282+
const config = __nccwpck_require__(1283);
88283+
if (config && config.input && config.input.debug === 'true') {
88284+
emit('info', step, { debug: true, ...fields });
88285+
}
88286+
} catch (_e) {
88287+
// Config not yet loaded — skip debug output.
88288+
}
88289+
}
88290+
88291+
module.exports = {
88292+
info,
88293+
warn,
88294+
error: err,
88295+
debug,
88296+
sanitize,
88297+
};
88298+
88299+
8816088300
/***/ }),
8816188301

8816288302
/***/ 5804:
@@ -88639,6 +88779,7 @@ const os = __nccwpck_require__(857);
8863988779
const aws = __nccwpck_require__(3776);
8864088780
const gh = __nccwpck_require__(5934);
8864188781
const config = __nccwpck_require__(1283);
88782+
const log = __nccwpck_require__(7223);
8864288783
const core = __nccwpck_require__(7484);
8864388784

8864488785
// Write directly to the $GITHUB_OUTPUT file. The bundled @actions/core
@@ -88656,23 +88797,38 @@ function setOutput(label, ec2InstanceId) {
8865688797
}
8865788798

8865888799
async function start() {
88659-
const label = config.generateUniqueLabel();
88660-
const githubRegistrationToken = await gh.getRegistrationToken();
88661-
const ec2InstanceId = await aws.startEc2Instance(label, githubRegistrationToken);
88662-
setOutput(label, ec2InstanceId);
88663-
await aws.waitForInstanceRunning(ec2InstanceId);
88664-
await gh.waitForRunnerRegistered(label);
88800+
core.startGroup('start-runner');
88801+
try {
88802+
log.debug('start_inputs', config.input); // sanitized inside log.js
88803+
const label = config.generateUniqueLabel();
88804+
const githubRegistrationToken = await gh.getRegistrationToken();
88805+
const ec2InstanceId = await aws.startEc2Instance(label, githubRegistrationToken);
88806+
setOutput(label, ec2InstanceId);
88807+
await aws.waitForInstanceRunning(ec2InstanceId);
88808+
await gh.waitForRunnerRegistered(label);
88809+
log.info('start', { label, instance_id: ec2InstanceId, outcome: 'registered' });
88810+
} finally {
88811+
core.endGroup();
88812+
}
8866588813
}
8866688814

8866788815
async function stop() {
88668-
await aws.terminateEc2Instance();
88669-
await gh.removeRunner();
88816+
core.startGroup('stop-runner');
88817+
try {
88818+
log.debug('stop_inputs', config.input);
88819+
await aws.terminateEc2Instance();
88820+
await gh.removeRunner();
88821+
log.info('stop', { instance_id: config.input.ec2InstanceId, label: config.input.label, outcome: 'ok' });
88822+
} finally {
88823+
core.endGroup();
88824+
}
8867088825
}
8867188826

8867288827
(async function () {
8867388828
try {
8867488829
config.input.mode === 'start' ? await start() : await stop();
8867588830
} catch (error) {
88831+
log.error('fatal', { mode: config.input.mode, error: error.name, message: error.message });
8867688832
core.error(error);
8867788833
core.setFailed(error.message);
8867888834
}

0 commit comments

Comments
 (0)