Skip to content

Commit 031172d

Browse files
committed
Harden federation metrics review fixes
Make the context meter provider source-compatible for external Context implementations, count all 401 signature rejection paths, and avoid adding full activity payloads to successful delivery span events. Guard queued failure span attributes so malformed inbox payloads do not break retry logic. #755 (comment) #755 (comment) #755 (comment) #755 (comment) #755 (comment) Assisted-by: gpt-5.5
1 parent 33f5f91 commit 031172d

9 files changed

Lines changed: 95 additions & 21 deletions

File tree

docs/manual/opentelemetry.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -289,13 +289,13 @@ Instrumented metrics
289289

290290
Fedify records the following OpenTelemetry metrics:
291291

292-
| Metric name | Instrument | Unit | Description |
293-
| -------------------------------------------- | ---------- | ----------- | ------------------------------------------------------------- |
294-
| `activitypub.delivery.sent` | Counter | `{attempt}` | Counts outgoing ActivityPub delivery attempts. |
295-
| `activitypub.delivery.permanent_failure` | Counter | `{failure}` | Counts outgoing deliveries abandoned as permanent failures. |
296-
| `activitypub.delivery.duration` | Histogram | `ms` | Measures outgoing ActivityPub delivery attempt duration. |
297-
| `activitypub.inbox.processing_duration` | Histogram | `ms` | Measures inbox listener processing duration. |
298-
| `activitypub.signature.verification_failure` | Counter | `{failure}` | Counts failed HTTP Signature verification for inbox requests. |
292+
| Metric name | Instrument | Unit | Description |
293+
| -------------------------------------------- | ---------- | ----------- | ----------------------------------------------------------- |
294+
| `activitypub.delivery.sent` | Counter | `{attempt}` | Counts outgoing ActivityPub delivery attempts. |
295+
| `activitypub.delivery.permanent_failure` | Counter | `{failure}` | Counts outgoing deliveries abandoned as permanent failures. |
296+
| `activitypub.delivery.duration` | Histogram | `ms` | Measures outgoing ActivityPub delivery attempt duration. |
297+
| `activitypub.inbox.processing_duration` | Histogram | `ms` | Measures inbox listener processing duration. |
298+
| `activitypub.signature.verification_failure` | Counter | `{failure}` | Counts failed signature verification for inbox requests. |
299299

300300
### Metric attributes
301301

packages/fedify/src/federation/context.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ export interface Context<TContextData> {
7474
* The OpenTelemetry meter provider.
7575
* @since 2.3.0
7676
*/
77-
readonly meterProvider: MeterProvider;
77+
readonly meterProvider?: MeterProvider;
7878

7979
/**
8080
* The document loader for loading remote JSON-LD documents.

packages/fedify/src/federation/handler.test.ts

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3053,6 +3053,7 @@ test("handleInbox() nonce consumption on valid signed request", async () => {
30533053
});
30543054

30553055
test("handleInbox() nonce replay prevention", async () => {
3056+
const [meterProvider, recorder] = createTestMeterProvider();
30563057
const activity = new Create({
30573058
id: new URL("https://example.com/activities/nonce-3"),
30583059
actor: new URL("https://example.com/person2"),
@@ -3075,7 +3076,10 @@ test("handleInbox() nonce replay prevention", async () => {
30753076
rsaPublicKey3.id!,
30763077
{ spec: "rfc9421", rfc9421: { nonce } },
30773078
);
3078-
const federation = createFederation<void>({ kv: new MemoryKvStore() });
3079+
const federation = createFederation<void>({
3080+
kv: new MemoryKvStore(),
3081+
meterProvider,
3082+
});
30793083
const context = createRequestContext({
30803084
federation,
30813085
request: signedRequest,
@@ -3107,6 +3111,7 @@ test("handleInbox() nonce replay prevention", async () => {
31073111
onNotFound: () => new Response("Not found", { status: 404 }),
31083112
signatureTimeWindow: { minutes: 5 },
31093113
skipSignatureVerification: false,
3114+
meterProvider,
31103115
inboxChallengePolicy: {
31113116
enabled: true,
31123117
requestNonce: true,
@@ -3132,6 +3137,19 @@ test("handleInbox() nonce replay prevention", async () => {
31323137
"no-store",
31333138
"Challenge response must have Cache-Control: no-store",
31343139
);
3140+
const failures = recorder.getMeasurements(
3141+
"activitypub.signature.verification_failure",
3142+
);
3143+
assertEquals(failures.length, 1);
3144+
assertEquals(failures[0].value, 1);
3145+
assertEquals(
3146+
failures[0].attributes["activitypub.remote.host"],
3147+
"example.com",
3148+
);
3149+
assertEquals(
3150+
failures[0].attributes["activitypub.verification.failure_reason"],
3151+
"invalidNonce",
3152+
);
31353153
});
31363154

31373155
test(
@@ -3273,6 +3291,7 @@ test(
32733291
test(
32743292
"handleInbox() actor/key mismatch does not consume nonce",
32753293
async () => {
3294+
const [meterProvider, recorder] = createTestMeterProvider();
32763295
// A request that has a valid RFC 9421 signature with a nonce, but the
32773296
// signing key does not belong to the claimed actor. The nonce must NOT be
32783297
// consumed so the legitimate sender can still use it.
@@ -3304,7 +3323,10 @@ test(
33043323
rsaPublicKey3.id!,
33053324
{ spec: "rfc9421", rfc9421: { nonce } },
33063325
);
3307-
const federation = createFederation<void>({ kv: new MemoryKvStore() });
3326+
const federation = createFederation<void>({
3327+
kv: new MemoryKvStore(),
3328+
meterProvider,
3329+
});
33083330
const context = createRequestContext({
33093331
federation,
33103332
request: maliciousRequest,
@@ -3336,6 +3358,7 @@ test(
33363358
onNotFound: () => new Response("Not found", { status: 404 }),
33373359
signatureTimeWindow: { minutes: 5 },
33383360
skipSignatureVerification: false,
3361+
meterProvider,
33393362
inboxChallengePolicy: {
33403363
enabled: true,
33413364
requestNonce: true,
@@ -3357,6 +3380,19 @@ test(
33573380
true,
33583381
"Nonce must not be consumed when actor/key ownership check fails",
33593382
);
3383+
const failures = recorder.getMeasurements(
3384+
"activitypub.signature.verification_failure",
3385+
);
3386+
assertEquals(failures.length, 1);
3387+
assertEquals(failures[0].value, 1);
3388+
assertEquals(
3389+
failures[0].attributes["activitypub.remote.host"],
3390+
"example.com",
3391+
);
3392+
assertEquals(
3393+
failures[0].attributes["activitypub.verification.failure_reason"],
3394+
"actorKeyMismatch",
3395+
);
33603396
},
33613397
);
33623398

packages/fedify/src/federation/handler.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,11 @@ async function handleInboxInternal<TContextData>(
11341134
if (
11351135
httpSigKey != null && !await doesActorOwnKey(activity, httpSigKey, ctx)
11361136
) {
1137+
getFederationMetrics(parameters.meterProvider)
1138+
.recordSignatureVerificationFailure(
1139+
"actorKeyMismatch",
1140+
httpSigKey.id?.hostname,
1141+
);
11371142
logger.error(
11381143
"The signer ({keyId}) and the actor ({actorId}) do not match.",
11391144
{
@@ -1162,6 +1167,11 @@ async function handleInboxInternal<TContextData>(
11621167
pendingNonceLabel,
11631168
);
11641169
if (!nonceValid) {
1170+
getFederationMetrics(parameters.meterProvider)
1171+
.recordSignatureVerificationFailure(
1172+
"invalidNonce",
1173+
httpSigKey?.id?.hostname,
1174+
);
11651175
logger.error(
11661176
"Signature nonce verification failed (missing, expired, or replayed).",
11671177
{ recipient },

packages/fedify/src/federation/metrics.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class FederationMetrics {
3030
this.signatureVerificationFailure = meter.createCounter(
3131
"activitypub.signature.verification_failure",
3232
{
33-
description: "ActivityPub HTTP Signature verification failures.",
33+
description: "ActivityPub signature verification failures.",
3434
unit: "{failure}",
3535
},
3636
);

packages/fedify/src/federation/middleware.test.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3657,6 +3657,30 @@ test("FederationImpl.processQueuedTask() permanent failure", async (t) => {
36573657
},
36583658
);
36593659

3660+
await t.step("malformed inbox does not break failure handling", async () => {
3661+
const [tracerProvider, exporter] = createTestTracerProvider();
3662+
const { federation, queuedMessages } = setup({ tracerProvider });
3663+
3664+
await federation.processQueuedTask(
3665+
undefined,
3666+
createOutboxMessage(
3667+
"not a url",
3668+
"https://example.com/activity/9",
3669+
["https://gone.example/users/bob"],
3670+
),
3671+
);
3672+
3673+
assertEquals(queuedMessages.length, 1);
3674+
assertEquals((queuedMessages[0] as OutboxMessage).attempt, 1);
3675+
const events = exporter.getEvents(
3676+
"activitypub.outbox",
3677+
"activitypub.delivery.failed",
3678+
);
3679+
assertEquals(events.length, 1);
3680+
assertEquals(events[0].attributes?.["activitypub.remote.host"], undefined);
3681+
assertEquals(events[0].attributes?.["activitypub.delivery.attempt"], 0);
3682+
});
3683+
36603684
fetchMock.hardReset();
36613685
});
36623686

packages/fedify/src/federation/middleware.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,8 +687,20 @@ export class FederationImpl<TContextData>
687687
});
688688
} catch (error) {
689689
span.setStatus({ code: SpanStatusCode.ERROR, message: String(error) });
690+
const remoteHost = (() => {
691+
if (error instanceof SendActivityError) {
692+
return getRemoteHost(error.inbox);
693+
}
694+
try {
695+
return getRemoteHost(new URL(message.inbox));
696+
} catch (_) {
697+
return undefined;
698+
}
699+
})();
690700
span.addEvent("activitypub.delivery.failed", {
691-
"activitypub.remote.host": getRemoteHost(new URL(message.inbox)),
701+
...(remoteHost == null
702+
? {}
703+
: { "activitypub.remote.host": remoteHost }),
692704
"activitypub.delivery.attempt": message.attempt,
693705
"activitypub.delivery.permanent_failure":
694706
error instanceof SendActivityError &&

packages/fedify/src/federation/send.test.ts

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -498,14 +498,7 @@ test("sendActivity() records OpenTelemetry span events", async (t) => {
498498
event.attributes["activitypub.activity.id"],
499499
"https://example.com/activity",
500500
);
501-
assert(typeof event.attributes["activitypub.activity.json"] === "string");
502-
503-
// Verify the JSON contains the activity
504-
const recordedActivity = JSON.parse(
505-
event.attributes["activitypub.activity.json"] as string,
506-
);
507-
assertEquals(recordedActivity.id, "https://example.com/activity");
508-
assertEquals(recordedActivity.type, "Create");
501+
assertEquals(event.attributes["activitypub.activity.json"], undefined);
509502

510503
exporter.clear();
511504
fetchMock.hardReset();

packages/fedify/src/federation/send.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,6 @@ async function sendActivityInternal(
341341

342342
// Record the sent activity with delivery details
343343
span.addEvent("activitypub.activity.sent", {
344-
"activitypub.activity.json": JSON.stringify(activity),
345344
"activitypub.inbox.url": inbox.href,
346345
"activitypub.activity.id": activityId ?? "",
347346
});

0 commit comments

Comments
 (0)