Skip to content

Commit 02143bc

Browse files
committed
admin: forward 500 parity + log unexpected source errors (Codex P2 ×2)
Two related findings on the leader-side AdminForward dispatcher. - Operation-specific 500 codes: forwardErrorResponse mapped any unmapped failure to {"error":"internal", ...}, but the HTTP handler's writeTablesError emits dynamo_create_failed / dynamo_delete_failed. A forwarded write that hit the same backend/storage error therefore returned a different code than a leader-direct write — clients branching on those codes saw divergent behaviour. Fix: forwardErrorResponse now takes an `op` argument ("create" or "delete") so the fallthrough emits dynamo_<op>_failed, matching leader-direct parity exactly. - Log unexpected source errors: handleCreate / handleDelete on the forward path returned 500 with "see leader logs" in the body but never logged anything for non-sentinel source failures, leaving operators with no breadcrumb for forwarded 500s. Add logUnexpectedSourceError that emits an admin_forward_<op>_failed line at LevelError carrying the table, forwarded_from, and raw error. Sentinels (forbidden, not-found, already-exists, validation) skip the log — they're routine client-side outcomes, and logging at LevelError would drown the operational signal (same policy as the HTTP path's writeTablesError). Tests: - TestForwardServer_CreateTable_GenericErrorReturns500 now also asserts the response body contains dynamo_create_failed. - New TestForwardServer_DeleteTable_GenericErrorUsesOperationSpecificCode pins the delete-side code parity. - New TestForwardServer_LogsUnexpectedSourceError captures the slog output and confirms the table, forwarded_from, and raw error all appear. - New TestForwardServer_DoesNotLogStructuredSourceErrors sweeps the four sentinel cases and confirms the LevelError handler never emits anything.
1 parent 1de2e39 commit 02143bc

2 files changed

Lines changed: 136 additions & 4 deletions

File tree

internal/admin/forward_server.go

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,8 @@ func (s *ForwardServer) handleCreate(ctx context.Context, principal AuthPrincipa
150150
}
151151
summary, err := s.source.AdminCreateTable(ctx, principal, body)
152152
if err != nil {
153-
return forwardErrorResponse(err), nil
153+
s.logUnexpectedSourceError(ctx, "create_table", body.TableName, req.GetForwardedFrom(), err)
154+
return forwardErrorResponse("create", err), nil
154155
}
155156
s.logger.LogAttrs(ctx, slog.LevelInfo, "admin_audit",
156157
slog.String("actor", principal.AccessKey),
@@ -203,7 +204,8 @@ func (s *ForwardServer) handleDelete(ctx context.Context, principal AuthPrincipa
203204
return rejectForward(http.StatusBadRequest, "invalid_body", "delete payload name must not contain '/'")
204205
}
205206
if err := s.source.AdminDeleteTable(ctx, principal, body.Name); err != nil {
206-
return forwardErrorResponse(err), nil
207+
s.logUnexpectedSourceError(ctx, "delete_table", body.Name, req.GetForwardedFrom(), err)
208+
return forwardErrorResponse("delete", err), nil
207209
}
208210
s.logger.LogAttrs(ctx, slog.LevelInfo, "admin_audit",
209211
slog.String("actor", principal.AccessKey),
@@ -220,7 +222,14 @@ func (s *ForwardServer) handleDelete(ctx context.Context, principal AuthPrincipa
220222
// is the leader-side counterpart of writeTablesError: every status /
221223
// JSON code the HTTP handler chooses is mirrored here so a forwarded
222224
// call is indistinguishable to the SPA from a leader-direct call.
223-
func forwardErrorResponse(err error) *pb.AdminForwardResponse {
225+
//
226+
// op is "create" or "delete" so the unmapped 500 fallthrough emits
227+
// dynamo_create_failed / dynamo_delete_failed — the same
228+
// operation-specific codes the leader-direct HTTP path produces in
229+
// writeTablesError. Without this, forwarded write failures showed
230+
// up to clients as a generic "internal" code, breaking parity with
231+
// the leader-direct path (Codex P2 on PR #635).
232+
func forwardErrorResponse(op string, err error) *pb.AdminForwardResponse {
224233
switch {
225234
case errors.Is(err, ErrTablesForbidden):
226235
return mustForwardJSON(http.StatusForbidden, errorBody{Error: "forbidden", Message: "this endpoint requires a full-access role"})
@@ -238,7 +247,45 @@ func forwardErrorResponse(err error) *pb.AdminForwardResponse {
238247
if errors.As(err, &verr) {
239248
return mustForwardJSON(http.StatusBadRequest, errorBody{Error: "invalid_request", Message: verr.Error()})
240249
}
241-
return mustForwardJSON(http.StatusInternalServerError, errorBody{Error: "internal", Message: "internal error; see leader logs"})
250+
return mustForwardJSON(http.StatusInternalServerError, errorBody{
251+
Error: "dynamo_" + op + "_failed",
252+
Message: "failed to " + op + " table; see leader logs",
253+
})
254+
}
255+
256+
// logUnexpectedSourceError emits an error log for non-sentinel
257+
// source failures so operators have a breadcrumb when forwarded
258+
// writes 500. Sentinel errors that map to specific HTTP statuses
259+
// (forbidden, not-found, validation, ...) are deliberately
260+
// silent: those are routine client-side failures, not server
261+
// regressions, and logging them at LevelError would drown the
262+
// operational signal. The HTTP path's writeTablesError applies
263+
// the same policy (Codex P2 on PR #635 flagged the silent path).
264+
func (s *ForwardServer) logUnexpectedSourceError(ctx context.Context, op, table, forwardedFrom string, err error) {
265+
if isStructuredSourceError(err) {
266+
return
267+
}
268+
s.logger.LogAttrs(ctx, slog.LevelError, "admin_forward_"+op+"_failed",
269+
slog.String("table", table),
270+
slog.String("forwarded_from", forwardedFrom),
271+
slog.String("error", err.Error()),
272+
)
273+
}
274+
275+
// isStructuredSourceError reports whether err is one of the
276+
// admin-package sentinels or a ValidationError — i.e., a known
277+
// failure mode the handler maps to a non-500 status. These are
278+
// expected and not log-worthy.
279+
func isStructuredSourceError(err error) bool {
280+
switch {
281+
case errors.Is(err, ErrTablesForbidden),
282+
errors.Is(err, ErrTablesNotLeader),
283+
errors.Is(err, ErrTablesNotFound),
284+
errors.Is(err, ErrTablesAlreadyExists):
285+
return true
286+
}
287+
var verr *ValidationError
288+
return errors.As(err, &verr)
242289
}
243290

244291
// errorBody is the shared JSON shape for both the HTTP handler's

internal/admin/forward_server_test.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package admin
22

33
import (
4+
"bytes"
45
"context"
56
"errors"
7+
"log/slog"
68
"net/http"
79
"testing"
810

@@ -285,4 +287,87 @@ func TestForwardServer_CreateTable_GenericErrorReturns500(t *testing.T) {
285287
require.NoError(t, err)
286288
require.Equal(t, int32(http.StatusInternalServerError), resp.GetStatusCode())
287289
require.NotContains(t, string(resp.GetPayload()), "ZQ-993")
290+
// Error code parity with leader-direct path: must use
291+
// dynamo_create_failed, not the previous generic "internal"
292+
// (Codex P2 on PR #635).
293+
require.Contains(t, string(resp.GetPayload()), `"error":"dynamo_create_failed"`)
294+
}
295+
296+
// TestForwardServer_DeleteTable_GenericErrorUsesOperationSpecificCode
297+
// is the delete-side counterpart: 500 fallthrough must use
298+
// dynamo_delete_failed so client retry/branching logic that
299+
// already handles the leader-direct path works unchanged.
300+
func TestForwardServer_DeleteTable_GenericErrorUsesOperationSpecificCode(t *testing.T) {
301+
src := &stubTablesSource{
302+
tables: map[string]*DynamoTableSummary{"users": {Name: "users"}},
303+
deleteErr: errors.New("storage backing sentinel DEL-1"),
304+
}
305+
srv := newForwardServerForTest(src, fullPrincipalRoleStore())
306+
resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{
307+
Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"},
308+
Operation: pb.AdminOperation_ADMIN_OP_DELETE_TABLE,
309+
Payload: []byte(`{"name":"users"}`),
310+
})
311+
require.NoError(t, err)
312+
require.Equal(t, int32(http.StatusInternalServerError), resp.GetStatusCode())
313+
require.Contains(t, string(resp.GetPayload()), `"error":"dynamo_delete_failed"`)
314+
require.NotContains(t, string(resp.GetPayload()), "DEL-1")
315+
}
316+
317+
// TestForwardServer_LogsUnexpectedSourceError confirms the leader
318+
// emits an error log line for non-sentinel source failures so
319+
// operators can investigate forwarded write 500s without
320+
// reverse-engineering the response body. The HTTP path's
321+
// writeTablesError already logs; without the symmetric emit on
322+
// the forward path, forwarded failures were operationally invisible
323+
// (Codex P2 on PR #635).
324+
func TestForwardServer_LogsUnexpectedSourceError(t *testing.T) {
325+
var buf bytes.Buffer
326+
logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelError}))
327+
src := &stubTablesSource{createErr: errors.New("storage sentinel SENT-42")}
328+
srv := NewForwardServer(src, fullPrincipalRoleStore(), logger)
329+
330+
_, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{
331+
Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"},
332+
Operation: pb.AdminOperation_ADMIN_OP_CREATE_TABLE,
333+
Payload: mustJSON(t, CreateTableRequest{TableName: "u", PartitionKey: CreateTableAttribute{Name: "id", Type: "S"}}),
334+
ForwardedFrom: "follower-9",
335+
})
336+
require.NoError(t, err)
337+
logged := buf.String()
338+
require.Contains(t, logged, "admin_forward_create_table_failed")
339+
require.Contains(t, logged, "SENT-42")
340+
require.Contains(t, logged, "follower-9")
341+
}
342+
343+
// TestForwardServer_DoesNotLogStructuredSourceErrors confirms the
344+
// "expected" sentinels (forbidden, not-found, already-exists,
345+
// validation) do NOT emit error-level logs. They are routine
346+
// client-side outcomes, not server regressions, so logging them
347+
// at LevelError would drown the operational signal.
348+
func TestForwardServer_DoesNotLogStructuredSourceErrors(t *testing.T) {
349+
cases := []struct {
350+
name string
351+
err error
352+
}{
353+
{"forbidden", ErrTablesForbidden},
354+
{"not_found", ErrTablesNotFound},
355+
{"already_exists", ErrTablesAlreadyExists},
356+
{"validation", &ValidationError{Message: "field foo invalid"}},
357+
}
358+
for _, tc := range cases {
359+
t.Run(tc.name, func(t *testing.T) {
360+
var buf bytes.Buffer
361+
logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelError}))
362+
src := &stubTablesSource{createErr: tc.err}
363+
srv := NewForwardServer(src, fullPrincipalRoleStore(), logger)
364+
_, _ = srv.Forward(context.Background(), &pb.AdminForwardRequest{
365+
Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"},
366+
Operation: pb.AdminOperation_ADMIN_OP_CREATE_TABLE,
367+
Payload: mustJSON(t, CreateTableRequest{TableName: "u", PartitionKey: CreateTableAttribute{Name: "id", Type: "S"}}),
368+
})
369+
require.NotContains(t, buf.String(), "admin_forward_create_table_failed",
370+
"sentinel error %q must not produce an error log", tc.name)
371+
})
372+
}
288373
}

0 commit comments

Comments
 (0)