From e782e98bd20706a1d93acb5fd939a2b2c923bacc Mon Sep 17 00:00:00 2001 From: Dylan Date: Fri, 1 May 2026 14:41:14 -0400 Subject: [PATCH] fix(api_server): require CallbackToken for Merkle callback endpoint (#76) handleCallback no longer skips bearer-token validation when CallbackToken is empty; it now always demands a valid bearer. Config validation rejects an empty CallbackToken when MerkleService is configured, so operators can't accidentally deploy an unauthenticated callback receiver. Token comparison uses constant-time compare to remove the timing side channel. Closes F-018. --- config.example.yaml | 10 ++ config/config.go | 10 ++ config/config_test.go | 36 ++++++ services/api_server/handlers.go | 28 +++-- services/api_server/handlers_test.go | 159 ++++++++++++++++++++++++--- services/api_server/routes.go | 2 +- 6 files changed, 219 insertions(+), 26 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index 74c480b..25e2cfd 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -20,6 +20,12 @@ log_level: info network: mainnet # callback_url: "https://example.com/callback" +# Bearer token the Merkle Service must present in the Authorization header on +# every POST /api/v1/merkle-service/callback. REQUIRED when merkle_service.url +# is set: arcade refuses to start otherwise, since an unset token would let any +# unauthenticated caller submit forged status updates for any txid. Generate a +# high-entropy random value (e.g. `openssl rand -hex 32`) and configure the +# matching token on the Merkle Service. See issue #76 / finding F-018. # callback_token: "your-callback-token" api: @@ -73,6 +79,10 @@ teranode: auth_token: "" merkle_service: + # Merkle Service base URL. Leaving this empty disables the Merkle integration + # (standalone profiles). When set, the top-level `callback_token` becomes + # REQUIRED — arcade fails to start otherwise so that the inbound callback + # endpoint never runs unauthenticated. See issue #76 / finding F-018. url: "" auth_token: "" diff --git a/config/config.go b/config/config.go index 6d1de23..ea330a0 100644 --- a/config/config.go +++ b/config/config.go @@ -538,6 +538,16 @@ func validate(cfg *Config) error { // of the client. The documented standalone and zero-dependency profiles // (config.example.standalone.yaml) ship with merkle_service.url: "" for // exactly this reason. See issue #59 / finding F-001. + // + // When the Merkle integration IS enabled (URL set), callback_token is + // mandatory. The /api/v1/merkle-service/callback endpoint accepts forged + // status updates for any txid in the system if it runs without bearer-token + // auth, so we fail-closed here at config load rather than silently exposing + // the unauthenticated receiver. See issue #76 / finding F-018. + if cfg.MerkleService.URL != "" && cfg.CallbackToken == "" { + return fmt.Errorf("callback_token is required when merkle_service.url is set " + + "(unauthenticated /api/v1/merkle-service/callback would accept forged callbacks; see issue #76)") + } if cfg.Network == "" { cfg.Network = NetworkMainnet } diff --git a/config/config_test.go b/config/config_test.go index 9030f02..a1d50bd 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -7,6 +7,10 @@ import ( // baseValidConfig returns a Config populated with the minimum fields other // validate() branches require so each test can focus on the network branch. +// +// Sets a non-empty CallbackToken because validate() now refuses to start +// when MerkleService.URL is set without a bearer token (issue #76 / F-018). +// Tests that need to exercise that branch override CallbackToken explicitly. func baseValidConfig() *Config { cfg := &Config{} cfg.Mode = "all" @@ -15,6 +19,7 @@ func baseValidConfig() *Config { cfg.Store.Pebble.Path = "/tmp/arcade-test" cfg.Network = NetworkMainnet cfg.MerkleService.URL = "http://merkle.local" + cfg.CallbackToken = "test-callback-token" return cfg } @@ -43,6 +48,37 @@ func TestValidate_AcceptsPopulatedMerkleServiceURL(t *testing.T) { } } +// Issue #76 / finding F-018: when the Merkle integration is wired up, the +// inbound /api/v1/merkle-service/callback endpoint MUST be authenticated. We +// fail fast at config load when MerkleService.URL is set without a +// callback_token rather than silently exposing an unauthenticated receiver +// that any unauthenticated caller could use to submit forged status updates. +func TestValidate_RequiresCallbackTokenWhenMerkleEnabled(t *testing.T) { + cfg := baseValidConfig() + cfg.MerkleService.URL = "http://merkle.local" + cfg.CallbackToken = "" + err := validate(cfg) + if err == nil { + t.Fatal("expected error when merkle_service.url is set without callback_token") + } + if !strings.Contains(err.Error(), "callback_token") { + t.Errorf("error should mention callback_token, got: %v", err) + } +} + +// Standalone profiles ship with merkle_service.url: "" and frequently leave +// callback_token empty too — there is no Merkle Service issuing callbacks, so +// no token is required. The validation must pass cleanly so the standalone +// binary keeps booting (issue #59 fix from #104 must continue to hold). +func TestValidate_AllowsEmptyCallbackTokenInStandaloneMode(t *testing.T) { + cfg := baseValidConfig() + cfg.MerkleService.URL = "" + cfg.CallbackToken = "" + if err := validate(cfg); err != nil { + t.Fatalf("standalone (no merkle, no callback_token) should validate, got: %v", err) + } +} + // Each canonical network name must validate cleanly. The empty string is also // accepted — validate() normalizes it to mainnet so CLI users can omit the key. // Regtest validates without bootstrap_peers here because baseValidConfig leaves diff --git a/services/api_server/handlers.go b/services/api_server/handlers.go index 3d5e7e0..d73c9f4 100644 --- a/services/api_server/handlers.go +++ b/services/api_server/handlers.go @@ -3,6 +3,7 @@ package api_server import ( "context" "crypto/rand" + "crypto/subtle" "encoding/hex" "html/template" "io" @@ -225,14 +226,27 @@ func (s *Server) handleReady(c *gin.Context) { // handleCallback processes inbound callbacks from Merkle Service. // Uses CallbackMessage format with Type field. +// +// Bearer-token authentication is mandatory. config.validate refuses to start +// the binary when MerkleService is configured without a CallbackToken (finding +// F-018 / issue #76), so reaching this handler with an empty configured token +// means a misconfigured deployment outside the supported envelope. We still +// fail closed here as a defense-in-depth measure: an empty/missing bearer or +// any mismatch is rejected with 401 before any callback processing runs. func (s *Server) handleCallback(c *gin.Context) { - // Bearer token validation - if s.cfg.CallbackToken != "" { - auth := c.GetHeader("Authorization") - if !strings.HasPrefix(auth, "Bearer ") || auth[len("Bearer "):] != s.cfg.CallbackToken { - c.JSON(http.StatusUnauthorized, gin.H{"error": "unauthorized"}) - return - } + // Bearer token validation — always enforced, never skipped on empty + // configured token. subtle.ConstantTimeCompare removes the timing side + // channel that a plain == on the secret would expose. + auth := c.GetHeader("Authorization") + const bearerPrefix = "Bearer " + configured := []byte(s.cfg.CallbackToken) + var presented []byte + if strings.HasPrefix(auth, bearerPrefix) { + presented = []byte(auth[len(bearerPrefix):]) + } + if len(configured) == 0 || subtle.ConstantTimeCompare(configured, presented) != 1 { + c.JSON(http.StatusUnauthorized, gin.H{"error": "unauthorized"}) + return } var msg models.CallbackMessage diff --git a/services/api_server/handlers_test.go b/services/api_server/handlers_test.go index 2d2df96..8180c44 100644 --- a/services/api_server/handlers_test.go +++ b/services/api_server/handlers_test.go @@ -161,11 +161,17 @@ func mustMarshalJSON(t *testing.T, v any) []byte { return b } +// testCallbackToken is the bearer secret every callback test installs into the +// Server.cfg so handleCallback's mandatory auth check passes. Real deployments +// supply a high-entropy value via config.callback_token; tests just need a +// stable non-empty string. +const testCallbackToken = "test-callback-token" + func setupServerWithStore(broker *kafka.RecordingBroker, ms *mockStore) (*Server, *gin.Engine) { gin.SetMode(gin.TestMode) producer := kafka.NewProducer(broker) srv := &Server{ - cfg: &config.Config{}, + cfg: &config.Config{CallbackToken: testCallbackToken}, logger: zap.NewNop(), producer: producer, store: ms, @@ -179,7 +185,7 @@ func setupServer(broker *kafka.RecordingBroker) (*Server, *gin.Engine) { gin.SetMode(gin.TestMode) producer := kafka.NewProducer(broker) srv := &Server{ - cfg: &config.Config{}, + cfg: &config.Config{CallbackToken: testCallbackToken}, logger: zap.NewNop(), producer: producer, } @@ -188,6 +194,17 @@ func setupServer(broker *kafka.RecordingBroker) (*Server, *gin.Engine) { return srv, router } +// authedCallbackRequest builds a callback POST with the canonical bearer +// header so the mandatory auth check inside handleCallback accepts it. Tests +// that exercise the auth check itself construct their own requests instead. +func authedCallbackRequest(t *testing.T, body []byte) *http.Request { + t.Helper() + req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer "+testCallbackToken) + return req +} + // totalMessages returns the combined count of single-message Sends and // batched entries — matching the old Sarama mock's flat-message semantics. func totalMessages(broker *kafka.RecordingBroker) int { @@ -315,8 +332,7 @@ func TestHandleCallback_SeenMultipleNodes_UpdatesStatus(t *testing.T) { } body := mustMarshalJSON(t, payload) - req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(body)) - req.Header.Set("Content-Type", "application/json") + req := authedCallbackRequest(t, body) w := httptest.NewRecorder() router.ServeHTTP(w, req) @@ -356,8 +372,7 @@ func TestHandleCallback_Stump_StorageError_Returns500(t *testing.T) { } body := mustMarshalJSON(t, payload) - req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(body)) - req.Header.Set("Content-Type", "application/json") + req := authedCallbackRequest(t, body) w := httptest.NewRecorder() router.ServeHTTP(w, req) @@ -380,8 +395,7 @@ func TestHandleCallback_SeenMultipleNodes_EmptyTxIDs(t *testing.T) { } body := mustMarshalJSON(t, payload) - req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(body)) - req.Header.Set("Content-Type", "application/json") + req := authedCallbackRequest(t, body) w := httptest.NewRecorder() router.ServeHTTP(w, req) @@ -459,8 +473,7 @@ func TestHandleCallback_FullBlockFlow_20Subtrees(t *testing.T) { errCh <- fmt.Errorf("marshal subtree %d: %w", i, err) return } - req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(body)) - req.Header.Set("Content-Type", "application/json") + req := authedCallbackRequest(t, body) // Match merkle-service's delivery headers exactly. req.Header.Set("X-Idempotency-Key", fmt.Sprintf("%s:%d:STUMP", blockHash, i)) w := httptest.NewRecorder() @@ -523,8 +536,7 @@ func TestHandleCallback_FullBlockFlow_20Subtrees(t *testing.T) { if err != nil { t.Fatalf("marshal BLOCK_PROCESSED: %v", err) } - req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(body)) - req.Header.Set("Content-Type", "application/json") + req := authedCallbackRequest(t, body) req.Header.Set("X-Idempotency-Key", blockHash+":BLOCK_PROCESSED") w := httptest.NewRecorder() router.ServeHTTP(w, req) @@ -573,8 +585,7 @@ func TestHandleCallback_FullBlockFlow_20Subtrees(t *testing.T) { Stump: stumpPayloads[0], } retryBody := mustMarshalJSON(t, retryPayload) - retryReq := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(retryBody)) - retryReq.Header.Set("Content-Type", "application/json") + retryReq := authedCallbackRequest(t, retryBody) retryW := httptest.NewRecorder() router.ServeHTTP(retryW, retryReq) if retryW.Code != http.StatusOK { @@ -658,8 +669,7 @@ func TestHandleCallback_FullBlockFlow_PartialStumpFailure(t *testing.T) { Stump: stumpPayloads[i], } body := mustMarshalJSON(t, payload) - req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(body)) - req.Header.Set("Content-Type", "application/json") + req := authedCallbackRequest(t, body) w := httptest.NewRecorder() router.ServeHTTP(w, req) resCh <- result{idx: i, status: w.Code} @@ -707,8 +717,7 @@ func TestHandleCallback_FullBlockFlow_PartialStumpFailure(t *testing.T) { BlockHash: blockHash, } body := mustMarshalJSON(t, blockMsg) - req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(body)) - req.Header.Set("Content-Type", "application/json") + req := authedCallbackRequest(t, body) w := httptest.NewRecorder() router.ServeHTTP(w, req) if w.Code != http.StatusOK { @@ -863,3 +872,117 @@ func TestHandleSubmitTransactions_TxID_IsCanonical(t *testing.T) { t.Errorf("submissions: want %v, got %v", want, got) } } + +// TestHandleCallback_RejectsUnauthenticated locks down the F-018 fix: the +// /api/v1/merkle-service/callback receiver MUST refuse any request missing +// or presenting the wrong bearer token, and MUST refuse all requests when +// the configured token is empty (fail-closed). Pre-fix, the handler skipped +// the entire bearer check when CallbackToken == "", letting any unauthenticated +// caller submit forged Merkle status updates. +// +// This test exercises the runtime check directly. The "config rejects empty +// token when Merkle is enabled" half of the fix is covered in +// config/config_test.go (TestValidate_RequiresCallbackTokenWhenMerkleEnabled). +func TestHandleCallback_RejectsUnauthenticated(t *testing.T) { + payload := mustMarshalJSON(t, models.CallbackMessage{ + Type: models.CallbackSeenMultipleNodes, + TxIDs: []string{"tx1"}, + }) + + cases := []struct { + name string + token string // configured CallbackToken on the Server. + header string // Authorization header sent on the request, "" = none. + wantOK bool + wantErr int + }{ + { + name: "no auth header is rejected", + token: testCallbackToken, + header: "", + wantOK: false, + wantErr: http.StatusUnauthorized, + }, + { + name: "wrong bearer is rejected", + token: testCallbackToken, + header: "Bearer not-the-real-token", + wantOK: false, + wantErr: http.StatusUnauthorized, + }, + { + name: "non-bearer scheme is rejected", + token: testCallbackToken, + header: "Basic " + testCallbackToken, + wantOK: false, + wantErr: http.StatusUnauthorized, + }, + { + // Defense-in-depth. Config validation now refuses to start with an + // empty CallbackToken when Merkle is enabled, but if a misconfigured + // process somehow reaches the handler with cfg.CallbackToken == "" + // every request — including one presenting an empty bearer — must + // still be refused. + name: "empty configured token rejects all callers", + token: "", + header: "Bearer ", + wantOK: false, + wantErr: http.StatusUnauthorized, + }, + { + // Same scenario as above with no auth header — must also be 401. + name: "empty configured token rejects unauthenticated caller", + token: "", + header: "", + wantOK: false, + wantErr: http.StatusUnauthorized, + }, + { + name: "correct bearer is accepted", + token: testCallbackToken, + header: "Bearer " + testCallbackToken, + wantOK: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ms := &mockStore{} + gin.SetMode(gin.TestMode) + producer := kafka.NewProducer(&kafka.RecordingBroker{}) + srv := &Server{ + cfg: &config.Config{CallbackToken: tc.token}, + logger: zap.NewNop(), + producer: producer, + store: ms, + } + router := gin.New() + srv.registerRoutes(router) + + req := httptest.NewRequestWithContext(t.Context(), http.MethodPost, "/api/v1/merkle-service/callback", bytes.NewReader(payload)) + req.Header.Set("Content-Type", "application/json") + if tc.header != "" { + req.Header.Set("Authorization", tc.header) + } + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if tc.wantOK { + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + if len(ms.updateStatusCalls) != 1 { + t.Errorf("expected store update on accepted callback, got %d", len(ms.updateStatusCalls)) + } + return + } + if w.Code != tc.wantErr { + t.Fatalf("expected %d, got %d: %s", tc.wantErr, w.Code, w.Body.String()) + } + // Rejected requests must not reach the dispatch path. + if len(ms.updateStatusCalls) != 0 { + t.Errorf("rejected callback must not write to the store, got %d UpdateStatus calls", len(ms.updateStatusCalls)) + } + }) + } +} diff --git a/services/api_server/routes.go b/services/api_server/routes.go index b387594..1e61b08 100644 --- a/services/api_server/routes.go +++ b/services/api_server/routes.go @@ -55,7 +55,7 @@ var routeDocs = []RouteDoc{ Method: "POST", Path: "/api/v1/merkle-service/callback", Description: "Receive callbacks from Merkle Service", - RequestFormat: "JSON CallbackMessage with type field. Optional Bearer token auth.", + RequestFormat: "JSON CallbackMessage with type field. Bearer token auth required (Authorization: Bearer ).", ResponseFormat: "200 OK", }, {