Skip to content

Commit 3e0ca42

Browse files
nadahalli, pavel-raykov, vreff
authored
[CRE] [1/5] Gateway handler for confidential relay (#21638)
* [CRE] [1/5] Gateway handler for confidential relay Add a new gateway handler type "confidential-compute-relay" that fans out enclave JSON-RPC requests to relay DON nodes and aggregates responses using F+1 quorum. Supports secrets_get and capability_exec methods. Part of #21635
* Fix exhaustive switch lint and tidy integration-tests modules Add missing api.ConflictError and api.LimitExceededError cases to both switch statements in handler.go. Run go mod tidy on integration-tests and integration-tests/load.
* Fix goimports formatting in gateway_job.go
* Use ServiceName constants and add RequestTimeoutSec to relay handler - Replace string literals with ServiceNameVault, ServiceNameWorkflows, ServiceNameConfidential constants in all handler constructors. - Add RequestTimeoutSec to confidentialRelayHandlerConfig, set to gateway timeout minus 1s (matching vault handler pattern). Ensures the handler times out before the gateway, returning a clean error instead of the gateway killing the connection.
* Add comment explaining requestTimeoutSec - 1 in relay handler
* Use fmt.Errorf instead of errors.New with string concatenation Replace errors.New(x.Error() + ...) and fmt.Sprintf + errors.New patterns with fmt.Errorf throughout the relay handler and aggregator. Use %w for error wrapping where appropriate. Add comment clarifying sendResponse deletes expired requests.
* Improve F+1 quorum comment in relay aggregator Explain why F+1 is correct: relay nodes proxy already-aggregated DON responses through deterministic translation, so honest nodes produce byte-identical outputs.
* Move requestTimeoutSec - 1 to call site for relay handler Make the buffer visible where handlers are wired up instead of hiding it inside the constructor. The vault handler does the same subtraction internally; a follow-up should unify both to use this pattern.
* Extract deleteActiveRequest from sendResponse sendResponse no longer has the side effect of deleting from activeRequests. Callers explicitly call deleteActiveRequest after sendResponse, making the cleanup visible at every call site.
* Rename sendResponse to sendResponseAndCleanup, fix cleanup-on-error bug The old sendResponse skipped the delete if SendResponse failed, leaving the request in activeRequests forever. Now the delete always runs regardless of send outcome. The method name makes the cleanup explicit.
* Handle errQuorumUnobtainable explicitly in aggregation switch
* Merge errorResponse into sendErrorResponseAndCleanup
* Move error sanitization into sendResponseAndCleanup
* Inline send+cleanup into sendResponseAndCleanup and sendSuccessResponseAndCleanup
* Unify sendResponseAndCleanup to handle both success and error paths
* Simplify `sendResponseAndCleanup`.
* Fix exhaustive lint: restore missing switch cases in recordMetrics and constructErrorResponse
* Suppress exhaustive switch warning.
* fan out relay requests to don nodes concurrently
* Clean up confidential relay concurrency test helper
* Remove redundant loop variable copy in relay fanout
* Use atomic counter for relay node send errors
* use cre settings for relay gateway rate limits
* Fail fast when relay quorum becomes impossible
* Run gomodtidy to fix CI module drift
---------
Co-authored-by: pavel-raykov <pavel.raykov@smartcontract.com>
Co-authored-by: vreff <104409744+vreff@users.noreply.github.com>
1 parent 92db8e0 commit 3e0ca42

6 files changed

Lines changed: 1232 additions & 10 deletions

File tree

core/services/gateway/handler_factory.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,20 @@ import (
1717
"github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers"
1818
"github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/capabilities"
1919
v2 "github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/capabilities/v2"
20+
"github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/confidentialrelay"
2021
"github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/functions"
2122
"github.com/smartcontractkit/chainlink/v2/core/services/gateway/handlers/vault"
2223
"github.com/smartcontractkit/chainlink/v2/core/services/gateway/network"
2324
workflowsyncerv2 "github.com/smartcontractkit/chainlink/v2/core/services/workflows/syncer/v2"
2425
)
2526

2627
const (
27-
FunctionsHandlerType HandlerType = "functions"
28-
DummyHandlerType HandlerType = "dummy"
29-
WebAPICapabilitiesType HandlerType = "web-api-capabilities" // Handler for v0.1 HTTP capabilities for DAG workflows
30-
HTTPCapabilityType HandlerType = "http-capabilities" // Handler for v1.0 HTTP capabilities for NoDAG workflows
31-
VaultHandlerType HandlerType = "vault"
28+
FunctionsHandlerType HandlerType = "functions"
29+
DummyHandlerType HandlerType = "dummy"
30+
WebAPICapabilitiesType HandlerType = "web-api-capabilities" // Handler for v0.1 HTTP capabilities for DAG workflows
31+
HTTPCapabilityType HandlerType = "http-capabilities" // Handler for v1.0 HTTP capabilities for NoDAG workflows
32+
VaultHandlerType HandlerType = "vault"
33+
ConfidentialRelayHandlerType HandlerType = "confidential-compute-relay"
3234
)
3335

3436
type handlerFactory struct {
@@ -85,6 +87,8 @@ func (hf *handlerFactory) NewHandler(
8587
return v2.NewGatewayHandler(handlerConfig, donConfig, don, hf.httpClient, hf.lggr, hf.lf)
8688
case VaultHandlerType:
8789
return vault.NewHandler(handlerConfig, donConfig, don, hf.capabilitiesRegistry, hf.workflowRegistrySyncer, hf.lggr, clockwork.NewRealClock(), hf.lf)
90+
case ConfidentialRelayHandlerType:
91+
return confidentialrelay.NewHandler(handlerConfig, donConfig, don, hf.lggr, clockwork.NewRealClock(), hf.lf)
8892
default:
8993
return nil, fmt.Errorf("unsupported handler type %s", handlerType)
9094
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package confidentialrelay
2+
3+
import (
4+
"encoding/json"
5+
"errors"
6+
"fmt"
7+
8+
jsonrpc "github.com/smartcontractkit/chainlink-common/pkg/jsonrpc2"
9+
"github.com/smartcontractkit/chainlink-common/pkg/logger"
10+
)
11+
12+
// errInsufficientResponsesForQuorum is returned by Aggregate when no response
// digest has reached the required quorum yet, but quorum has not been ruled out.
var errInsufficientResponsesForQuorum = errors.New("insufficient valid responses to reach quorum")

// errQuorumUnobtainable is wrapped by Aggregate when even the responses still
// outstanding could not lift any digest to the required quorum.
var errQuorumUnobtainable = errors.New("quorum unobtainable")
16+
17+
// aggregator is a field-less type whose Aggregate method picks a relay
// response that is backed by a quorum of matching response digests.
type aggregator struct{}
18+
19+
func (a *aggregator) Aggregate(resps map[string]jsonrpc.Response[json.RawMessage], donF int, donMembersCount int, l logger.Logger) (*jsonrpc.Response[json.RawMessage], error) {
20+
// F+1 (QuorumFPlusOne) is sufficient because each relay node calls the
21+
// target DON (Vault or capability) through CRE's standard capability
22+
// dispatch, which includes DON-level consensus. Every honest relay node
23+
// receives the same consensus-aggregated response and performs deterministic
24+
// translation, producing byte-identical outputs. F+1 matching responses
25+
// therefore guarantees at least one honest node vouched for the result.
26+
requiredQuorum := donF + 1
27+
28+
if len(resps) < requiredQuorum {
29+
return nil, errInsufficientResponsesForQuorum
30+
}
31+
32+
shaToCount := map[string]int{}
33+
maxShaToCount := 0
34+
for _, r := range resps {
35+
sha, err := r.Digest()
36+
if err != nil {
37+
l.Errorw("failed to compute digest of response during quorum validation, skipping...", "error", err)
38+
continue
39+
}
40+
shaToCount[sha]++
41+
if shaToCount[sha] > maxShaToCount {
42+
maxShaToCount = shaToCount[sha]
43+
}
44+
if shaToCount[sha] >= requiredQuorum {
45+
return &r, nil
46+
}
47+
}
48+
49+
remainingResponses := donMembersCount - len(resps)
50+
if maxShaToCount+remainingResponses < requiredQuorum {
51+
l.Warnw("quorum unattainable for request", "requiredQuorum", requiredQuorum, "remainingResponses", remainingResponses, "maxShaToCount", maxShaToCount)
52+
return nil, fmt.Errorf("%w: requiredQuorum=%d, maxShaToCount=%d, remainingResponses=%d", errQuorumUnobtainable, requiredQuorum, maxShaToCount, remainingResponses)
53+
}
54+
55+
return nil, errInsufficientResponsesForQuorum
56+
}

0 commit comments

Comments
 (0)