Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cmd/boulder-wfe2/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/letsencrypt/boulder/goodkey/sagoodkey"
bgrpc "github.com/letsencrypt/boulder/grpc"
"github.com/letsencrypt/boulder/grpc/noncebalancer"
"github.com/letsencrypt/boulder/grpc/noncebalancerv2"
"github.com/letsencrypt/boulder/issuance"
"github.com/letsencrypt/boulder/nonce"
rapb "github.com/letsencrypt/boulder/ra/proto"
Expand Down Expand Up @@ -318,9 +319,11 @@ func main() {
cmd.FailOnError(err, "Failed to load credentials and create gRPC connection to get nonce service")
gnc := nonce.NewGetter(getNonceConn)

if c.WFE.RedeemNonceService.SRVResolver != noncebalancer.SRVResolverScheme {
if c.WFE.RedeemNonceService.SRVResolver != noncebalancer.SRVResolverScheme &&
c.WFE.RedeemNonceService.SRVResolver != noncebalancerv2.SRVResolverScheme {
cmd.Fail(fmt.Sprintf(
"'redeemNonceService.SRVResolver' must be set to %q", noncebalancer.SRVResolverScheme),
"'redeemNonceService.SRVResolver' must be set to %q or %q",
noncebalancer.SRVResolverScheme, noncebalancerv2.SRVResolverScheme),
)
}
redeemNonceConn, err := bgrpc.ClientSetup(c.WFE.RedeemNonceService, tlsConfig, stats, clk)
Expand Down
2 changes: 1 addition & 1 deletion cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ type GRPCClientConfig struct {
// implementation of the SRV resolver should be used. The default is 'srv'.
// For more details, see the documentation in:
// grpc/internal/resolver/dns/dns_resolver.go.
SRVResolver string `validate:"excluded_with=ServerAddress,isdefault|oneof=srv nonce-srv"`
SRVResolver string `validate:"excluded_with=ServerAddress,isdefault|oneof=srv nonce-srv nonce-srv-v2"`

// ServerAddress is a single <hostname|IPv4|[IPv6]>:<port> or `:<port>` that
// the gRPC client will, if necessary, resolve via DNS and then connect to.
Expand Down
8 changes: 8 additions & 0 deletions grpc/internal/resolver/dns/dns_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"github.com/letsencrypt/boulder/bdns"
"github.com/letsencrypt/boulder/grpc/internal/backoff"
"github.com/letsencrypt/boulder/grpc/noncebalancer"
"github.com/letsencrypt/boulder/grpc/noncebalancerv2"
)

var logger = grpclog.Component("srv")
Expand All @@ -54,6 +55,7 @@ var (
// init registers each SRV resolver builder provided by this package (default,
// nonce, and nonce v2) with the gRPC resolver registry.
func init() {
	builders := []resolver.Builder{
		NewDefaultSRVBuilder(),
		NewNonceSRVBuilder(),
		NewNonceSRVBuilderV2(),
	}
	for _, b := range builders {
		resolver.Register(b)
	}
}

const defaultDNSSvrPort = "53"
Expand Down Expand Up @@ -96,6 +98,12 @@ func NewNonceSRVBuilder() resolver.Builder {
return &srvBuilder{scheme: noncebalancer.SRVResolverScheme, balancer: noncebalancer.Name}
}

// NewNonceSRVBuilderV2 returns a resolver.Builder that produces SRV DNS
// resolvers registered under noncebalancerv2.SRVResolverScheme and paired with
// the v2 nonce balancer (noncebalancerv2.Name) used by nonce-service clients.
func NewNonceSRVBuilderV2() resolver.Builder {
	b := &srvBuilder{
		scheme:   noncebalancerv2.SRVResolverScheme,
		balancer: noncebalancerv2.Name,
	}
	return b
}

type srvBuilder struct {
scheme string
balancer string
Expand Down
213 changes: 213 additions & 0 deletions grpc/noncebalancerv2/balancer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
package noncebalancerv2

import (
"errors"
"fmt"

"google.golang.org/grpc/balancer"
"google.golang.org/grpc/balancer/base"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/grpclog"
"google.golang.org/grpc/resolver"
)

// logger is the package-level gRPC component logger, tagged "noncebalancerv2".
var logger = grpclog.Component("noncebalancerv2")

// nonceBalancer implements balancer.Balancer. It is a near-exact copy of
// grpc/balancer/base/balancer.go's baseBalancer with one difference:
// regeneratePicker passes ALL resolver-tracked SubConns to the picker, not just
// READY ones. This allows the picker to distinguish "backend is temporarily
// reconnecting" (queue the RPC via ErrNoSubConnAvailable) from "prefix is
// genuinely unknown" (fail with ErrNoBackendsMatchPrefix).
type nonceBalancer struct {
// cc is the ClientConn used to create SubConns and to publish the
// balancer's state and picker.
cc balancer.ClientConn

// csEvltr folds individual SubConn state transitions into an aggregate
// connectivity state.
csEvltr *balancer.ConnectivityStateEvaluator
// state is the aggregate connectivity state last computed or forced.
state connectivity.State

// subConns maps each address currently reported by the resolver to the
// SubConn created for it.
subConns *resolver.AddressMapV2[balancer.SubConn]
// scStates holds the last recorded state of each SubConn. An entry outlives
// its removal from subConns and is deleted once the SubConn reports Shutdown.
scStates map[balancer.SubConn]connectivity.State
// picker is the most recently generated picker.
picker balancer.Picker
// config is the base.Config supplied by the builder; its HealthCheck field
// is applied to every new SubConn.
config base.Config

resolverErr error // the last error reported by the resolver; cleared on successful resolution
connErr error // the last connection error; cleared upon leaving TransientFailure
}

// ResolverError records err as the most recent resolver error. If the balancer
// currently tracks no SubConns it is forced into TransientFailure. Whenever the
// balancer is in TransientFailure after that check, the picker is regenerated
// (so that it reports the new error) and the state is pushed to the
// ClientConn; in any other state nothing is published.
func (b *nonceBalancer) ResolverError(err error) {
	b.resolverErr = err

	if b.subConns.Len() == 0 {
		b.state = connectivity.TransientFailure
	}

	if b.state == connectivity.TransientFailure {
		b.regeneratePicker()
		next := balancer.State{
			ConnectivityState: b.state,
			Picker:            b.picker,
		}
		b.cc.UpdateState(next)
	}
	// Otherwise the balancer is not reporting an error, so the existing
	// picker remains valid and is left untouched.
}

// UpdateClientConnState reconciles the balancer's SubConns with the addresses
// in s.ResolverState: a SubConn is created (and asked to connect) for every
// address not yet tracked, and the SubConn of every tracked address that is no
// longer listed is shut down. If the update carries no addresses,
// balancer.ErrBadResolverState is returned after recording a resolver error;
// otherwise the picker is regenerated, the state is published to the
// ClientConn, and nil is returned.
func (b *nonceBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
	// TODO: handle s.ResolverState.ServiceConfig?
	if logger.V(2) {
		logger.Info("noncebalancer: got new ClientConn state: ", s)
	}

	// Resolution succeeded, so any earlier resolver error no longer applies.
	b.resolverErr = nil

	resolved := s.ResolverState.Addresses

	// current collects every address in this update so that stale SubConns
	// can be identified below.
	current := resolver.NewAddressMapV2[any]()
	for _, addr := range resolved {
		current.Set(addr, nil)
		if _, exists := b.subConns.Get(addr); exists {
			continue
		}

		// addr is new. newSC is declared ahead of opts so that the
		// StateListener closure captures the variable that NewSubConn
		// assigns below.
		var newSC balancer.SubConn
		opts := balancer.NewSubConnOptions{
			HealthCheckEnabled: b.config.HealthCheck,
			StateListener: func(scs balancer.SubConnState) {
				b.updateSubConnState(newSC, scs)
			},
		}
		newSC, err := b.cc.NewSubConn([]resolver.Address{addr}, opts)
		if err != nil {
			logger.Warningf("noncebalancer: failed to create new SubConn: %v", err)
			continue
		}
		b.subConns.Set(addr, newSC)
		b.scStates[newSC] = connectivity.Idle
		b.csEvltr.RecordTransition(connectivity.Shutdown, connectivity.Idle)
		newSC.Connect()
	}

	for _, addr := range b.subConns.Keys() {
		if _, stillListed := current.Get(addr); stillListed {
			continue
		}
		// The resolver dropped addr. The SubConn's entry in b.scStates is
		// kept until it reports Shutdown; updateSubConnState deletes it then.
		stale, _ := b.subConns.Get(addr)
		stale.Shutdown()
		b.subConns.Delete(addr)
	}

	// An empty address list is reported as an error so that ClientConn
	// triggers a re-resolve. It is also recorded as a resolver error so that,
	// should the overall state become TransientFailure, the error message
	// mentions the zero-address condition.
	if len(resolved) == 0 {
		b.ResolverError(errors.New("produced zero addresses"))
		return balancer.ErrBadResolverState
	}

	b.regeneratePicker()
	b.cc.UpdateState(balancer.State{ConnectivityState: b.state, Picker: b.picker})
	return nil
}

// mergeErrors combines the last connection error and the last resolver error
// into a single error value. It must only be called while b.state is
// TransientFailure.
//
// connErr is expected to be non-nil whenever SubConns exist; when there are
// none, resolverErr is expected to be non-nil instead.
func (b *nonceBalancer) mergeErrors() error {
	switch {
	case b.connErr == nil:
		return fmt.Errorf("last resolver error: %v", b.resolverErr)
	case b.resolverErr == nil:
		return fmt.Errorf("last connection error: %v", b.connErr)
	default:
		return fmt.Errorf("last connection error: %v; last resolver error: %v", b.connErr, b.resolverErr)
	}
}

// regeneratePicker snapshots the balancer and stores a fresh picker in
// b.picker. The picker is
//   - an error picker carrying mergeErrors() if the balancer is in
//     TransientFailure,
//   - otherwise a nonce picker holding two sets built from b.subConns: the
//     SubConns recorded as READY and the SubConns that are not.
//
// This is the only method that differs from baseBalancer, which builds the
// READY set only.
func (b *nonceBalancer) regeneratePicker() {
	if b.state == connectivity.TransientFailure {
		b.picker = base.NewErrPicker(b.mergeErrors())
		return
	}

	ready := make(map[balancer.SubConn]resolver.Address)
	notReady := make(map[balancer.SubConn]resolver.Address)
	for _, addr := range b.subConns.Keys() {
		sc, _ := b.subConns.Get(addr)
		st, tracked := b.scStates[sc]
		if tracked && st == connectivity.Ready {
			ready[sc] = addr
			continue
		}
		notReady[sc] = addr
	}

	b.picker = &picker{
		readyBackends:    ready,
		notReadyBackends: notReady,
	}
}

// UpdateSubConnState does not update any balancer state: every SubConn is
// created with a StateListener, so state changes arrive through
// updateSubConnState instead. A call to this method is unexpected and is only
// logged as an error.
func (b *nonceBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
logger.Errorf("noncebalancer: UpdateSubConnState(%v, %+v) called unexpectedly", sc, state)
}

// updateSubConnState handles a connectivity state change for sc. It is the
// callback behind the StateListener installed on every SubConn created in
// UpdateClientConnState.
//
// Updates for SubConns that are not present in b.scStates are ignored. For a
// known SubConn the new state is recorded, the aggregate state is recomputed,
// the picker is regenerated when needed, and the resulting state and picker
// are published to the ClientConn.
func (b *nonceBalancer) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState) {
s := state.ConnectivityState
if logger.V(2) {
logger.Infof("noncebalancer: handle SubConn state change: %p, %v", sc, s)
}
oldS, ok := b.scStates[sc]
if !ok {
if logger.V(2) {
logger.Infof("noncebalancer: got state changes for an unknown SubConn: %p, %v", sc, s)
}
return
}
if oldS == connectivity.TransientFailure &&
(s == connectivity.Connecting || s == connectivity.Idle) {
// Once a subconn enters TRANSIENT_FAILURE, ignore subsequent IDLE or
// CONNECTING transitions to prevent the aggregated state from being
// always CONNECTING when many backends exist but are all down.
// An IDLE SubConn is still asked to reconnect; only the bookkeeping
// (scStates, aggregate state, picker) is skipped.
if s == connectivity.Idle {
sc.Connect()
}
return
}
b.scStates[sc] = s
switch s {
case connectivity.Idle:
// Immediately ask an idle SubConn to connect again.
sc.Connect()
case connectivity.Shutdown:
// When an address was removed by resolver, b called Shutdown but kept
// the sc's state in scStates. Remove state for this sc here.
delete(b.scStates, sc)
case connectivity.TransientFailure:
// Save error to be reported via picker.
b.connErr = state.ConnectionError
}

// Fold this transition into the aggregate connectivity state.
b.state = b.csEvltr.RecordTransition(oldS, s)

// Regenerate picker when one of the following happens:
// - this sc entered or left ready
// - the aggregated state of balancer is TransientFailure
// (may need to update error message)
if (s == connectivity.Ready) != (oldS == connectivity.Ready) ||
b.state == connectivity.TransientFailure {
b.regeneratePicker()
}
// The state is published on every handled transition; when the picker was
// not regenerated above, the existing b.picker is sent again.
b.cc.UpdateState(balancer.State{ConnectivityState: b.state, Picker: b.picker})
}

// Close is a nop because, as in the base balancer this type is copied from,
// there is no internal state to clean up and Shutdown does not need to be
// called for the SubConns.
func (b *nonceBalancer) Close() {
}

// ExitIdle is a nop because, like the base balancer, this balancer attempts to
// stay connected to all SubConns at all times: new and idle SubConns are asked
// to connect as soon as they are seen.
func (b *nonceBalancer) ExitIdle() {
}
59 changes: 59 additions & 0 deletions grpc/noncebalancerv2/noncebalancer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package noncebalancerv2

import (
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/balancer/base"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/resolver"
)

const (
// Name is the name used to register the v2 nonce balancer with the gRPC
// runtime.
Name = "noncev2"

// SRVResolverScheme is the scheme used to invoke an instance of the SRV
// resolver which will use the v2 nonce balancer to pick backends. It would
// be ideal to export this from the SRV resolver package but that package is
// internal.
SRVResolverScheme = "nonce-srv-v2"
)

// builder creates nonceBalancer instances; its Build and Name methods have the
// shape of balancer.Builder, which NewBalancerBuilder returns it as.
type builder struct {
// name is the value reported by Name.
name string
// config is copied into every balancer produced by Build.
config base.Config
}

// NewBalancerBuilder returns a balancer.Builder that reports the given name
// and builds nonce balancers using the given config.
func NewBalancerBuilder(name string, config base.Config) balancer.Builder {
	bb := new(builder)
	bb.name = name
	bb.config = config
	return bb
}

// Build returns a new nonceBalancer bound to cc. The balancer starts in the
// Connecting state with no SubConns and carries the builder's config. The
// build options are ignored.
func (bb *builder) Build(cc balancer.ClientConn, _ balancer.BuildOptions) balancer.Balancer {
	return &nonceBalancer{
		cc:       cc,
		config:   bb.config,
		state:    connectivity.Connecting,
		csEvltr:  &balancer.ConnectivityStateEvaluator{},
		subConns: resolver.NewAddressMapV2[balancer.SubConn](),
		scStates: make(map[balancer.SubConn]connectivity.State),
		// Start with a picker that always returns ErrNoSubConnAvailable:
		// a SubConn state change may cause UpdateState to be called with
		// this picker before a real one has been generated.
		picker: base.NewErrPicker(balancer.ErrNoSubConnAvailable),
	}
}

// Name returns the name this builder was constructed with.
func (bb *builder) Name() string {
return bb.name
}

func init() {
balancer.Register(NewBalancerBuilder(Name, base.Config{}))
}
Loading
Loading