diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 0fc3636aa24fb..8072da720a6fd 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -43,6 +43,11 @@ minor_behavior_changes: graceful close against the reset. This behavioral change can be temporarily reverted by setting runtime guard ``envoy.reloadable_features.ssl_socket_report_connection_reset`` to ``false``. +- area: http2 + change: | + The GOAWAY load shed point is fixed to use a graceful two-phase shutdown sequence, to avoid + risk to client traffic. This behavioral change can be temporarily reverted by setting runtime + guard ``envoy.reloadable_features.http2_fix_goaway_loadshed_point`` to ``false``. bug_fixes: # *Changes expected to improve the state of the world and are unlikely to have negative effects* diff --git a/source/common/http/conn_manager_impl.cc b/source/common/http/conn_manager_impl.cc index 77e44fe6fae18..d6940a569b90d 100644 --- a/source/common/http/conn_manager_impl.cc +++ b/source/common/http/conn_manager_impl.cc @@ -133,6 +133,10 @@ ConnectionManagerImpl::ConnectionManagerImpl( overload_manager.getLoadShedPoint(Server::LoadShedPointName::get().HcmDecodeHeaders)), hcm_ondata_creating_codec_( overload_manager.getLoadShedPoint(Server::LoadShedPointName::get().HcmCodecCreation)), + should_send_go_away_on_dispatch_(overload_manager.getLoadShedPoint( + Server::LoadShedPointName::get().H2ServerGoAwayOnDispatch)), + should_send_go_away_and_close_on_dispatch_(overload_manager.getLoadShedPoint( + Server::LoadShedPointName::get().H2ServerGoAwayAndCloseOnDispatch)), overload_stop_accepting_requests_ref_( overload_state_.getState(Server::OverloadActionNames::get().StopAcceptingRequests)), overload_disable_keepalive_ref_( @@ -155,6 +159,13 @@ ConnectionManagerImpl::ConnectionManagerImpl( ENVOY_LOG_ONCE_IF(trace, hcm_ondata_creating_codec_ == nullptr, "LoadShedPoint envoy.load_shed_points.hcm_ondata_creating_codec is not found. " "Is it configured?"); + ENVOY_LOG_ONCE_IF(trace, should_send_go_away_on_dispatch_ == nullptr, + "LoadShedPoint envoy.load_shed_points.http2_server_go_away_on_dispatch is not " + "found. Is it configured?"); + ENVOY_LOG_ONCE_IF( + trace, should_send_go_away_and_close_on_dispatch_ == nullptr, + "LoadShedPoint envoy.load_shed_points.http2_server_go_away_and_close_on_dispatch is not " + "found. Is it configured?"); } const ResponseHeaderMap& ConnectionManagerImpl::continueHeader() { @@ -514,6 +525,20 @@ Network::FilterStatus ConnectionManagerImpl::onData(Buffer::Instance& data, bool createCodec(data); } + if (Runtime::runtimeFeatureEnabled("envoy.reloadable_features.http2_fix_goaway_loadshed_point")) { + if (should_send_go_away_and_close_on_dispatch_ != nullptr && + should_send_go_away_and_close_on_dispatch_->shouldShedLoad()) { + sendGoAwayAndClose(/*graceful=*/false); + handleCodecOverloadError( + "Load shed point http2_server_go_away_and_close_on_dispatch triggered"); + return Network::FilterStatus::StopIteration; + } + if (should_send_go_away_on_dispatch_ != nullptr && + should_send_go_away_on_dispatch_->shouldShedLoad()) { + sendGoAwayAndClose(/*graceful=*/true); + } + } + bool redispatch; do { redispatch = false; @@ -794,7 +819,7 @@ void ConnectionManagerImpl::onDrainTimeout() { } void ConnectionManagerImpl::sendGoAwayAndClose(bool graceful) { - ENVOY_CONN_LOG(trace, "connection manager sendGoAwayAndClose was triggerred from filters.", + ENVOY_CONN_LOG(trace, "connection manager sendGoAwayAndClose was triggered.", read_callbacks_->connection()); if (go_away_sent_) { return; diff --git a/source/common/http/conn_manager_impl.h b/source/common/http/conn_manager_impl.h index 0077c748672ad..f8e17265a1516 100644 --- a/source/common/http/conn_manager_impl.h +++ b/source/common/http/conn_manager_impl.h @@ -665,6 +665,8 @@ class ConnectionManagerImpl : Logger::Loggable, Server::ThreadLocalOverloadState& overload_state_; Server::LoadShedPoint* accept_new_http_stream_{nullptr}; Server::LoadShedPoint* hcm_ondata_creating_codec_{nullptr}; + Server::LoadShedPoint* should_send_go_away_on_dispatch_{nullptr}; + Server::LoadShedPoint* should_send_go_away_and_close_on_dispatch_{nullptr}; // References into the overload manager thread local state map. Using these lets us avoid a // map lookup in the hot path of processing each request. const Server::OverloadActionState& overload_stop_accepting_requests_ref_; diff --git a/source/common/http/http2/codec_impl.cc b/source/common/http/http2/codec_impl.cc index e02141e75fa60..f1e2005117638 100644 --- a/source/common/http/http2/codec_impl.cc +++ b/source/common/http/http2/codec_impl.cc @@ -2452,17 +2452,20 @@ int ServerConnectionImpl::onHeader(int32_t stream_id, HeaderString&& name, Heade Http::Status ServerConnectionImpl::dispatch(Buffer::Instance& data) { // Make sure downstream outbound queue was not flooded by the upstream frames. RETURN_IF_ERROR(protocol_constraints_.checkOutboundFrameLimits()); - if (should_send_go_away_and_close_on_dispatch_ != nullptr && - should_send_go_away_and_close_on_dispatch_->shouldShedLoad()) { - ConnectionImpl::goAway(); - sent_go_away_on_dispatch_ = true; - return envoyOverloadError( - "Load shed point http2_server_go_away_and_close_on_dispatch triggered"); - } - if (should_send_go_away_on_dispatch_ != nullptr && !sent_go_away_on_dispatch_ && - should_send_go_away_on_dispatch_->shouldShedLoad()) { - ConnectionImpl::goAway(); - sent_go_away_on_dispatch_ = true; + if (!Runtime::runtimeFeatureEnabled( + "envoy.reloadable_features.http2_fix_goaway_loadshed_point")) { + if (should_send_go_away_and_close_on_dispatch_ != nullptr && + should_send_go_away_and_close_on_dispatch_->shouldShedLoad()) { + ConnectionImpl::goAway(); + sent_go_away_on_dispatch_ = true; + return envoyOverloadError( + "Load shed point http2_server_go_away_and_close_on_dispatch triggered"); + } + if (should_send_go_away_on_dispatch_ != nullptr && !sent_go_away_on_dispatch_ && + should_send_go_away_on_dispatch_->shouldShedLoad()) { + ConnectionImpl::goAway(); + sent_go_away_on_dispatch_ = true; + } } return ConnectionImpl::dispatch(data); } diff --git a/source/common/http/http2/codec_impl.h b/source/common/http/http2/codec_impl.h index 20a620efdc2d9..20288fb3c7e70 100644 --- a/source/common/http/http2/codec_impl.h +++ b/source/common/http/http2/codec_impl.h @@ -885,6 +885,7 @@ class ServerConnectionImpl : public ServerConnection, public ConnectionImpl { // The action to take when a request header name contains underscore characters. envoy::config::core::v3::HttpProtocolOptions::HeadersWithUnderscoresAction headers_with_underscores_action_; + // Remove when removing runtime feature `http2_fix_goaway_loadshed_point`. Server::LoadShedPoint* should_send_go_away_on_dispatch_{nullptr}; Server::LoadShedPoint* should_send_go_away_and_close_on_dispatch_{nullptr}; bool sent_go_away_on_dispatch_{false}; diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index cb461bd701b33..c609e90fb9f85 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -65,6 +65,7 @@ RUNTIME_GUARD(envoy_reloadable_features_health_check_after_cluster_warming); RUNTIME_GUARD(envoy_reloadable_features_hide_transport_failure_reason_in_response_body); RUNTIME_GUARD(envoy_reloadable_features_http1_close_connection_on_zombie_stream_complete); RUNTIME_GUARD(envoy_reloadable_features_http2_discard_host_header); +RUNTIME_GUARD(envoy_reloadable_features_http2_fix_goaway_loadshed_point); RUNTIME_GUARD(envoy_reloadable_features_http_async_client_retry_respect_buffer_limits); // Delay deprecation and decommission until UHV is enabled. RUNTIME_GUARD(envoy_reloadable_features_http_reject_path_with_fragment); diff --git a/test/common/http/conn_manager_impl_test_3.cc b/test/common/http/conn_manager_impl_test_3.cc index 2e5affa1f9969..cbb50fa3838de 100644 --- a/test/common/http/conn_manager_impl_test_3.cc +++ b/test/common/http/conn_manager_impl_test_3.cc @@ -848,6 +848,12 @@ TEST_F(HttpConnectionManagerImplTest, CodecCreationLoadShedPointCanCloseConnecti EXPECT_CALL(overload_manager_, getLoadShedPoint(Server::LoadShedPointName::get().HcmDecodeHeaders)) .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayOnDispatch)) + .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayAndCloseOnDispatch)) + .WillOnce(Return(nullptr)); setup(); @@ -874,6 +880,12 @@ TEST_F(HttpConnectionManagerImplTest, CodecCreationLoadShedPointBypasscheck) { EXPECT_CALL(overload_manager_, getLoadShedPoint(Server::LoadShedPointName::get().HttpDownstreamFilterCheck)) .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayOnDispatch)) + .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayAndCloseOnDispatch)) + .WillOnce(Return(nullptr)); setup(); @@ -904,6 +916,12 @@ TEST_F(HttpConnectionManagerImplTest, DecodeHeaderLoadShedPointCanRejectNewStrea EXPECT_CALL(overload_manager_, getLoadShedPoint(Server::LoadShedPointName::get().HttpDownstreamFilterCheck)) .WillRepeatedly(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayOnDispatch)) + .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayAndCloseOnDispatch)) + .WillOnce(Return(nullptr)); setup(); setupFilterChain(1, 0); @@ -938,6 +956,91 @@ TEST_F(HttpConnectionManagerImplTest, DecodeHeaderLoadShedPointCanRejectNewStrea ResponseHeaderMapPtr{new TestResponseHeaderMapImpl{{":status", "200"}}}, true, "details"); } +TEST_F(HttpConnectionManagerImplTest, GoAwayLoadShedPoint) { + if (!Runtime::runtimeFeatureEnabled( + "envoy.reloadable_features.http2_fix_goaway_loadshed_point")) { + GTEST_SKIP(); + } + Server::MockLoadShedPoint goaway_point; + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().HcmDecodeHeaders)) + .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().HcmCodecCreation)) + .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().HttpDownstreamFilterCheck)) + .WillRepeatedly(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayOnDispatch)) + .WillOnce(Return(&goaway_point)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayAndCloseOnDispatch)) + .WillOnce(Return(nullptr)); + + setup(); + setupFilterChain(1, 0); + + EXPECT_CALL(goaway_point, shouldShedLoad()).WillOnce(Return(true)); + + EXPECT_CALL(response_encoder_, encodeHeaders(_, true)) + .WillOnce(Invoke([](const ResponseHeaderMap& headers, bool) -> void { + EXPECT_EQ("200", headers.getStatusValue()); + })); + + // The client will see a warning GOAWAY, but no final GOAWAY. + EXPECT_CALL(*codec_, shutdownNotice()); + EXPECT_CALL(*codec_, goAway()).Times(0); + + startRequest(); + + // Clean up. + expectOnDestroy(); + + decoder_filters_[0]->callbacks_->streamInfo().setResponseCodeDetails(""); + decoder_filters_[0]->callbacks_->encodeHeaders( + ResponseHeaderMapPtr{new TestResponseHeaderMapImpl{{":status", "200"}}}, true, "details"); +} + +TEST_F(HttpConnectionManagerImplTest, GoAwayAndCloseLoadShedPoint) { + if (!Runtime::runtimeFeatureEnabled( + "envoy.reloadable_features.http2_fix_goaway_loadshed_point")) { + GTEST_SKIP(); + } + Server::MockLoadShedPoint goaway_and_close_point; + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().HcmDecodeHeaders)) + .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().HcmCodecCreation)) + .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().HttpDownstreamFilterCheck)) + .WillRepeatedly(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayOnDispatch)) + .WillOnce(Return(nullptr)); + EXPECT_CALL(overload_manager_, + getLoadShedPoint(Server::LoadShedPointName::get().H2ServerGoAwayAndCloseOnDispatch)) + .WillOnce(Return(&goaway_and_close_point)); + + setup(); + + EXPECT_CALL(goaway_and_close_point, shouldShedLoad()).WillOnce(Return(true)); + + EXPECT_CALL(response_encoder_, encodeHeaders(_, _)).Times(0); + + // The client will see the warning GOAWAY and final GOAWAY immediately afterward. + EXPECT_CALL(*codec_, shutdownNotice()); + EXPECT_CALL(*codec_, goAway()); + + // Gives the connection manager an event to handle. + Buffer::OwnedImpl fake_input; + conn_manager_->onData(fake_input, false); + + // No cleanup because the filter chain is not even created. +} + TEST_F(HttpConnectionManagerImplTest, TestStopAllIterationAndBufferOnDecodingPathFirstFilter) { setup(SetupOpts().setTracing(false)); setUpEncoderAndDecoder(true, true); diff --git a/test/common/http/http2/codec_impl_test.cc b/test/common/http/http2/codec_impl_test.cc index c9808854c1f1b..657270b201f26 100644 --- a/test/common/http/http2/codec_impl_test.cc +++ b/test/common/http/http2/codec_impl_test.cc @@ -4592,6 +4592,7 @@ TEST_P(Http2CodecImplTest, ChunkProcessingShouldNotScheduleIfReadDisabled) { } TEST_P(Http2CodecImplTest, ServerDispatchLoadShedPointCanCauseServerToSendGoAway) { + Runtime::maybeSetRuntimeGuard("envoy.reloadable_features.http2_fix_goaway_loadshed_point", false); initialize(); ASSERT_EQ(0, server_stats_store_.counter("http2.goaway_sent").value()); @@ -4613,6 +4614,7 @@ TEST_P(Http2CodecImplTest, ServerDispatchLoadShedPointCanCauseServerToSendGoAway } TEST_P(Http2CodecImplTest, ServerDispatchLoadShedPointSendGoAwayAndClose) { + Runtime::maybeSetRuntimeGuard("envoy.reloadable_features.http2_fix_goaway_loadshed_point", false); expect_buffered_data_on_teardown_ = true; initialize(); @@ -4633,6 +4635,8 @@ TEST_P(Http2CodecImplTest, ServerDispatchLoadShedPointSendGoAwayAndClose) { } TEST_P(Http2CodecImplTest, ServerDispatchLoadShedPointsAreOnlyConsultedOncePerDispatch) { + Runtime::maybeSetRuntimeGuard("envoy.reloadable_features.http2_fix_goaway_loadshed_point", false); + initialize(); int times_shed_load_goaway_invoked = 0; diff --git a/test/integration/BUILD b/test/integration/BUILD index 4d090d4b32eeb..510cff82d0bfc 100644 --- a/test/integration/BUILD +++ b/test/integration/BUILD @@ -1845,7 +1845,7 @@ envoy_cc_test( size = "large", srcs = ["overload_integration_test.cc"], rbe_pool = "6gig", - shard_count = 4, + shard_count = 8, tags = [ "cpu:3", ], diff --git a/test/integration/overload_integration_test.cc b/test/integration/overload_integration_test.cc index 693607e47d7bf..edd8574013898 100644 --- a/test/integration/overload_integration_test.cc +++ b/test/integration/overload_integration_test.cc @@ -1208,6 +1208,11 @@ TEST_P(LoadShedPointIntegrationTest, Http2ServerDispatchSendsGoAwayCompletingPen if (downstreamProtocol() != Http::CodecClient::Type::HTTP2) { return; } + config_helper_.addConfigModifier( + [=](envoy::extensions::filters::network::http_connection_manager::v3::HttpConnectionManager& + cm) -> void { + cm.mutable_drain_timeout()->MergeFrom(ProtobufUtil::TimeUtil::SecondsToDuration(7)); + }); autonomous_upstream_ = true; initializeOverloadManager( TestUtility::parseYaml(R"EOF( @@ -1236,13 +1241,24 @@ TEST_P(LoadShedPointIntegrationTest, Http2ServerDispatchSendsGoAwayCompletingPen first_request_encoder.encodeData(first_request_body, true); ASSERT_TRUE(first_request_decoder->waitForEndStream()); + // This is the initial GOAWAY, with max stream ID. EXPECT_TRUE(codec_client_->sawGoAway()); + + // This waits for the final GOAWAY, with a real stream ID. test_server_->waitForCounter("http2.goaway_sent", Eq(1)); - // The GOAWAY gets submitted with the first created stream as the last stream - // that will be processed on this connection, so the second stream's frames - // are ignored. - EXPECT_FALSE(second_request_decoder->complete()); + if (Runtime::runtimeFeatureEnabled("envoy.reloadable_features.http2_fix_goaway_loadshed_point")) { + // Because the load shed operation uses a two-phase GOAWAY, a request initiated before the drain + // timer fires will be processed as usual. + EXPECT_TRUE(second_request_decoder->waitForEndStream()); + + ASSERT_TRUE(codec_client_->waitForDisconnect()); + } else { + // The GOAWAY gets submitted with the first created stream as the last stream + // that will be processed on this connection, so the second stream's frames + // are ignored. + EXPECT_FALSE(second_request_decoder->complete()); + } updateResource(0.80); test_server_->waitForGauge( @@ -1282,9 +1298,9 @@ TEST_P(LoadShedPointIntegrationTest, Http2ServerDispatchSendsGoAwayAndClosesConn ASSERT_TRUE(codec_client_->waitForDisconnect()); EXPECT_TRUE(codec_client_->sawGoAway()); test_server_->waitForCounter("http2.goaway_sent", Eq(1)); - test_server_->waitForCounter("http.config_test.downstream_rq_overload_close", Eq(1)); - // The second request will not complete. + // The second request is ignored and will not complete, since the connection manager stops network + // filter iteration. EXPECT_FALSE(second_request_decoder->complete()); }