Skip to content

Commit 6b01139

Browse files
committed
Renew gRPC and REST connections after max_connection_age
1 parent 2ecbf4f commit 6b01139

11 files changed

Lines changed: 439 additions & 24 deletions

File tree

config/quickwit.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ version: 0.8
4949
# # How often cert_path/key_path are polled for changes and hot-reloaded; an
5050
# # immediate reload can also be triggered with SIGHUP. Defaults to 5m.
5151
# cert_reload_interval: 5m
52+
# # Maximum lifetime of a connection before the server sends an HTTP/2 GOAWAY and the
53+
# # client reconnects. Disabled when unset.
54+
# max_connection_age: 30m
55+
# # Grace period after the GOAWAY before a still-draining connection is forcefully
56+
# # closed. Requires `max_connection_age` to be set.
57+
# max_connection_age_grace: 30s
5258
#
5359
# Optional plaintext health-check server. Disabled unless `listen_port` is set (or the
5460
# `QW_HEALTH_LISTEN_PORT` environment variable). It serves only `/health/livez` and
@@ -75,6 +81,12 @@ version: 0.8
7581
# # How often cert_path/key_path are polled for changes and hot-reloaded; an
7682
# # immediate reload can also be triggered with SIGHUP. Defaults to 5m.
7783
# cert_reload_interval: 5m
84+
# # Maximum lifetime of an inbound connection before the server sends an HTTP/2 GOAWAY
85+
# # and the peer reconnects. Disabled when unset.
86+
# max_connection_age: 30m
87+
# # Grace period after the GOAWAY before a still-draining connection is forcefully
88+
# # closed. Requires `max_connection_age` to be set.
89+
# max_connection_age_grace: 30s
7890
#
7991
# IP address advertised by the node, i.e. the IP address that peer nodes should use to connect to the node for RPCs.
8092
# The environment variable `QW_ADVERTISE_ADDRESS` can also be used to override this value.

quickwit/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/quickwit-config/resources/tests/node_config/quickwit.json

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,30 @@
2424
"extra_headers": {
2525
"x-header-1": "header-value-1",
2626
"x-header-2": "header-value-2"
27-
}
27+
},
28+
"tls": {
29+
"cert_path": "/path/to/rest.crt",
30+
"key_path": "/path/to/rest.key",
31+
"ca_path": "/path/to/ca.crt",
32+
"verify_client_cert": true
33+
},
34+
"max_connection_age": "30m",
35+
"max_connection_age_grace": "30s"
2836
},
2937
"health": {
3038
"listen_port": 4444
3139
},
3240
"grpc": {
33-
"max_message_size": "10 MB"
41+
"max_message_size": "10 MB",
42+
"tls": {
43+
"cert_path": "/path/to/grpc.crt",
44+
"key_path": "/path/to/grpc.key",
45+
"ca_path": "/path/to/ca.crt",
46+
"verify_client_cert": true,
47+
"expected_name": "quickwit.local"
48+
},
49+
"max_connection_age": "1h",
50+
"max_connection_age_grace": "10s"
3451
},
3552
"storage": {
3653
"azure": {

quickwit/quickwit-config/resources/tests/node_config/quickwit.toml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,33 @@ default_index_root_uri = "s3://quickwit-indexes"
1515

1616
[rest]
1717
listen_port = 1111
18+
max_connection_age = "30m"
19+
max_connection_age_grace = "30s"
1820

1921
[rest.extra_headers]
2022
x-header-1 = "header-value-1"
2123
x-header-2 = "header-value-2"
2224

25+
[rest.tls]
26+
cert_path = "/path/to/rest.crt"
27+
key_path = "/path/to/rest.key"
28+
ca_path = "/path/to/ca.crt"
29+
verify_client_cert = true
30+
2331
[health]
2432
listen_port = 4444
2533

2634
[grpc]
2735
max_message_size = "10 MB"
36+
max_connection_age = "1h"
37+
max_connection_age_grace = "10s"
38+
39+
[grpc.tls]
40+
cert_path = "/path/to/grpc.crt"
41+
key_path = "/path/to/grpc.key"
42+
ca_path = "/path/to/ca.crt"
43+
verify_client_cert = true
44+
expected_name = "quickwit.local"
2845

2946
[storage.azure]
3047
account = "quickwit-dev"

quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,27 @@ rest:
2222
extra_headers:
2323
x-header-1: header-value-1
2424
x-header-2: header-value-2
25+
tls:
26+
cert_path: /path/to/rest.crt
27+
key_path: /path/to/rest.key
28+
ca_path: /path/to/ca.crt
29+
verify_client_cert: true
30+
max_connection_age: 30m
31+
max_connection_age_grace: 30s
2532

2633
health:
2734
listen_port: 4444
2835

2936
grpc:
3037
max_message_size: 10 MB
38+
tls:
39+
cert_path: /path/to/grpc.crt
40+
key_path: /path/to/grpc.key
41+
ca_path: /path/to/ca.crt
42+
verify_client_cert: true
43+
expected_name: quickwit.local
44+
max_connection_age: 1h
45+
max_connection_age_grace: 10s
3146

3247
storage:
3348
azure:

quickwit/quickwit-config/src/node_config/mod.rs

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ pub struct RestConfig {
5252
pub extra_headers: HeaderMap,
5353
#[serde(default, rename = "tls")]
5454
pub tls_config: Option<TlsConfig>,
55+
// See `GrpcConfig::max_connection_age`. Closes long-lived keep-alive connections so an updated
56+
// TLS certificate is eventually presented.
57+
#[serde(default, skip_serializing_if = "Option::is_none")]
58+
pub max_connection_age: Option<HumanDuration>,
59+
// See `GrpcConfig::max_connection_age_grace`.
60+
#[serde(default, skip_serializing_if = "Option::is_none")]
61+
pub max_connection_age_grace: Option<HumanDuration>,
5562
}
5663

5764
/// Configuration for the optional plaintext health-check HTTP server.
@@ -77,6 +84,14 @@ pub struct GrpcConfig {
7784
// keep alive ping request.
7885
#[serde(default, skip_serializing_if = "Option::is_none")]
7986
pub keep_alive: Option<KeepAliveConfig>,
87+
// Maximum lifetime of an inbound connection before the server sends an HTTP/2 GOAWAY and the
88+
// peer reconnects. Disabled when unset.
89+
#[serde(default, skip_serializing_if = "Option::is_none")]
90+
pub max_connection_age: Option<HumanDuration>,
91+
// Grace period after the GOAWAY before a still-draining connection is forcefully closed.
92+
// Requires `max_connection_age` to be set. Waits indefinitely when unset.
93+
#[serde(default, skip_serializing_if = "Option::is_none")]
94+
pub max_connection_age_grace: Option<HumanDuration>,
8095
}
8196

8297
fn default_http2_keep_alive_interval() -> HumanDuration {
@@ -116,6 +131,10 @@ impl GrpcConfig {
116131
if let Some(tls_config) = &self.tls_config {
117132
tls_config.validate()?;
118133
}
134+
ensure!(
135+
!(self.max_connection_age_grace.is_some() && self.max_connection_age.is_none()),
136+
"`grpc.max_connection_age_grace` requires `grpc.max_connection_age` to be set"
137+
);
119138
Ok(())
120139
}
121140
}
@@ -126,6 +145,8 @@ impl Default for GrpcConfig {
126145
max_message_size: Self::default_max_message_size(),
127146
tls_config: None,
128147
keep_alive: None,
148+
max_connection_age: None,
149+
max_connection_age_grace: None,
129150
}
130151
}
131152
}
@@ -1065,19 +1086,37 @@ mod tests {
10651086
fn test_grpc_config_validate() {
10661087
let grpc_config = GrpcConfig {
10671088
max_message_size: ByteSize::mb(1),
1068-
tls_config: None,
1069-
keep_alive: None,
1089+
..Default::default()
10701090
};
10711091
assert!(grpc_config.validate().is_ok());
10721092

10731093
let grpc_config = GrpcConfig {
10741094
max_message_size: ByteSize::kb(1),
1075-
tls_config: None,
1076-
keep_alive: None,
1095+
..Default::default()
10771096
};
10781097
assert!(grpc_config.validate().is_err());
10791098
}
10801099

1100+
#[test]
1101+
fn test_grpc_config_validate_rejects_connection_age_grace_without_age() {
1102+
let grpc_config = GrpcConfig {
1103+
max_connection_age_grace: Some(HumanDuration::try_from("10s".to_string()).unwrap()),
1104+
..Default::default()
1105+
};
1106+
let error = grpc_config.validate().unwrap_err().to_string();
1107+
assert!(
1108+
error.contains("requires `grpc.max_connection_age`"),
1109+
"unexpected error: {error}"
1110+
);
1111+
1112+
let grpc_config = GrpcConfig {
1113+
max_connection_age: Some(HumanDuration::try_from("1h".to_string()).unwrap()),
1114+
max_connection_age_grace: Some(HumanDuration::try_from("10s".to_string()).unwrap()),
1115+
..Default::default()
1116+
};
1117+
assert!(grpc_config.validate().is_ok());
1118+
}
1119+
10811120
fn tls_config(reload_interval: &str) -> TlsConfig {
10821121
TlsConfig {
10831122
cert_path: "/path/to/server.crt".to_string(),
@@ -1105,7 +1144,7 @@ mod tests {
11051144
let grpc_config = GrpcConfig {
11061145
max_message_size: ByteSize::mib(20),
11071146
tls_config: Some(tls_config("0s")),
1108-
keep_alive: None,
1147+
..Default::default()
11091148
};
11101149
assert!(grpc_config.validate().is_err());
11111150
}

quickwit/quickwit-config/src/node_config/serialize.rs

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use std::net::{IpAddr, SocketAddr};
1717
use std::str::FromStr;
1818
use std::time::Duration;
1919

20-
use anyhow::{Context, bail};
20+
use anyhow::{Context, bail, ensure};
2121
use bytesize::ByteSize;
2222
use http::HeaderMap;
2323
use quickwit_common::fs::get_disk_size;
@@ -31,6 +31,7 @@ use tracing::{info, warn};
3131
use super::{GrpcConfig, HealthConfig, RestConfig};
3232
use crate::config_value::ConfigValue;
3333
use crate::qw_env_vars::*;
34+
use crate::serde_utils::HumanDuration;
3435
use crate::service::QuickwitService;
3536
use crate::storage_config::StorageConfigs;
3637
use crate::templating::render_config;
@@ -457,6 +458,10 @@ struct RestConfigBuilder {
457458
pub extra_headers: HeaderMap,
458459
#[serde(default, rename = "tls")]
459460
pub tls_config: Option<TlsConfig>,
461+
#[serde(default, skip_serializing_if = "Option::is_none")]
462+
pub max_connection_age: Option<HumanDuration>,
463+
#[serde(default, skip_serializing_if = "Option::is_none")]
464+
pub max_connection_age_grace: Option<HumanDuration>,
460465
}
461466

462467
impl RestConfigBuilder {
@@ -475,11 +480,17 @@ impl RestConfigBuilder {
475480
if let Some(tls_config) = &self.tls_config {
476481
tls_config.validate()?;
477482
}
483+
ensure!(
484+
!(self.max_connection_age_grace.is_some() && self.max_connection_age.is_none()),
485+
"`rest.max_connection_age_grace` requires `rest.max_connection_age` to be set"
486+
);
478487
let rest_config = RestConfig {
479488
listen_addr: SocketAddr::new(listen_ip, listen_port),
480489
cors_allow_origins: self.cors_allow_origins,
481490
extra_headers: self.extra_headers,
482491
tls_config: self.tls_config,
492+
max_connection_age: self.max_connection_age,
493+
max_connection_age_grace: self.max_connection_age_grace,
483494
};
484495
Ok(rest_config)
485496
}
@@ -550,6 +561,8 @@ pub fn node_config_for_tests_from_ports(
550561
cors_allow_origins: Vec::new(),
551562
extra_headers: HeaderMap::new(),
552563
tls_config: None,
564+
max_connection_age: None,
565+
max_connection_age_grace: None,
553566
};
554567
NodeConfig {
555568
cluster_id: default_cluster_id().unwrap(),
@@ -624,7 +637,46 @@ mod tests {
624637
config.rest_config.extra_headers.get("x-header-2").unwrap(),
625638
"header-value-2"
626639
);
640+
assert_eq!(
641+
config.rest_config.tls_config,
642+
Some(TlsConfig {
643+
cert_path: "/path/to/rest.crt".to_string(),
644+
key_path: "/path/to/rest.key".to_string(),
645+
ca_path: "/path/to/ca.crt".to_string(),
646+
expected_name: None,
647+
verify_client_cert: true,
648+
cert_reload_interval: HumanDuration::try_from("5m".to_string()).unwrap(),
649+
})
650+
);
651+
assert_eq!(
652+
config.rest_config.max_connection_age,
653+
Some(HumanDuration::try_from("30m".to_string()).unwrap())
654+
);
655+
assert_eq!(
656+
config.rest_config.max_connection_age_grace,
657+
Some(HumanDuration::try_from("30s".to_string()).unwrap())
658+
);
659+
627660
assert_eq!(config.grpc_config.max_message_size, ByteSize::mb(10));
661+
assert_eq!(
662+
config.grpc_config.tls_config,
663+
Some(TlsConfig {
664+
cert_path: "/path/to/grpc.crt".to_string(),
665+
key_path: "/path/to/grpc.key".to_string(),
666+
ca_path: "/path/to/ca.crt".to_string(),
667+
expected_name: Some("quickwit.local".to_string()),
668+
verify_client_cert: true,
669+
cert_reload_interval: HumanDuration::try_from("5m".to_string()).unwrap(),
670+
})
671+
);
672+
assert_eq!(
673+
config.grpc_config.max_connection_age,
674+
Some(HumanDuration::try_from("1h".to_string()).unwrap())
675+
);
676+
assert_eq!(
677+
config.grpc_config.max_connection_age_grace,
678+
Some(HumanDuration::try_from("10s".to_string()).unwrap())
679+
);
628680

629681
assert_eq!(
630682
config

quickwit/quickwit-integration-tests/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ quickwit-parquet-engine = { workspace = true, optional = true }
5050
anyhow = { workspace = true }
5151
aws-sdk-sqs = { workspace = true }
5252
futures-util = { workspace = true }
53+
http-body-util = { workspace = true }
5354
hyper = { workspace = true }
5455
hyper-util = { workspace = true }
5556
itertools = { workspace = true }

0 commit comments

Comments
 (0)