Skip to content

Commit 218e736

Browse files
serrislewSerris Lew
andauthored
Add per_server.connection metrics (#12250)
* Add per_server_conn metrics * Add warning * add int64_t layout --------- Co-authored-by: Serris Lew <lserris@apple.com>
1 parent 773dcb1 commit 218e736

6 files changed

Lines changed: 179 additions & 20 deletions

File tree

include/iocore/net/ConnectionTracker.h

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "swoc/TextView.h"
4444
#include <tscore/MgmtDefs.h>
4545
#include "iocore/net/SessionSharingAPIEnums.h"
46+
#include "tsutil/Metrics.h"
4647

4748
/**
4849
* Singleton class to keep track of the number of inbound and outbound connections.
@@ -83,6 +84,8 @@ class ConnectionTracker
8384
struct GlobalConfig {
8485
std::chrono::seconds client_alert_delay{60}; ///< Alert delay in seconds.
8586
std::chrono::seconds server_alert_delay{60}; ///< Alert delay in seconds.
87+
bool metric_enabled{false}; ///< Enabling per server metrics.
88+
std::string metric_prefix; ///< Per server metric prefix.
8689
};
8790

8891
// The names of the configuration values.
@@ -93,6 +96,8 @@ class ConnectionTracker
9396
static constexpr std::string_view CONFIG_SERVER_VAR_MIN{"proxy.config.http.per_server.connection.min"};
9497
static constexpr std::string_view CONFIG_SERVER_VAR_MATCH{"proxy.config.http.per_server.connection.match"};
9598
static constexpr std::string_view CONFIG_SERVER_VAR_ALERT_DELAY{"proxy.config.http.per_server.connection.alert_delay"};
99+
static constexpr std::string_view CONFIG_SERVER_VAR_METRIC_ENABLED{"proxy.config.http.per_server.connection.metric_enabled"};
100+
static constexpr std::string_view CONFIG_SERVER_VAR_METRIC_PREFIX{"proxy.config.http.per_server.connection.metric_prefix"};
96101

97102
/// A record for the outbound connection count.
98103
/// These are stored per outbound session equivalence class, as determined by the session matching.
@@ -131,6 +136,11 @@ class ConnectionTracker
131136
std::atomic<int> _in_queue{0}; ///< # of connections queued, waiting for a connection.
132137
std::atomic<Ticker> _last_alert{0}; ///< Absolute time of the last alert.
133138

139+
// Recording data as metrics
140+
ts::Metrics::Gauge::AtomicType *_count_metric = nullptr;
141+
ts::Metrics::Counter::AtomicType *_count_total_metric = nullptr;
142+
ts::Metrics::Counter::AtomicType *_blocked_metric = nullptr;
143+
134144
/** Constructor.
135145
* Construct from @c Key because the use cases do a table lookup first so the @c Key is already constructed.
136146
* @param key A populated @c Key structure - values are copied to the @c Group.
@@ -149,7 +159,8 @@ class ConnectionTracker
149159
/// @return @c true if an alert should be generated, @c false otherwise.
150160
bool should_alert(std::time_t *lat = nullptr);
151161
/// Time of the last alert in epoch seconds.
152-
std::time_t get_last_alert_epoch_time() const;
162+
std::time_t get_last_alert_epoch_time() const;
163+
static std::string metric_name(const Key &key, std::string_view fqdn, std::string metric_prefix);
153164

154165
/// Release the reference count to this group and remove it from the
155166
/// group table if it is no longer referenced.
@@ -345,6 +356,31 @@ ConnectionTracker::Group::hash(const Key &key)
345356
}
346357
}
347358

359+
inline std::string
360+
ConnectionTracker::Group::metric_name(const Key &key, std::string_view fqdn, std::string metric_prefix)
361+
{
362+
std::string metric_name = "";
363+
char buf[INET6_ADDRSTRLEN];
364+
365+
switch (key._match_type) {
366+
case MATCH_IP:
367+
metric_name += ats_ip_ntop(&key._addr.sa, buf, sizeof(buf));
368+
break;
369+
case MATCH_PORT:
370+
metric_name += ats_ip_nptop(&key._addr.sa, buf, sizeof(buf));
371+
break;
372+
case MATCH_HOST:
373+
metric_name += std::string(fqdn);
374+
break;
375+
case MATCH_BOTH:
376+
metric_name += std::string(fqdn) + "." + ats_ip_nptop(&key._addr.sa, buf, sizeof(buf));
377+
break;
378+
default:
379+
Warning("Invalid matching type to add to per_server.connections metrics");
380+
}
381+
return metric_prefix.empty() ? metric_name : metric_prefix + "." + metric_name;
382+
}
383+
348384
inline bool
349385
ConnectionTracker::TxnState::is_active()
350386
{
@@ -355,6 +391,12 @@ inline int
355391
ConnectionTracker::TxnState::reserve()
356392
{
357393
_reserved_p = true;
394+
// If metric enabled, use metric as count
395+
if (_g->_count_metric != nullptr) {
396+
ts::Metrics::Gauge::increment(_g->_count_metric);
397+
ts::Metrics::Counter::increment(_g->_count_total_metric);
398+
return _g->_count_metric->load();
399+
}
358400
return ++_g->_count;
359401
}
360402

@@ -363,7 +405,12 @@ ConnectionTracker::TxnState::release()
363405
{
364406
if (_reserved_p) {
365407
_reserved_p = false;
366-
--_g->_count;
408+
// If metric enabled, use metric as count
409+
if (_g->_count_metric != nullptr) {
410+
ts::Metrics::Gauge::decrement(_g->_count_metric);
411+
} else {
412+
--_g->_count;
413+
}
367414
}
368415
}
369416

@@ -395,6 +442,9 @@ ConnectionTracker::TxnState::update_max_count(int count)
395442
inline void
396443
ConnectionTracker::TxnState::blocked()
397444
{
445+
if (_g->_blocked_metric != nullptr) {
446+
ts::Metrics::Counter::increment(_g->_blocked_metric);
447+
}
398448
++_g->_blocked;
399449
}
400450

src/iocore/net/ConnectionTracker.cc

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,30 @@ Config_Update_Conntrack_Client_Alert_Delay(const char *name, RecDataT dtype, Rec
149149
return Config_Update_Conntrack_Server_Alert_Delay_Helper(name, dtype, data, cookie, config->client_alert_delay);
150150
}
151151

152+
bool
153+
Config_Update_Conntrack_Metric_Enabled(const char * /* name ATS_UNUSED */, RecDataT dtype, RecData data, void *cookie)
154+
{
155+
auto config = static_cast<ConnectionTracker::GlobalConfig *>(cookie);
156+
157+
if (RECD_INT == dtype) {
158+
config->metric_enabled = data.rec_int;
159+
return true;
160+
}
161+
return false;
162+
}
163+
164+
bool
165+
Config_Update_Conntrack_Metric_Prefix(const char * /* name ATS_UNUSED */, RecDataT dtype, RecData data, void *cookie)
166+
{
167+
auto config = static_cast<ConnectionTracker::GlobalConfig *>(cookie);
168+
169+
if (RECD_STRING == dtype) {
170+
config->metric_prefix = data.rec_string;
171+
return true;
172+
}
173+
return false;
174+
}
175+
152176
// // helper function to build up a json string from the passed conn groups.
153177
std::string
154178
Groups_To_JSON(std::vector<std::shared_ptr<ConnectionTracker::Group const>> const &groups)
@@ -162,8 +186,8 @@ Groups_To_JSON(std::vector<std::shared_ptr<ConnectionTracker::Group const>> cons
162186
static const std::string_view trailer{" \n]}"};
163187

164188
static const auto printer = [](swoc::BufferWriter &w, ConnectionTracker::Group const *g) -> swoc::BufferWriter & {
165-
w.print(item_fmt, g->_match_type, g->_addr, g->_fqdn, g->_count.load(), g->_count_max.load(), g->_blocked.load(),
166-
g->get_last_alert_epoch_time());
189+
w.print(item_fmt, g->_match_type, g->_addr, g->_fqdn, g->_count_metric != nullptr ? g->_count_metric->load() : g->_count.load(),
190+
g->_count_max.load(), g->_blocked.load(), g->get_last_alert_epoch_time());
167191
return w;
168192
};
169193

@@ -200,6 +224,8 @@ ConnectionTracker::config_init(GlobalConfig *global, TxnConfig *txn, RecConfigUp
200224
Enable_Config_Var(CONFIG_SERVER_VAR_MAX, &Config_Update_Conntrack_Max, config_cb, txn);
201225
Enable_Config_Var(CONFIG_SERVER_VAR_MATCH, &Config_Update_Conntrack_Match, config_cb, txn);
202226
Enable_Config_Var(CONFIG_SERVER_VAR_ALERT_DELAY, &Config_Update_Conntrack_Server_Alert_Delay, config_cb, global);
227+
Enable_Config_Var(CONFIG_SERVER_VAR_METRIC_ENABLED, &Config_Update_Conntrack_Metric_Enabled, config_cb, global);
228+
Enable_Config_Var(CONFIG_SERVER_VAR_METRIC_PREFIX, &Config_Update_Conntrack_Metric_Prefix, config_cb, global);
203229
}
204230

205231
ConnectionTracker::TxnState
@@ -252,6 +278,19 @@ ConnectionTracker::Group::Group(DirectionType direction, Key const &key, std::st
252278
_alert_delay{direction == DirectionType::INBOUND ? _global_config->client_alert_delay : _global_config->server_alert_delay}
253279
{
254280
Metrics::Gauge::increment(net_rsb.connection_tracker_table_size);
281+
// only add metrics for server connections
282+
if (_global_config->metric_enabled && direction == DirectionType::OUTBOUND) {
283+
std::string _metric_name = metric_name(key, fqdn, _global_config->metric_prefix);
284+
_count_metric = Metrics::Gauge::createPtr("proxy.process.http.per_server.current_connection.", _metric_name);
285+
_count_total_metric = Metrics::Counter::createPtr("proxy.process.http.per_server.total_connection.", _metric_name);
286+
_blocked_metric = Metrics::Counter::createPtr("proxy.process.http.per_server.blocked_connection.", _metric_name);
287+
288+
if (dbg_ctl.on()) {
289+
swoc::LocalBufferWriter<256> w;
290+
w.print("Registered per_server_connection.{}\0", _metric_name);
291+
DbgPrint(dbg_ctl, "%s", w.data());
292+
}
293+
}
255294
// store the host name if relevant.
256295
if (MATCH_HOST == _match_type || MATCH_BOTH == _match_type) {
257296
_fqdn.assign(fqdn);
@@ -322,7 +361,25 @@ ConnectionTracker::Group::should_alert(std::time_t *lat)
322361
void
323362
ConnectionTracker::Group::release()
324363
{
325-
if (_count > 0) {
364+
// If metric enabled, use metric as count
365+
if (_count_metric != nullptr) {
366+
if (_count_metric->load() > 0) {
367+
ts::Metrics::Gauge::decrement(_count_metric);
368+
if (_count_metric->load() == 0) {
369+
TableSingleton &table = _direction == DirectionType::INBOUND ? _inbound_table : _outbound_table;
370+
std::lock_guard<std::mutex> lock(table._mutex); // Table lock
371+
if (_count_metric->load() > 0) {
372+
// Someone else grabbed the Group between our last check and taking the
373+
// lock.
374+
return;
375+
}
376+
table._table.erase(_key);
377+
}
378+
} else {
379+
// A bit dubious, as there's no guarantee it's still negative, but even that would be interesting to know.
380+
Error("Number of tracked connections should be greater than or equal to zero: %" PRId64, _count_metric->load());
381+
}
382+
} else if (_count > 0) {
326383
if (--_count == 0) {
327384
TableSingleton &table = _direction == DirectionType::INBOUND ? _inbound_table : _outbound_table;
328385
std::lock_guard<std::mutex> lock(table._mutex); // Table lock
@@ -397,7 +454,8 @@ ConnectionTracker::dump_outbound(FILE *f)
397454

398455
for (std::shared_ptr<Group const> g : groups) {
399456
swoc::LocalBufferWriter<128> w;
400-
w.print("{:7} | {:5} | {:24} | {:33} | {:8} |\n", g->_count.load(), g->_blocked.load(), g->_addr, g->_hash, g->_match_type);
457+
w.print("{:7} | {:5} | {:24} | {:33} | {:8} |\n", (g->_count_metric != nullptr ? g->_count_metric->load() : g->_count.load()),
458+
g->_blocked.load(), g->_addr, g->_hash, g->_match_type);
401459
fwrite(w.data(), w.size(), 1, f);
402460
}
403461

src/proxy/http/HttpSM.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5555,7 +5555,8 @@ HttpSM::do_http_server_open(bool raw, bool only_direct)
55555555
}
55565556

55575557
// See if the outbound connection tracker data is needed. If so, get it here for consistency.
5558-
if (t_state.txn_conf->connection_tracker_config.server_max > 0 || t_state.txn_conf->connection_tracker_config.server_min > 0) {
5558+
if (t_state.txn_conf->connection_tracker_config.server_max > 0 || t_state.txn_conf->connection_tracker_config.server_min > 0 ||
5559+
t_state.http_config_param->global_connection_tracker_config.metric_enabled) {
55595560
t_state.outbound_conn_track_state =
55605561
ConnectionTracker::obtain_outbound(t_state.txn_conf->connection_tracker_config,
55615562
std::string_view{t_state.current.server->name}, t_state.current.server->dst_addr);
@@ -5582,6 +5583,9 @@ HttpSM::do_http_server_open(bool raw, bool only_direct)
55825583
}
55835584

55845585
ct_state.update_max_count(ccount);
5586+
} else if (t_state.http_config_param->global_connection_tracker_config.metric_enabled) {
5587+
auto &ct_state = t_state.outbound_conn_track_state;
5588+
ct_state.reserve();
55855589
}
55865590

55875591
// We did not manage to get an existing session and need to open a new connection

src/records/RecordsConfig.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,12 +367,16 @@ static const RecordElement RecordsConfig[] =
367367
,
368368
{RECT_CONFIG, "proxy.config.http.per_server.connection.max", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
369369
,
370-
{RECT_CONFIG, "proxy.config.http.per_server.connection.match", RECD_STRING, "both", RECU_DYNAMIC, RR_NULL, RECC_STR, "^(?:ip|host|both|none)$", RECA_NULL}
370+
{RECT_CONFIG, "proxy.config.http.per_server.connection.match", RECD_STRING, "both", RECU_DYNAMIC, RR_NULL, RECC_STR, "^(?:ip|port|host|both|none)$", RECA_NULL}
371371
,
372372
{RECT_CONFIG, "proxy.config.http.per_server.connection.alert_delay", RECD_INT, "60", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
373373
,
374374
{RECT_CONFIG, "proxy.config.http.per_server.connection.min", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
375375
,
376+
{RECT_CONFIG, "proxy.config.http.per_server.connection.metric_enabled", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "[0-1]", RECA_NULL}
377+
,
378+
{RECT_CONFIG, "proxy.config.http.per_server.connection.metric_prefix", RECD_STRING, "", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
379+
,
376380
{RECT_CONFIG, "proxy.config.http.attach_server_session_to_client", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
377381
,
378382
{RECT_CONFIG, "proxy.config.http.max_proxy_cycles", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
``
2+
> CONNECT``
3+
``
4+
< HTTP/1.1 200 OK
5+
``
6+
> CONNECT``
7+
``
8+
< HTTP/1.1 200 OK
9+
``

tests/gold_tests/origin_connection/per_server_connection_max.test.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,29 @@ def _configure_trafficserver(self) -> None:
4949
'proxy.config.dns.nameservers': f"127.0.0.1:{self._dns.Variables.Port}",
5050
'proxy.config.dns.resolv_conf': 'NULL',
5151
'proxy.config.diags.debug.enabled': 1,
52-
'proxy.config.diags.debug.tags': 'http',
52+
'proxy.config.diags.debug.tags': 'http|conn_track',
5353
'proxy.config.http.per_server.connection.max': self._origin_max_connections,
54+
'proxy.config.http.per_server.connection.metric_enabled': 1,
55+
'proxy.config.http.per_server.connection.metric_prefix': 'foo',
56+
'proxy.config.http.per_server.connection.match': 'port',
5457
})
5558
self._ts.Disk.diags_log.Content += Testers.ContainsExpression(
5659
f'WARNING:.*too many connections:.*limit={self._origin_max_connections}',
5760
'Verify the user is warned about the connection limit being hit.')
5861

62+
def _test_metrics(self) -> None:
63+
"""Use traffic_ctl to test metrics."""
64+
tr = Test.AddTestRun("Check connection metrics")
65+
tr.Processes.Default.Command = 'traffic_ctl metric match per_server'
66+
tr.Processes.Default.ReturnCode = 0
67+
tr.Processes.Default.Env = self._ts.Env
68+
tr.Processes.Default.Streams.All = Testers.ContainsExpression(
69+
f'per_server.total_connection.foo.127.0.0.1:{self._server.Variables.http_port} 4',
70+
'incorrect statistic return, or possible error.')
71+
tr.Processes.Default.Streams.All = Testers.ContainsExpression(
72+
f'per_server.blocked_connection.foo.127.0.0.1:{self._server.Variables.http_port} 1',
73+
'incorrect statistic return, or possible error.')
74+
5975
def run(self) -> None:
6076
"""Configure the TestRun."""
6177
tr = Test.AddTestRun('Verify we enforce proxy.config.http.per_server.connection.max')
@@ -65,18 +81,19 @@ def run(self) -> None:
6581

6682
tr.AddVerifierClientProcess('client', self._replay_file, http_ports=[self._ts.Variables.port])
6783

84+
self._test_metrics()
85+
6886

6987
class ConnectMethodTest:
7088
"""Test our max origin connection behavior with CONNECT traffic."""
7189

7290
_client_counter: int = 0
73-
_origin_max_connections: int = 3
7491

75-
def __init__(self) -> None:
92+
def __init__(self, max_conn) -> None:
7693
"""Configure the server processes in preparation for the TestRun."""
7794
self._configure_dns()
7895
self._configure_origin_server()
79-
self._configure_trafficserver()
96+
self._configure_trafficserver(max_conn)
8097

8198
def _configure_dns(self) -> None:
8299
"""Configure a nameserver for the test."""
@@ -86,18 +103,19 @@ def _configure_origin_server(self) -> None:
86103
"""Configure the httpbin origin server."""
87104
self._server = Test.MakeHttpBinServer("server2")
88105

89-
def _configure_trafficserver(self) -> None:
90-
self._ts = Test.MakeATSProcess("ts2")
106+
def _configure_trafficserver(self, max_conn) -> None:
107+
self._ts = Test.MakeATSProcess("ts2_" + str(max_conn))
91108

92109
self._ts.Disk.records_config.update(
93110
{
94111
'proxy.config.dns.nameservers': f"127.0.0.1:{self._dns.Variables.Port}",
95112
'proxy.config.dns.resolv_conf': 'NULL',
96113
'proxy.config.diags.debug.enabled': 1,
97-
'proxy.config.diags.debug.tags': 'http|dns|hostdb',
114+
'proxy.config.diags.debug.tags': 'http|dns|hostdb|conn_track',
98115
'proxy.config.http.server_ports': f"{self._ts.Variables.port}",
99116
'proxy.config.http.connect_ports': f"{self._server.Variables.Port}",
100-
'proxy.config.http.per_server.connection.max': self._origin_max_connections,
117+
'proxy.config.http.per_server.connection.metric_enabled': 1,
118+
'proxy.config.http.per_server.connection.max': max_conn,
101119
})
102120

103121
self._ts.Disk.remap_config.AddLines([
@@ -111,7 +129,20 @@ def _configure_client_with_slow_response(self, tr) -> 'Test.Process':
111129
tr.MakeCurlCommand(f"-v --fail -s -p -x 127.0.0.1:{self._ts.Variables.port} 'http://foo.com/delay/2'", p=p)
112130
return p
113131

114-
def run(self) -> None:
132+
def _test_metrics(self, blocked) -> None:
133+
"""Use traffic_ctl to test metrics."""
134+
tr = Test.AddTestRun("Check connection metrics")
135+
tr.Processes.Default.Command = 'traffic_ctl metric match per_server'
136+
tr.Processes.Default.ReturnCode = 0
137+
tr.Processes.Default.Env = self._ts.Env
138+
tr.Processes.Default.Streams.All = Testers.ContainsExpression(
139+
f'per_server.total_connection.www.this.origin.com.127.0.0.1:{self._server.Variables.Port} 5',
140+
'incorrect statistic return, or possible error.')
141+
tr.Processes.Default.Streams.All = Testers.ContainsExpression(
142+
f'per_server.blocked_connection.www.this.origin.com.127.0.0.1:{self._server.Variables.Port} {blocked}',
143+
'incorrect statistic return, or possible error.')
144+
145+
def run(self, blocked, gold_file) -> None:
115146
"""Verify per_server.connection.max with CONNECT traffic."""
116147
tr = Test.AddTestRun()
117148
tr.Processes.Default.StartBefore(self._dns)
@@ -134,10 +165,13 @@ def run(self) -> None:
134165
f"--next -v --fail -s -p -x 127.0.0.1:{self._ts.Variables.port} 'http://foo.com/get'")
135166
# Curl will have a 22 exit code if it receives a 5XX response (and we
136167
# expect a 503).
137-
tr.Processes.Default.ReturnCode = 22
138-
tr.Processes.Default.Streams.stderr = "gold/two_503_congested.gold"
168+
tr.Processes.Default.ReturnCode = 22 if blocked else 0
169+
tr.Processes.Default.Streams.stderr = gold_file
139170
tr.Processes.Default.TimeOut = 3
140171

172+
self._test_metrics(blocked)
173+
141174

142175
PerServerConnectionMaxTest().run()
143-
ConnectMethodTest().run()
176+
ConnectMethodTest(3).run(blocked=2, gold_file="gold/two_503_congested.gold")
177+
ConnectMethodTest(0).run(blocked=0, gold_file="gold/two_200_ok.gold")

0 commit comments

Comments
 (0)