diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d8d86441e..3861166f0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -53,7 +53,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout VERSION file and tag history - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: sparse-checkout: VERSION fetch-depth: 0 # Get all history regarding tags. We'll need that for VERSION checking below. @@ -474,7 +474,7 @@ jobs: run: pip install 'conan<2' - name: Checkout `flow` repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Add custom settings for Conan packages if: | @@ -673,7 +673,7 @@ jobs: - name: Upload test/demo logs (please inspect if failure(s) seen above) if: | always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: flow-test-logs-${{ matrix.compiler.id }}-${{ matrix.build-test-cfg.id }}-${{ matrix.cxx-std.id }} path: ${{ env.install-dir }}/bin/logs.tgz @@ -713,7 +713,7 @@ jobs: run: pip install 'conan<2' - name: Checkout `flow` repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 # See comments in Flow-IPC workflow counterpart. We use same techniques. - name: (On release creation only) Signal Pages about release (web site should update) @@ -766,7 +766,7 @@ jobs: ${{ github.workspace }}/build/${{ matrix.build-cfg.conan-profile-build-type }} - name: Upload documentation tarball - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: flow-doc path: ${{ github.workspace }}/doc/flow_doc.tgz diff --git a/conanfile.py b/conanfile.py index d81e916ab..22e175e22 100644 --- a/conanfile.py +++ b/conanfile.py @@ -54,7 +54,7 @@ class FlowRecipe(ConanFile): def configure(self): if self.options.build: # Currently need all headers; - # plus libs: chrono, filesystem, program_options, thread, timer (and all headers). + # plus libs: chrono, filesystem, program_options, random, thread, timer. # `filesystem` requires atomic. 
`thread` requires container, date_time, exception. # (Boost provides the with_* way of specifying it also; the Conan Boost pkg only has without_*.) self.options["boost"].without_charconv = True @@ -78,7 +78,6 @@ def configure(self): self.options["boost"].without_mpi = True self.options["boost"].without_nowide = True self.options["boost"].without_python = True - self.options["boost"].without_random = True self.options["boost"].without_regex = True self.options["boost"].without_serialization = True self.options["boost"].without_stacktrace = True diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 38bee4d43..02fe00b92 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,7 +21,7 @@ set(LIB_CMAKE_SCRIPT "../tools/cmake/FlowLikeLib.cmake") # Flow needs these Boost libs. # (Our dependents should do similar for any additional such dependencies; or omit this if none.) # (By the way no need to worry about find_package(Threads), as Boost::thread should take care of that.) -set(BOOST_LIBS thread chrono timer program_options filesystem) +set(BOOST_LIBS thread chrono timer program_options filesystem random) find_package(fmt REQUIRED) list(APPEND DEP_LIBS fmt::fmt) diff --git a/src/flow/net_flow/detail/low_lvl_io.cpp b/src/flow/net_flow/detail/low_lvl_io.cpp index cf57d94ee..6de21ecaa 100644 --- a/src/flow/net_flow/detail/low_lvl_io.cpp +++ b/src/flow/net_flow/detail/low_lvl_io.cpp @@ -383,8 +383,11 @@ void Node::async_low_lvl_packet_send_impl(const util::Udp_endpoint& low_lvl_remo const size_t bytes_to_send = packet->serialize_to_raw_data_and_log(&raw_bufs); assert(bytes_to_send != 0); - // Count an actual UDP stack send() call. - sock->m_snd_stats.low_lvl_packet_xfer_called(packet_type_id, delayed_by_pacing, bytes_to_send); + // Count an actual UDP stack send() call (if a socket was actually opened; else can't charge stats to one). 
+ if (sock) + { + sock->m_snd_stats.low_lvl_packet_xfer_called(packet_type_id, delayed_by_pacing, bytes_to_send); + } const size_t limit = opt(m_opts.m_dyn_low_lvl_max_packet_size); if (bytes_to_send > limit) @@ -399,6 +402,7 @@ void Node::async_low_lvl_packet_send_impl(const util::Udp_endpoint& low_lvl_remo "[\n" << packet->m_verbose_ostream_manip << "]."); // Short-circuit this, since no send occurred. + assert(sock && "Really? A giant low-level packet that is not even DATA? Bug."); sock->m_snd_stats.low_lvl_packet_xfer_completed(packet_type_id, bytes_to_send, 0); return; } diff --git a/src/flow/net_flow/detail/port_space.cpp b/src/flow/net_flow/detail/port_space.cpp index 70864023f..e4285c5cb 100644 --- a/src/flow/net_flow/detail/port_space.cpp +++ b/src/flow/net_flow/detail/port_space.cpp @@ -19,6 +19,8 @@ #include "flow/net_flow/detail/port_space.hpp" #include "flow/util/util.hpp" #include "flow/error/error.hpp" +#include +#include #include namespace flow::net_flow @@ -47,8 +49,7 @@ Port_space::Port_space(log::Logger* logger_ptr) : // Set the bit fields to their permanent widths. m_service_ports(S_NUM_SERVICE_PORTS), m_ephemeral_ports(S_NUM_EPHEMERAL_PORTS), - m_ephemeral_and_recent_ephemeral_ports(S_NUM_EPHEMERAL_PORTS), - m_rnd_generator(Random_generator::result_type(util::time_since_posix_epoch().count())) + m_ephemeral_and_recent_ephemeral_ports(S_NUM_EPHEMERAL_PORTS) { // All 1s = all ports available. m_service_ports.set(); @@ -261,10 +262,14 @@ void Port_space::return_port(flow_port_t port, Error_code* err_code) size_t Port_space::find_available_port_bit_idx(const Bit_set& ports) { using boost::random::uniform_int_distribution; + using boost::random::random_device; - // Pick a random bit in bit field. + /* Pick a random bit in bit field. 
Use a CSPRNG (not the general-purpose mt19937-based + * Random_generator in flow::util) because ephemeral port selection must be unpredictable to off-path + * attackers attempting to guess the tuple of an established connection (cf. RFC 6056). */ + random_device rnd_dev; // Setting this up, in Linux at least, is ~microseconds per new endpoint. No prob. uniform_int_distribution range{0, ports.size() - 1}; - size_t port_bit_idx = range(m_rnd_generator); + size_t port_bit_idx = range(rnd_dev); // If that bit is 0, go right until you find a 1. if (!ports.test(port_bit_idx)) diff --git a/src/flow/net_flow/detail/port_space.hpp b/src/flow/net_flow/detail/port_space.hpp index 0c78fa03d..b89cffa31 100644 --- a/src/flow/net_flow/detail/port_space.hpp +++ b/src/flow/net_flow/detail/port_space.hpp @@ -22,10 +22,8 @@ #include "flow/net_flow/net_flow_fwd.hpp" #include "flow/net_flow/error/error.hpp" #include "flow/log/log.hpp" -#include "flow/util/random.hpp" #include #include -#include #include namespace flow::net_flow @@ -126,6 +124,9 @@ class Port_space : * Reserve the specified service port, or reserve_ephemeral_port() if the specified port is * #S_PORT_ANY. * + * @warning As an internal API, this one assumes `err_code` is not null. Passing null won't throw; + * undefined behavior results. + * * @param port * A valid and still available service port number, or #S_PORT_ANY. * @param err_code @@ -139,6 +140,9 @@ class Port_space : /** * Reserve a randomly chosen available ephemeral port. * + * @warning As an internal API, this one assumes `err_code` is not null. Passing null won't throw; + * undefined behavior results. + * * @param err_code * See flow::Error_code docs for error reporting semantics. error::Code generated: * error::Code::S_OUT_OF_PORTS. @@ -149,6 +153,9 @@ class Port_space : /** * Return a previously reserved port (of any type). * + * @warning As an internal API, this one assumes `err_code` is not null. Passing null won't throw; + * undefined behavior results. 
+ * * @param port * A previously reserved port. * @param err_code @@ -163,9 +170,6 @@ class Port_space : /// Short-hand for bit set of arbitary length, representing a port set (each bit is a port; 1 open, 0 reserved). using Bit_set = boost::dynamic_bitset<>; - /// Random number generator. - using Random_generator = util::Rnd_gen_uniform_range_base::Random_generator; - /// A type same as #flow_port_t but larger, useful when doing arithmetic that might hit overflow in corner cases. using flow_port_sans_overflow_t = uint32_t; @@ -241,9 +245,6 @@ class Port_space : * oldest recently used port) and use that. If emptied, there are simply no more ports left. */ std::queue m_recent_ephemeral_ports; - - /// Random number generator for picking ports. - Random_generator m_rnd_generator; }; // class Port_space } // namespace flow::net_flow diff --git a/src/flow/net_flow/detail/seq_num.cpp b/src/flow/net_flow/detail/seq_num.cpp index a5cbc2671..b6069cb94 100644 --- a/src/flow/net_flow/detail/seq_num.cpp +++ b/src/flow/net_flow/detail/seq_num.cpp @@ -19,6 +19,8 @@ #include "flow/net_flow/detail/seq_num.hpp" #include "flow/util/random.hpp" #include +#include +#include #include #include @@ -33,9 +35,6 @@ namespace flow::net_flow * byte it would take many centuries for the sequence numbers to overflow seq_num_t. */ const Sequence_number::seq_num_t Sequence_number::Generator::S_MAX_INIT_SEQ_NUM = std::numeric_limits::max() / 2; -const Fine_duration Sequence_number::Generator::S_TIME_PER_SEQ_NUM = boost::chrono::microseconds{4}; // From RFC 793. -const Fine_duration Sequence_number::Generator::S_MIN_DELAY_BETWEEN_ISN - = boost::chrono::milliseconds{500}; // From TCP/IP Illustrated Vol. 2: The Implementation (BSD Net/3). // Implementations. 
@@ -47,50 +46,28 @@ Sequence_number::Generator::Generator(log::Logger* logger_ptr) : Sequence_number Sequence_number::Generator::generate_init_seq_num() { - using std::abs; - using util::Rnd_gen_uniform_range; + using boost::random::random_device; + using boost::random::uniform_int_distribution; - const Fine_time_pt& now = Fine_clock::now(); - - if (m_last_init_seq_num.m_num == 0) - { - /* First call to this. Just pick a random-ish ISN from the entire allowed range. Seed on current time. - * We only generate one random number ever (for this `this`), so it's fine to just seed it here and use once. - * @todo Could use a `static` data member RNG. Good randomness across multiple `this`s isn't required, so the - * various considerations this would introduce -- multi-threadedness, for instance -- might be too much to worry - * about given our modest, non-cryptographic needs here. */ - - Rnd_gen_uniform_range rnd_single_use{1, S_MAX_INIT_SEQ_NUM}; // 0 is a reserved number; do not use. - m_last_init_seq_num.m_num = rnd_single_use(); - } - else - { - // All subsequent calls. - - /* For now basically follow RFC 793 (original TCP spec): new sequence number every N - * microseconds, with N defined by the RFC. Additionally, add a large constant M, as if another - * 0.5 seconds had passed (as BSD did at least in 1995, as documented in TCP/IP Illustrated: - * Vol. 2). abs() should not be necessary with Fine_clock, but just in case.... */ - - m_last_init_seq_num.m_num += - seq_num_t((now - m_last_isn_generation + S_MIN_DELAY_BETWEEN_ISN) / S_TIME_PER_SEQ_NUM); + /* Generate each ISN independently from a CSPRNG. + * + * Historically (RFC 793: original TCP spec) this was done with a clock-based scheme (new sequence number every N + * usec, with N defined by the RFC). This code formerly (written circa 2011) implemented this; however + * clock-based ISN generation is predictable to off-path attackers. 
Hence for a full-on secure implementation + * we'd perhaps want the full RFC 6528 keyed-hash scheme used by modern production TCP stacks. @todo Do that. + * + * For the time being (2026), we will do a per-call CSPRNG pull instead. This is simpler and equally + * unpredictable for this context. */ - /* It's incredibly unlikely that overflowed seq_num_t given the times involved, but even if it - * did, so be it. In that case pretty much any random ISN is still OK. So just assume no - * overflow.... */ + random_device rnd_dev; // Setting this up, in Linux at least, is microseconds per connection. No prob. + uniform_int_distribution range{1, S_MAX_INIT_SEQ_NUM}; // 0 is a reserved number; do not use. - // Wrap ISN if needed. (It's perfectly possible, e.g., if original ISN was right near the end of allowed range.) - if (m_last_init_seq_num.m_num > S_MAX_INIT_SEQ_NUM) - { - // 0 is a reserved number; do not use. - m_last_init_seq_num.m_num = m_last_init_seq_num.m_num - S_MAX_INIT_SEQ_NUM; - } - } + Sequence_number result; + result.m_num = range(rnd_dev); - FLOW_LOG_TRACE("Generated ISN [" << m_last_init_seq_num << "]."); + FLOW_LOG_TRACE("Generated ISN [" << result << "]."); - m_last_isn_generation = now; - return m_last_init_seq_num; + return result; } // Sequence_number::Generator::generate_init_seq_num() Sequence_number::Sequence_number() : diff --git a/src/flow/net_flow/detail/seq_num.hpp b/src/flow/net_flow/detail/seq_num.hpp index 8b6f15573..02b47e20e 100644 --- a/src/flow/net_flow/detail/seq_num.hpp +++ b/src/flow/net_flow/detail/seq_num.hpp @@ -365,7 +365,18 @@ class Sequence_number * initialize its state and then call generate_init_seq_num() whenever an ISN is needed. * * ### Thread safety ### - * Not safe to read/write or write/write one object simultaneously. + * Safe to read/write or write/write one object simultaneously. 
+ * + * ### Impl notes ### + * As it stands as of this writing a `Generator` holds no non-`static` data other than the logging context; + * for good randomness it uses `random_device` a-la `/dev/urandom` which requires no state. + * It could be replaced by a `static` function in Sequence_number, for example. + * + * However, historically, it used to be more complex (with a clock-based ISN-generation scheme RFC 793 from 1981) + * and thus did have state. While eventually we deemed this outdated and unnecessary (hence the simple current + * impl), there is also a to-do (not high-priority) inside generate_init_seq_num() for a more + * advanced approach which would require state. All in all we found it prudent to keep this encapsulated as + * an object class. */ class Sequence_number::Generator : public log::Log_context, @@ -403,23 +414,6 @@ class Sequence_number::Generator : * remove the need to worry about wrapping as well. */ static const seq_num_t S_MAX_INIT_SEQ_NUM; - - /// The ISN given out at a given time should increment every N; this is the value of N. - static const Fine_duration S_TIME_PER_SEQ_NUM; - - /** - * In addition to the actual time passed between two ISN generations, pretend this much additional - * time has also passed. - */ - static const Fine_duration S_MIN_DELAY_BETWEEN_ISN; - - // Data. - - /// The last initial sequence number returned by generate_init_seq_num() (or zero if never called). - Sequence_number m_last_init_seq_num; - - /// #Fine_clock time of the last invocation of generate_init_seq_num() (or default if never called). - Fine_time_pt m_last_isn_generation; }; // class Sequence_number::Generator // Free functions: in *_fwd.hpp. 
diff --git a/src/flow/net_flow/node.cpp b/src/flow/net_flow/node.cpp index 4bfcba5b2..28b7e8cc4 100644 --- a/src/flow/net_flow/node.cpp +++ b/src/flow/net_flow/node.cpp @@ -1031,7 +1031,8 @@ const Node_options& Node::validate_options(const Node_options& opts, bool init, const bool checks_ok = VALIDATE_CHECK(opts.m_st_low_lvl_max_buf_size >= 128 * 1024) && VALIDATE_CHECK(opts.m_st_timer_min_period.count() >= 0) && - VALIDATE_CHECK(opts.m_dyn_low_lvl_max_packet_size >= 512); + VALIDATE_CHECK(opts.m_dyn_low_lvl_max_packet_size >= 512) && + VALIDATE_CHECK(opts.m_dyn_accept_backlog_limit > 0); if (!checks_ok) { diff --git a/src/flow/net_flow/node.hpp b/src/flow/net_flow/node.hpp index 8c0e9ffb2..cb499b166 100644 --- a/src/flow/net_flow/node.hpp +++ b/src/flow/net_flow/node.hpp @@ -3737,12 +3737,6 @@ class Node : /// Sequence number generator (at least to generate ISNs). Only thread W can access this. Sequence_number::Generator m_seq_num_generator; - /** - * Random number generator for picking security tokens; seeded on time at Node construction and generates - * integers from the entire range. (Not thread-safe. Use only in thread W.) - */ - util::Rnd_gen_uniform_range m_rnd_security_tokens; - /** * The peer-to-peer connections this Node is currently tracking. Their states are not Peer_socket::State::S_CLOSED. * Only thread W can access this. diff --git a/src/flow/net_flow/options.cpp b/src/flow/net_flow/options.cpp index dc6c8b8fd..812f8d6d8 100644 --- a/src/flow/net_flow/options.cpp +++ b/src/flow/net_flow/options.cpp @@ -52,7 +52,9 @@ Node_options::Node_options() : // default max_block_size + a SMALL overhead. m_dyn_low_lvl_max_packet_size(1124), // This default is explained in the option description (as of this writing): it's faster. - m_dyn_guarantee_one_low_lvl_in_buf_per_socket(true) + m_dyn_guarantee_one_low_lvl_in_buf_per_socket(true), + // For TCP these days values like ~500 are not-atypical, but let's be modest by default. 
+ m_dyn_accept_backlog_limit(64) { // Nothing. } @@ -136,6 +138,15 @@ void Node_options::setup_config_parsing_helper(Options_description* opts_desc, "faster, especially if low-lvl-max-packet-size is unnecessarily large; but arguably the zero-copy behavior " "may become faster if some implementation details related to this change. So this switch seemed worth " "keeping."); + ADD_CONFIG_OPTION + (m_dyn_accept_backlog_limit, + "Maximum backlog size for each `Server_socket` subsequently created via `Node::listen()`. The backlog " + "(for a given `Server_socket`) is defined as the total number of connections either in SYN_RCVD state " + "(SYN_ACK sent, awaiting SYN_ACK_ACK) or in ESTABLISHED state but not yet user-accepted via " + "`Server_socket::*accept()`. When a SYN arrives while the backlog is full, it is rejected with an RST " + "response. This value is captured at `Node::listen()` time and fixed for the resulting `Server_socket`'s " + "lifetime; subsequent changes affect only `Server_socket`s created by later `listen()` calls. " + "It *is* dynamic at the `Node` level, but does *not* dynamically affect existing listening `Server_socket`s."); Peer_socket_options::setup_config_parsing_helper(opts_desc, &target->m_dyn_sock_opts, @@ -195,6 +206,9 @@ Peer_socket_options::Peer_socket_options() : m_st_snd_buf_max_size(6 * 1024 * 1024), // @todo Ditto. m_st_rcv_buf_max_size(m_st_snd_buf_max_size), + /* If not for SYN-flood-like possibility, this could be ~= m_st_rcv_buf_max_size. Instead let's keep it modest. + * It would after all be strange to receive a large number of DATAs without a single retried SYN_ACK_ACK. */ + m_st_rcv_sync_rcvd_data_q_cumulative_max_size(64 * 1024), // Disabling flow control is an emergency measure only. m_st_rcv_flow_control_on(true), // Seems reasonable. Should be a few hundred KB typically. 
@@ -323,6 +337,11 @@ void Peer_socket_options::setup_config_parsing_helper(Options_description* opts_ "Maximum number of bytes that the Receive buffer can hold. This determines how many bytes " "can be received in the background by the Node without user doing any receive()s. " "It is also rounded up to to the nearest multiple of max-block-size."); + ADD_CONFIG_OPTION + (m_st_rcv_sync_rcvd_data_q_cumulative_max_size, + "Due to loss or reordering we may receive DATA packets before receiving the handshake-finishing SYN_ACK_ACK; " + "any such SYN_RCVD-state DATA packets beyond this cumulative payload size shall be silently dropped. " + "The value 0 will drop all such DATA packets."); ADD_CONFIG_OPTION (m_st_rcv_flow_control_on, "Whether flow control (a/k/a receive window a/k/a rcv_wnd management) is enabled. If this is " diff --git a/src/flow/net_flow/options.hpp b/src/flow/net_flow/options.hpp index bf92732f7..7f2fa4bdc 100644 --- a/src/flow/net_flow/options.hpp +++ b/src/flow/net_flow/options.hpp @@ -140,6 +140,13 @@ struct Peer_socket_options */ size_t m_st_rcv_buf_max_size; + /** + * Due to loss or reordering we may receive DATA packets before receiving the handshake-finishing SYN_ACK_ACK; + * any such SYN_RCVD-state DATA packets beyond this cumulative payload size shall be silently dropped. + * The value 0 will drop all such DATA packets. + */ + size_t m_st_rcv_sync_rcvd_data_q_cumulative_max_size; + /** * Whether flow control (a/k/a receive window a/k/a rcv_wnd management) is enabled. If this is * disabled, an infinite rcv_wnd will always be advertised to the sender; so if the Receive buffer @@ -567,6 +574,19 @@ struct Node_options */ bool m_dyn_guarantee_one_low_lvl_in_buf_per_socket; + /** + * Maximum backlog size for each `Server_socket` subsequently created via `Node::listen()`. 
The backlog + * (for a given `Server_socket`) is defined as the total number of connections either in SYN_RCVD state + * (SYN_ACK sent, awaiting SYN_ACK_ACK) or in ESTABLISHED state but not yet user-accepted via + * `Server_socket::*accept()`. When a SYN arrives while the backlog is full, it is rejected with an RST response. + * + * This value is captured at `Node::listen()` time and fixed for the resulting `Server_socket`'s + * lifetime; subsequent changes affect only `Server_socket`s created by later `listen()` calls. + * + * It *is* dynamic at the `Node` level, but does *not* dynamically affect existing listening `Server_socket`s. + */ + unsigned int m_dyn_accept_backlog_limit; + /** * The set of per-Peer_socket options in this per-Node set of options. This represents the * per-socket options each subsequent socket generated in the corresponding Node will get, unless diff --git a/src/flow/net_flow/peer_socket.cpp b/src/flow/net_flow/peer_socket.cpp index 04f8ca1bb..d72f9d1cd 100644 --- a/src/flow/net_flow/peer_socket.cpp +++ b/src/flow/net_flow/peer_socket.cpp @@ -4340,7 +4340,7 @@ void Node::handle_connection_rexmit_timer_event(const Socket_id& socket_id, Peer // We are in thread W. assert((sock->m_int_state == Peer_socket::Int_state::S_SYN_SENT) - || (sock->m_int_state != Peer_socket::Int_state::S_SYN_RCVD)); + || (sock->m_int_state == Peer_socket::Int_state::S_SYN_RCVD)); // Not an error (so not WARNING), but it's rare and interesting enough for INFO. 
FLOW_LOG_INFO("Connection handshake retransmit timer [" << sock << "] triggered; was on " @@ -6176,6 +6176,7 @@ bool Node::sock_validate_options(const Peer_socket_options& opts, VALIDATE_STATIC_OPTION(m_st_connect_retransmit_timeout) && VALIDATE_STATIC_OPTION(m_st_snd_buf_max_size) && VALIDATE_STATIC_OPTION(m_st_rcv_buf_max_size) && + VALIDATE_STATIC_OPTION(m_st_rcv_sync_rcvd_data_q_cumulative_max_size) && VALIDATE_STATIC_OPTION(m_st_rcv_flow_control_on) && VALIDATE_STATIC_OPTION(m_st_rcv_buf_max_size_slack_percent) && VALIDATE_STATIC_OPTION(m_st_rcv_buf_max_size_to_advertise_percent) && diff --git a/src/flow/net_flow/server_socket.cpp b/src/flow/net_flow/server_socket.cpp index 688162e61..a3e642131 100644 --- a/src/flow/net_flow/server_socket.cpp +++ b/src/flow/net_flow/server_socket.cpp @@ -21,6 +21,7 @@ #include "flow/net_flow/detail/stats/bandwidth.hpp" #include "flow/net_flow/detail/cong_ctl.hpp" #include "flow/async/util.hpp" +#include namespace flow::net_flow { @@ -36,7 +37,8 @@ Server_socket::Server_socket(log::Logger* logger_ptr, const Peer_socket_options* m_child_sock_opts(child_sock_opts ? new Peer_socket_options{*child_sock_opts} : nullptr), m_state(State::S_CLOSED), // Incorrect; set explicitly. m_node(nullptr), // Incorrect; set explicitly. - m_local_port(S_PORT_ANY) // Incorrect; set explicitly. + m_local_port(S_PORT_ANY), // Incorrect; set explicitly. + m_backlog_limit(0) // Incorrect; set explicitly. { // Only print pointer value, because most members are garbage at this point. FLOW_LOG_TRACE("Server_socket [" << static_cast(this) << "] created."); @@ -266,7 +268,12 @@ void Node::listen_worker(flow_port_t local_port, const Peer_socket_options* chil // else local_port = serv->m_local_port; // If they'd specified S_PORT_ANY, this is now a random port. - FLOW_LOG_INFO("NetFlow worker thread listening for passive-connects on [" << serv << "]."); + /* Save backlog limit. 
Note it is dynamic at the Node level (thus future listen()s can set different values here), + * but that does not (in and of itself anyway) mean it can be subsequently changed for the `serv` we are returning. */ + serv->m_backlog_limit = opt(m_opts.m_dyn_accept_backlog_limit); + + FLOW_LOG_INFO("NetFlow worker thread listening for passive-connects on [" << serv << "]; " + "backlog limit [" << serv->m_backlog_limit << "]."); if (util::key_exists(m_servs, local_port)) { @@ -433,12 +440,31 @@ Peer_socket::Ptr Node::handle_syn_to_listening_server(Server_socket::Ptr serv, const util::Udp_endpoint& low_lvl_remote_endpoint) { using util::Blob; - using boost::random::uniform_int_distribution; + using boost::random::random_device; + using security_token_t = Peer_socket::security_token_t; // We are in thread W. /* We just got SYN (an overture from the other side). Create a peer-to-peer socket to track that - * connection being established. */ + * connection being established. Though, if we are at the backlog limit, then there's no point: reject immediately + * without even temporarily taking the memory for the Peer_socket. */ + { + const auto backlog_sz = serv->m_unaccepted_socks.size() + serv->m_connecting_socks.size(); + if (backlog_sz >= static_cast(serv->m_backlog_limit)) + // As of this writing `==` is sufficient, but just in case it becomes mutable someday: use `>=`. + { + /* (Let's not use INFO or WARNING here; if there's a SYN-flood going on then no need to fill up the logs. + * After all it's not *that* interesting of a message, on balance. One could argue that logging rate-limiting + * is the `Logger`'s job -- and indeed it is -- but in this case we can defensibly avoid this + * difficulty altogether.) 
*/ + FLOW_LOG_TRACE("NetFlow worker thread, on receipt of [" << syn->m_type_ostream_manip << "] was about to " + "start passive-connect on [" << serv << "], but the backlog would then exceed the " + "limit [" << serv->m_backlog_limit << "]; resetting connection."); + async_no_sock_low_lvl_rst_send(Low_lvl_packet::const_ptr_cast(syn), low_lvl_remote_endpoint); + return Peer_socket::Ptr{}; + } + // else OK; do create that peer-to-peer socket. + } Peer_socket::Ptr sock; if (serv->m_child_sock_opts) @@ -542,8 +568,19 @@ Peer_socket::Ptr Node::handle_syn_to_listening_server(Server_socket::Ptr serv, init_seq_num.set_metadata('L',init_seq_num + 1, sock->max_block_size()); // Sequence number of first bit of actual data. sock->m_snd_next_seq_num = init_seq_num + 1; - // Security token. Random number from entire numeric range. Remember it for later verification. - sock->m_security_token = m_rnd_security_tokens(); + + /* Security token. Random number from entire numeric range. Remember it for later verification. + * Use a CSPRNG (not the general-purpose mt19937-based Rnd_gen_uniform_range) because this token + * must be unpredictable to off-path attackers attempting to forge SYN_ACK_ACK packets. */ + random_device rnd_dev; // Setting this up, in Linux at least, is microseconds per connection. No prob. + static_assert((random_device::min() == 0) && (random_device::max() == 0xFFFF'FFFF), + "The following statement assumes full 32-bit range of random_device."); + static_assert(sizeof(decltype(Peer_socket::m_security_token)) == (64 / 8), + "The following statement assumes 64-bit security tokens."); + sock->m_security_token = ((static_cast(rnd_dev()) << 32) + | + static_cast(rnd_dev())); + // Initial receive window is simply the entire empty Receive buffer. 
sock->m_rcv_last_sent_rcv_wnd = sock_rcv_wnd(sock); @@ -714,7 +751,11 @@ void Node::handle_data_to_syn_rcvd(Peer_socket::Ptr sock, * empty until ESTABLISHED, it seems natural to limit this queue's cumulative byte size * according to the limit imposed on Receive buffer. (There is some extra overhead to store the * packet header info, but it's close enough.) After that, as when the Receive buffer fills up, - * we drop packets. */ + * we drop packets. + * + * Update (leaving preceding paragraph there for posterity): On 2nd thought, that is probably too + * generous given the possibility of SYN-flood-like attacks. Therefore we now use a separate option-knob to limit + * this queue's size. */ assert(sock->m_int_state == Peer_socket::Int_state::S_SYN_RCVD); const bool first_time = sock->m_rcv_syn_rcvd_data_q.empty(); @@ -732,12 +773,15 @@ void Node::handle_data_to_syn_rcvd(Peer_socket::Ptr sock, sock->m_rcv_syn_rcvd_data_cumulative_size = 0; // It's garbage at the moment. } else if ((sock->m_rcv_syn_rcvd_data_cumulative_size + packet->m_data.size()) - > sock->opt(sock->m_opts.m_st_snd_buf_max_size)) + > sock->opt(sock->m_opts.m_st_rcv_sync_rcvd_data_q_cumulative_max_size)) { - // Not a WARNING, because we didn't do anything wrong; could be network conditions. - FLOW_LOG_INFO("NetFlow worker thread received [" << packet->m_type_ostream_manip << "] packet while " - "in [" << Peer_socket::Int_state::S_SYN_RCVD << "] state for [" << sock << "]; " - "dropping because Receive queue full at [" << sock->m_rcv_syn_rcvd_data_cumulative_size << "]."); + /* Not a WARNING, because we didn't do anything wrong; could be network conditions. + * Not INFO either: a SYN-flood-like (DATA-flood?) attempt may not overfill the logs, given the limit, but generally + * logging on a per-packet basis (for one connection/connection-to-be) is not great. + * @todo An INFO-log the first time this happens would be nice. 
*/ + FLOW_LOG_TRACE("NetFlow worker thread received [" << packet->m_type_ostream_manip << "] packet while " + "in [" << Peer_socket::Int_state::S_SYN_RCVD << "] state for [" << sock << "]; " + "dropping because Receive queue full at [" << sock->m_rcv_syn_rcvd_data_cumulative_size << "]."); return; } // else diff --git a/src/flow/net_flow/server_socket.hpp b/src/flow/net_flow/server_socket.hpp index 0c0c0095b..ef843bcaa 100644 --- a/src/flow/net_flow/server_socket.hpp +++ b/src/flow/net_flow/server_socket.hpp @@ -351,6 +351,14 @@ class Server_socket : */ flow_port_t m_local_port; + /** + * The immutable limit on `m_unaccepted_socks.size() + m_connecting_socks.size()`, such that excess SYNs + * beyond this limit shall be rejected with RST. Should be set before user gets access to `*this` and not + * changed afterwards. + * @todo Make #m_backlog_limit `const`? + */ + unsigned int m_backlog_limit; + /** * Queue of passively opened sockets in Peer_socket::Int_state::S_ESTABLISHED internal state that have not yet been * claimed by the user via `*accept()`. `back()` is the next socket to be accepted (i.e., the one diff --git a/src/flow/net_flow/test/backlog_test.cpp b/src/flow/net_flow/test/backlog_test.cpp new file mode 100644 index 000000000..18976b616 --- /dev/null +++ b/src/flow/net_flow/test/backlog_test.cpp @@ -0,0 +1,173 @@ +/* Flow + * Copyright 2023 Akamai Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in + * compliance with the License. You may obtain a copy + * of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in + * writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing + * permissions and limitations under the License. 
*/ + +#include "flow/net_flow/node.hpp" +#include "flow/net_flow/endpoint.hpp" +#include "flow/net_flow/options.hpp" +#include "flow/net_flow/peer_socket.hpp" +#include "flow/net_flow/server_socket.hpp" +#include "flow/net_flow/error/error.hpp" +#include "flow/test/test_common_util.hpp" +#include "flow/test/test_logger.hpp" +#include "flow/util/util.hpp" +#include "flow/common.hpp" +#include +#include + +/* Tests for the per-Server_socket accept-backlog limit. This limit protects a listening server against a SYN-flood + * scenario (too many connections being half-opened or fully-opened-but-not-yet-accepted, tying up resources). The + * actual mechanism is implemented in Node::handle_syn_to_listening_server() and is driven by the option + * Node_options::m_dyn_accept_backlog_limit. */ + +namespace flow::net_flow::test +{ + +namespace +{ +using flow::test::Test_logger; +using flow::util::Ip_address_v4; +using flow::util::Udp_endpoint; +namespace chrono = boost::chrono; +} // Anonymous namespace + +// A trivial guard against silently huge changes to the default; basically we don't want it to be something massive. +TEST(Net_flow_backlog, Default_value) +{ + const Node_options defaults; + using lim_t = decltype(Node_options::m_dyn_accept_backlog_limit); + EXPECT_TRUE(flow::util::in_closed_range(lim_t(32), defaults.m_dyn_accept_backlog_limit, lim_t(128))) + << "Default should be something sane; [" << defaults.m_dyn_accept_backlog_limit << "] is not what " + "what we had envisioned -- so just look into it, and then fix the test or the default."; + + Test_logger logger; + try + { + Node node{&logger, Udp_endpoint{Ip_address_v4::loopback(), 0}}; + EXPECT_EQ(defaults.m_dyn_accept_backlog_limit, node.options().m_dyn_accept_backlog_limit) + << "This is pretty paranoid, but we're briefly checking that indeed Node::options() defaults to Node_options{}, " + "for this setting at least."; // Not a substitute for testing the options submodule of NetFlow. 
+ } + catch (const std::exception& exc) + { + FAIL() << "Unexpected exception: [" << exc.what() << "]."; + } +} + +/* End-to-end exercise of the backlog limit. + * + * Strategy: bring up a server Node with a deliberately small backlog (smaller than the default 64, so this test runs + * fast and is unambiguous). Never call sync_accept() on it, so every successful handshake accumulates in the + * server's unaccepted-backlog. Then, from a single client Node, issue back-to-back sync_connect()s: + * - The first `backlog` should each succeed (they fill the backlog). + * - Each subsequent attempt should fail quickly with S_CONN_REFUSED: the server answers the SYN with RST without + * allocating a Peer_socket. (Fast = before the first SYN retransmit, which by default is 125ms apart; we allow + * generous slack for test-env scheduling.) + * + * Single client Node (not N client Nodes): the backlog check is a pure count -- it does not care about source UDP + * endpoint -- so N Nodes would exercise the same code path as one. + * + * @todo If we wanted to test it exhaustively as a black box, as opposed to a pragmatic check, then we could try various + * combinations of connects from the same Node versus multiple Nodes. + * + * @todo This exercises only the `m_unaccepted_socks.size()` side of the backlog count -- i.e., connections that + * fully handshook and sit unaccepted. The equally-relevant `m_connecting_socks.size()` side (SYN_RCVD half-opens, + * which is the real SYN-flood threat model) is not covered. For a hardening-grade suite, fill the backlog with + * half-opens via Net_env_simulator: pass a server-side simulator with a deterministic Packet_loss_seq that keeps + * incoming SYNs and drops incoming SYN_ACK_ACKs (pattern `{false, true, false, true, ..., false}`). 
Also raise the + * server's `m_dyn_sock_opts.m_st_connect_retransmit_period` beyond the test's wall-clock duration so the + * server does not retransmit SYN_ACK (which would prompt the client to re-send SYN_ACK_ACK past the end of the + * loss sequence, transitioning the server out of SYN_RCVD and defeating the setup). The `BACKLOG + 1`st SYN should + * still be RSTed identically to the current test. As above, we're going for a pragmatic check here, not an + * exhaustive blackbox-stressing thing for the time being. */ +TEST(Net_flow_backlog, Excess_syns_rejected) +{ + using chrono::milliseconds; + using chrono::seconds; + using chrono::round; + using std::vector; + + constexpr unsigned int BACKLOG = 3; + constexpr unsigned int EXTRA = 3; // Attempts beyond the backlog that we expect to be rejected. + constexpr flow_port_t LOCAL_FLOW_PORT = 50; + /* We expect the server-side RST to reach the client within well under the SYN-retransmit period (125ms). Loopback + * round-trip should be orders of magnitude below that. */ + constexpr auto REJECT_MAX_PERIOD = seconds{1}; + /* Give handshakes plenty of headroom. The effective cap is the protocol handshake timeout + * (m_st_connect_retransmit_timeout, default 3s); we just want sync_connect()'s user timeout to not fire first. */ + constexpr auto CONNECT_USER_TIMEOUT = seconds{10}; + + Test_logger logger; + + // Server. + Node_options srv_opts; + srv_opts.m_dyn_accept_backlog_limit = BACKLOG; + + /* nullptr/omitted Error_code* => throw. Nothing should throw until we start getting the RSTs on reaching BACKLOG, + * and those we handle inline below. Hence: one outer try/catch with FAIL() in the catch for unexpected throws. 
*/ + try + { + Node srv{&logger, Udp_endpoint{Ip_address_v4::loopback(), 0}, nullptr, nullptr, srv_opts}; + ASSERT_TRUE(srv.running()); + + Server_socket::Ptr listener = srv.listen(LOCAL_FLOW_PORT); + ASSERT_TRUE(listener); + /* Deliberately never sync_accept() on `listener`: successful handshakes sit in the unaccepted-backlog, which is + * precisely what we want to fill. */ + + // Client (one Node, reused). + Node cli{&logger, Udp_endpoint{Ip_address_v4::loopback(), 0}}; + ASSERT_TRUE(cli.running()); + + const Remote_endpoint target{srv.local_low_lvl_endpoint(), LOCAL_FLOW_PORT}; + + // Hold onto the good sockets so they don't close (and thus free backlog slots) before we probe the overflow. + vector accepted; + accepted.reserve(BACKLOG); + + for (unsigned int idx = 0; idx != BACKLOG; ++idx) + { + FLOW_TEST_TRACE_CTX("Connect [", idx, "] (within backlog)."); + accepted.emplace_back(cli.sync_connect(target, CONNECT_USER_TIMEOUT)); + ASSERT_TRUE(accepted.back()); + } + + for (unsigned int idx = 0; idx != EXTRA; ++idx) + { + FLOW_TEST_TRACE_CTX("Connect [", BACKLOG + idx, "] (beyond backlog)."); + const auto from_when = Fine_clock::now(); + try + { + const auto sock = cli.sync_connect(target, CONNECT_USER_TIMEOUT); + ADD_FAILURE() << "Beyond-backlog connect unexpectedly succeeded (socket truthy? 
= " + "[" << bool(sock) << "])."; + } + catch (const flow::error::Runtime_error& exc) + { + EXPECT_EQ(exc.code(), error::Code::S_CONN_REFUSED) << "Threw unexpected error: [" << exc.what() << "]."; + } + const auto elapsed_period = Fine_clock::now() - from_when; + EXPECT_LT(round(elapsed_period), REJECT_MAX_PERIOD) + << "Rejection took [" << round(elapsed_period) << "] (expected < [" << REJECT_MAX_PERIOD << "])."; + } + } // try + catch (const std::exception& exc) + { + FAIL() << "Unexpected exception: [" << exc.what() << "]."; + } +} // TEST(Net_flow_backlog, excess_syns_rejected) + +} // namespace flow::net_flow::test diff --git a/src/flow/net_flow/test/data_in_syn_rcvd_test.cpp b/src/flow/net_flow/test/data_in_syn_rcvd_test.cpp new file mode 100644 index 000000000..191edc76b --- /dev/null +++ b/src/flow/net_flow/test/data_in_syn_rcvd_test.cpp @@ -0,0 +1,205 @@ +/* Flow + * Copyright 2023 Akamai Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in + * compliance with the License. You may obtain a copy + * of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in + * writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing + * permissions and limitations under the License. 
*/ + +#include "flow/net_flow/node.hpp" +#include "flow/net_flow/endpoint.hpp" +#include "flow/net_flow/info.hpp" +#include "flow/net_flow/options.hpp" +#include "flow/net_flow/peer_socket.hpp" +#include "flow/net_flow/server_socket.hpp" +#include "flow/net_flow/net_env_simulator.hpp" +#include "flow/net_flow/error/error.hpp" +#include "flow/log/buffer_logger.hpp" +#include "flow/log/config.hpp" +#include "flow/test/test_common_util.hpp" +#include "flow/test/test_logger.hpp" +#include "flow/util/util.hpp" +#include "flow/common.hpp" +#include +#include +#include +#include +#include +#include + +/* Guardrail for the server-side DATA-in-SYN_RCVD buffer limit. Background: + * + * While the server sits in SYN_RCVD (SYN sent, SYN_ACK_ACK not yet received), DATA packets may + * already arrive from the peer (re-ordering, or peer-already-ESTABLISHED-while-our-SYN_ACK_ACK-lost). + * handle_data_to_syn_rcvd() buffers them, subject to + * Peer_socket_options::m_st_rcv_sync_rcvd_data_q_cumulative_max_size -- a defense against + * SYN-flood-style resource exhaustion where an attacker parks the server in SYN_RCVD and then keeps + * feeding DATA. This test verifies that cap is enforced. + * + * Approach: use Net_env_simulator on the server side to drop exactly one SYN_ACK_ACK (the first one + * the server would receive), which keeps the server in SYN_RCVD while the client -- which has + * already seen the SYN_ACK and believes itself ESTABLISHED -- happily sync_send()s DATA that then + * arrives at the server-in-SYN_RCVD. Once we've delivered enough DATA to overflow the cap, the + * server's SYN_ACK retransmit (bumped to ~500ms to give us comfortable headroom) prompts a fresh + * SYN_ACK_ACK from the client; the Net_env_simulator loss-sequence is exhausted by then, so that 2nd one + * passes, the handshake completes, and the socket becomes acceptable. + * + * Post-accept, we read Peer_socket_info::m_rcv.m_total_data_{size|count} off the server socket. 
+ * DATA dropped by the SYN_RCVD cap never reaches that accounting; DATA that fit in + * the queue is replayed into the rcv pipeline on the + * SYN_RCVD=>ESTABLISHED transition and counted once there. So m_total_data_size measures + * precisely "bytes that survived the cap" -- must be <= CAP if the cap is working, and > 0 if the + * SYN_RCVD path was actually exercised. + * + * Precondition witness: an INFO log line from handle_data_to_syn_rcvd() ("first time? = [1]") + * confirms DATA really did arrive in SYN_RCVD -- without this, a too-fast handshake would silently + * invalidate the whole test. + * + * Depends on a specific INFO log line for the precondition check; if that line's format + * changes, it'll fail and encourage updating the regex. + * + * Also depends on the assumption that the 2nd received packet at the + * server is the SYN_ACK_ACK (i.e., Packet_loss_seq is per-received-packet-index, not per-type). */ + +namespace flow::net_flow::test +{ + +namespace +{ +using flow::test::Test_logger; +using flow::log::Buffer_logger; +using flow::log::Config; +using flow::log::Sev; +using flow::util::Ip_address_v4; +using flow::util::Udp_endpoint; +namespace chrono = boost::chrono; +using boost::regex; +using boost::regex_search; +using std::vector; +using std::cerr; +using std::flush; +} // namespace (anon) + +TEST(Net_flow_syn_rcvd_data, Limit_enforced_on_overflow) +{ + constexpr flow_port_t LOCAL_FLOW_PORT = 50; + constexpr size_t CAP = 1000; // Server SYN_RCVD-buffer byte cap. + constexpr size_t CHUNK_SZ = 500; // Per sync_send() -- two fit under CAP, rest overflow. + constexpr unsigned int N_CHUNKS = 5; + constexpr auto RETX_PERIOD = chrono::milliseconds{500}; // Server's SYN_ACK retransmit pause. + constexpr auto CONNECT_TIMEOUT = chrono::seconds{10}; + constexpr auto SEND_TIMEOUT = chrono::seconds{1}; + constexpr auto ACCEPT_TIMEOUT = chrono::seconds{10}; + + // INFO-level logging so our "first DATA in SYN_RCVD" precondition-witness line is captured. 
+ Config log_cfg{Sev::S_INFO}; + log_cfg.init_component_to_union_idx_mapping + (100, Config::standard_component_payload_enum_sparse_length(), true); + log_cfg.init_component_names(S_FLOW_LOG_COMPONENT_NAME_MAP, false, "flow-"); + Buffer_logger logger{&log_cfg}; + + // Any return (including failed ASSERT()s) will run the thing (very helpful to figure out what happened). + const auto cleanup = util::setup_auto_cleanup([&]() + { + if (::testing::Test::HasFailure()) + { + cerr << "=== Buffered log follows ===\n" << logger.buffer_str() << '\n' << flush; + } + }); + + Node_options srv_opts; + srv_opts.m_dyn_sock_opts.m_st_rcv_sync_rcvd_data_q_cumulative_max_size = CAP; + // Long enough to send all our DATA before server retransmits SYN_ACK (which would promptly end SYN_RCVD). + srv_opts.m_dyn_sock_opts.m_st_connect_retransmit_period = RETX_PERIOD; + + /* Server-side simulator: drop exactly the 2nd received packet (the first SYN_ACK_ACK), keep the + * rest. Beyond the loss_seq, fallback loss prob=0 => pass. That second SYN_ACK_ACK (from the server's + * ~500ms SYN_ACK retransmit prompting the client to re-ACK) is what ultimately closes out the + * handshake. */ + Net_env_simulator::Packet_loss_seq loss_seq; + loss_seq.push(false); // SYN. + loss_seq.push(true); // SYN_ACK_ACK #1 -- dropped. + + // @todo Some unwise design in 2011 by me (ygoldfel). Make it take unique_ptr&&, or make Net_env_simulator movable. 
+ const auto srv_sim = new Net_env_simulator{&logger, 0, 0.0, loss_seq}; + + size_t n_delivered_bytes = 0; + size_t n_delivered_pkts = 0; + + try + { + { + Node srv{&logger, Udp_endpoint{Ip_address_v4::loopback(), 0}, srv_sim, nullptr, srv_opts}; + ASSERT_TRUE(srv.running()); + Server_socket::Ptr listener = srv.listen(LOCAL_FLOW_PORT); + ASSERT_TRUE(listener); + + Node cli{&logger, Udp_endpoint{Ip_address_v4::loopback(), 0}}; + ASSERT_TRUE(cli.running()); + + const Remote_endpoint target{srv.local_low_lvl_endpoint(), LOCAL_FLOW_PORT}; + auto cli_sock = cli.sync_connect(target, CONNECT_TIMEOUT); + ASSERT_TRUE(cli_sock); + // Client thinks ESTABLISHED; server is in SYN_RCVD (simulator dropped its SYN_ACK_ACK). + + const vector payload(CHUNK_SZ, uint8_t{0xAB}); + for (unsigned int idx = 0; idx != N_CHUNKS; ++idx) + { + FLOW_TEST_TRACE_CTX("Chunk [", idx, "]."); + const size_t sent = cli_sock->sync_send(boost::asio::buffer(payload), SEND_TIMEOUT); + ASSERT_EQ(sent, payload.size()); + } + + /* Retransmitted SYN_ACK (~RETX_PERIOD later) prompts client's 2nd SYN_ACK_ACK, which the + * simulator now passes => server -> ESTABLISHED, listener becomes acceptable. */ + auto srv_sock = listener->sync_accept(ACCEPT_TIMEOUT); + ASSERT_TRUE(srv_sock); + + /* info().m_rcv.m_total_data_{size|count} = bytes/packets that survived the cap (replayed from the + * queue into the rcv pipeline). Dropped-by-cap DATA never reaches this accumulator; see top comment + * for why this is what we want. */ + const auto info = srv_sock->info(); + n_delivered_bytes = info.m_rcv.m_total_data_size; + n_delivered_pkts = info.m_rcv.m_total_data_count; + } // Both Nodes destroyed; worker threads joined => buffer_str() safe below. + } + catch (const std::exception& exc) + { + FAIL() << "Unexpected exception: [" << exc.what() << "]."; + } + + // Cap is hard: bytes that survived the cap (= everything the server's rcv pipeline ultimately accepted) must not exceed it. 
+ EXPECT_LE(n_delivered_bytes, CAP) + << "Bytes delivered through the server rcv pipeline [" << n_delivered_bytes << "] exceed cap [" << CAP << "]; " + "cap was not enforced."; + // Some DATA must have made it through -- otherwise the test didn't actually exercise the SYN_RCVD path. + EXPECT_GE(n_delivered_bytes, CHUNK_SZ) + << "No DATA delivered (total = [" << n_delivered_bytes << "]); " + "server probably was not in SYN_RCVD when DATA arrived."; + // Not *all* chunks got through -- otherwise cap was not enforced. + EXPECT_LT(n_delivered_pkts, N_CHUNKS) + << "All [" << N_CHUNKS << "] chunks were delivered (count = [" << n_delivered_pkts << "]); cap was not enforced."; + + /* Precondition witness: the INFO-level line from handle_data_to_syn_rcvd() fired with + * `first time? = [1|true]`, confirming DATA really did hit SYN_RCVD (and not, e.g., ESTABLISHED + * because the handshake completed too quickly). Permissive regex: specific log fields may change + * formatting, but these markers should persist. */ + const regex first_data_in_syn_rcvd + {R"(received \[[^\]]+\] packet while in \[[^\]]*SYN_RCVD[^\]]*\][^\n]*first time\? = \[(1|true)\])"}; + EXPECT_TRUE(regex_search(logger.buffer_str(), first_data_in_syn_rcvd)) + << "Log did not contain the expected 'first DATA in SYN_RCVD' INFO line -- server may not have actually been " + "in SYN_RCVD when DATAs arrived. Log format may have changed; update the regex."; + + +} // TEST(Net_flow_syn_rcvd_data, Limit_enforced_on_overflow) + +} // namespace flow::net_flow::test diff --git a/src/flow/net_flow/test/rng_test.cpp b/src/flow/net_flow/test/rng_test.cpp new file mode 100644 index 000000000..d84daa99b --- /dev/null +++ b/src/flow/net_flow/test/rng_test.cpp @@ -0,0 +1,205 @@ +/* Flow + * Copyright 2023 Akamai Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in + * compliance with the License. 
You may obtain a copy + * of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in + * writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing + * permissions and limitations under the License. */ + +#include "flow/test/test_logger.hpp" +#include "flow/test/test_common_util.hpp" +#include "flow/net_flow/detail/port_space.hpp" +#include "flow/net_flow/detail/seq_num.hpp" +#include "flow/net_flow/node.hpp" +#include "flow/net_flow/endpoint.hpp" +#include "flow/net_flow/peer_socket.hpp" +#include "flow/net_flow/server_socket.hpp" +#include "flow/net_flow/error/error.hpp" +#include "flow/log/buffer_logger.hpp" +#include "flow/log/config.hpp" +#include "flow/util/util.hpp" +#include "flow/common.hpp" +#include +#include +#include +#include +#include + +/* Pragmatic seed-diversity tests for NetFlow's three RNG-driven primitives: ephemeral-port reservation (Port_space), + * initial sequence number generation (Sequence_number::Generator), and connection security token (picked internally + * during handshake, observable via INFO-level log output). + * + * We do *not* attempt to prove randomness quality. We only check that independent, + * freshly constructed instances do not produce identical values -- i.e., that nobody accidentally seeded them all + * from a constant, or from a too-coarse clock, or process-wide-only (where a single-process test would still see + * collisions if the generator is re-seeded from a fixed constant on each instantiation): N fresh instances, one + * value apiece, ensure all are distinct. Collision probability in the random-but-properly-seeded case: + * - ephemeral port (~60K-element space, N=10): ~7e-4 for ANY dup. Non-negligible. 
Accommodated by allowing up + * to 1 duplicate in that test only (which drops false-positive rate to ~1e-6; still catches the realistic + * failure modes of "all identical" or "small-group collisions from coarse seeding"). + * - ISN (64-bit), security token: collision probability vanishing; strict all-distinct check. + * So a failure here is an actual seed-diversity bug, not bad luck. */ + +namespace flow::net_flow::test +{ + +namespace +{ +using flow::test::Test_logger; +using flow::log::Buffer_logger; +using flow::log::Config; +using flow::log::Sev; +using flow::util::Ip_address_v4; +using flow::util::Udp_endpoint; +namespace chrono = boost::chrono; +using boost::regex; +using boost::smatch; +using boost::regex_search; +using std::vector; +using std::string; +using std::set; + +/// Number of independent instances to sample per test. See file-level comment for collision analysis. +constexpr unsigned int N = 10; + +/** + * Shared distinctness check: given N values, EXPECTs that at most `allowed_dups` are duplicates (i.e., at least + * `vals.size() - allowed_dups` are unique). Typically `allowed_dups == 0` (strict all-distinct). + */ +template +void expect_mostly_distinct(const vector& vals, unsigned int allowed_dups = 0) +{ + /* Use () and not {} for the iterator-pair ctor: though `set{begin, end}` happens to dispatch correctly here + * (iterators aren't convertible to T), () removes any doubt when initializer_list and iterator-pair overloads + * could in principle compete. */ + const set uniq(vals.begin(), vals.end()); + EXPECT_GE(uniq.size() + allowed_dups, vals.size()) + << "Expected at least [" << (vals.size() - allowed_dups) << "] unique of [" << vals.size() << "]; got [" + << uniq.size() << "] unique. All identical? = [" << (uniq.size() == 1) << "]."; +} + +} // namespace (anon) + +// Fresh Port_space instances should pick different random ephemeral ports. 
+TEST(Net_flow_rng, Port_space_ephemeral) +{ + Test_logger logger; + + vector ports; + ports.reserve(N); + for (unsigned int idx = 0; idx != N; ++idx) + { + Port_space port_spc{&logger}; + /* A fresh Port_space has ~60K free ports, so no error expected. + * Per documentation, as an internal API this one expects non-null Error_code; it won't throw. */ + Error_code err_code; + ports.push_back(port_spc.reserve_ephemeral_port(&err_code)); + ASSERT_FALSE(err_code) << "reserve_ephemeral_port() must succeed; failed: " + "[" << err_code << "] [" << err_code.message() << "]."; + } + + // Port space is only 16-bit; tolerate 1 dup in N=10 -- see file-level comment for probability analysis. + expect_mostly_distinct(ports, 1); +} + +// Fresh Sequence_number::Generator instances should pick different initial sequence numbers. +TEST(Net_flow_rng, Isn_generator) +{ + Test_logger logger; + + vector isns; + isns.reserve(N); + for (unsigned int idx = 0; idx != N; ++idx) + { + Sequence_number::Generator gen{&logger}; + isns.push_back(gen.generate_init_seq_num()); + } + + expect_mostly_distinct(isns); +} + +/* End-to-end check of the security token (as used during a real handshake) via log-scraping. + * + * Security token is not exposed via any public accessor; it is logged at INFO on the active-connect side + * on receipt of SYN_ACK. + * + * Raw ISNs are not prominently logged, so we don't check them end-to-end; the direct + * Sequence_number::Generator check in Net_flow_rng.Isn_generator should be sufficient. + * + * We use one Buffer_logger per session (not a shared one) so the regex only ever sees output from the pair under + * test -- avoids cross-pair contamination and races on buffer_str() across Node threads (which would keep logging + * in background... which we could avoid... but it's just easier to not deal with it). + * + * @warning This depends on the exact log format, so test could break, if someone changes the line. Usually that + * stuff is self-explanatory to fix. 
*/ +TEST(Net_flow_rng, Security_token_via_logs) +{ + constexpr flow_port_t LOCAL_FLOW_PORT = 50; + constexpr auto CONNECT_USER_TIMEOUT = chrono::seconds{1}; + + Config log_cfg{Sev::S_INFO}; + log_cfg.init_component_to_union_idx_mapping + (100, Config::standard_component_payload_enum_sparse_length(), true); + log_cfg.init_component_names(S_FLOW_LOG_COMPONENT_NAME_MAP, false, "flow-"); + + /* Match the single active-connect-continuation INFO line, capturing the security token. Anchored on + * "continuing active-connect" (not just "security token") so we don't accidentally match any unrelated + * [...]-bracketed substring; passive-connect also has a "security token" field but no "continuing + * active-connect". */ + const regex line_re + {R"(continuing active-connect of [^\n]*?security token \[([^\]]+)\])"}; + + vector tokens; + tokens.reserve(N); + + try + { + for (unsigned int idx = 0; idx != N; ++idx) + { + FLOW_TEST_TRACE_CTX("Session [", idx, "]."); + + Buffer_logger logger{&log_cfg}; + { + Node srv{&logger, Udp_endpoint{Ip_address_v4::loopback(), 0}}; + ASSERT_TRUE(srv.running()); + Server_socket::Ptr listener = srv.listen(LOCAL_FLOW_PORT); + ASSERT_TRUE(listener); + + Node cli{&logger, Udp_endpoint{Ip_address_v4::loopback(), 0}}; + ASSERT_TRUE(cli.running()); + + /* The active-connect-continuation log line we key on fires from the client's worker thread upon SYN_ACK + * receipt, synchronously before sync_connect() returns -- so when sync_connect() returns successfully, + * the line has been appended to the buffer. */ + const Remote_endpoint target{srv.local_low_lvl_endpoint(), LOCAL_FLOW_PORT}; + auto sock = cli.sync_connect(target, CONNECT_USER_TIMEOUT); + ASSERT_TRUE(sock); + } // Both Nodes (and the Peer_socket) destroyed here; worker threads joined => no concurrent logging below. + + // Per Buffer_logger docs, buffer_str() is only safe to read when no other thread is logging -- hence the block. 
+ const auto& log = logger.buffer_str(); + smatch m; + ASSERT_TRUE(regex_search(log, m, line_re)) + << "Log did not contain the expected active-connect-continuation line with security token. " + "Log format may have changed; update the regex."; + tokens.emplace_back(m[1].str()); + } + } + catch (const std::exception& exc) + { + FAIL() << "Unexpected exception: [" << exc.what() << "]."; // Connection failures, etc. + } + + expect_mostly_distinct(tokens); +} // TEST(Net_flow_rng, Security_token_via_logs) + +} // namespace flow::net_flow::test diff --git a/src/flow/test/test_common_util.hpp b/src/flow/test/test_common_util.hpp index 0e39bcde7..0005108a4 100644 --- a/src/flow/test/test_common_util.hpp +++ b/src/flow/test/test_common_util.hpp @@ -17,12 +17,30 @@ #pragma once +#include #include #include #include #include #include +/** + * Adds the caller's source location (file:line) to any gtest assertion failure within the enclosing + * `{ }` block, via `SCOPED_TRACE("")`. Use when file:line is enough context; otherwise see + * FLOW_TEST_TRACE_CTX() or plain `SCOPED_TRACE("something useful")`. + * + * Typical usage: + * `{ FLOW_TEST_TRACE(); some_checking_helper(args); }` + */ +#define FLOW_TEST_TRACE() SCOPED_TRACE("") + +/** + * Like FLOW_TEST_TRACE() but also prepends arbitrary context built from the given `<<`-streamable arguments. + * + * Example: `FLOW_TEST_TRACE_CTX("Session idx [", idx, "].");`. + */ +#define FLOW_TEST_TRACE_CTX(...) 
SCOPED_TRACE(::flow::util::ostream_op_string(__VA_ARGS__)) + namespace flow::test { diff --git a/test/suite/unit_test/CMakeLists.txt b/test/suite/unit_test/CMakeLists.txt index b323009c1..ab119bb2c 100644 --- a/test/suite/unit_test/CMakeLists.txt +++ b/test/suite/unit_test/CMakeLists.txt @@ -26,6 +26,9 @@ set(SRCS ${PROJECT_SOURCE_DIR}/src/flow/test/test_file_util.cpp ${PROJECT_SOURCE_DIR}/src/flow/log/detail/test/component_cfg_test.cpp ${PROJECT_SOURCE_DIR}/src/flow/log/test/log_test.cpp + ${PROJECT_SOURCE_DIR}/src/flow/net_flow/test/backlog_test.cpp + ${PROJECT_SOURCE_DIR}/src/flow/net_flow/test/data_in_syn_rcvd_test.cpp + ${PROJECT_SOURCE_DIR}/src/flow/net_flow/test/rng_test.cpp ${PROJECT_SOURCE_DIR}/src/flow/util/test/blob_test.cpp ${PROJECT_SOURCE_DIR}/src/flow/util/test/linked_hash_test.cpp ${PROJECT_SOURCE_DIR}/src/flow/util/test/thread_lcl_test.cpp