Skip to content

Commit c0058dd

Browse files
authored
dfp: Don't filter IP address families from the DNS cache (envoyproxy#39037)
In Mobile, whenever there is a network change, there's a check to see if there is IPv6 connectivity on the current network. If there is no IPv6 connectivity and the `dns_cache_set_ip_version_to_remove` runtime guard is true, the IPv6 addresses would get filtered out from the DNS lookup response. This is problematic because if there is a network change and the device now has IPv6 connectivity, the DNS cache will have filtered out the IPv6 addresses already and won't be able to use them to connect to the peer. This change fixes the issue by storing all resolved IP addresses in the DNS cache, and only filtering out the IP address families that don't have connectivity in the DFP cluster. This has the further benefit of avoiding having to re-resolve DNS host entries whenever there is a network change. The change is guarded by `envoy.reloadable_features.dns_cache_filter_unusable_ip_version`, which defaults to false. Risk Level: low (runtime guarded) Testing: Unit/Integration Docs Changes: n/a Release Notes: n/a Platform Specific Features: n/a Runtime guard: envoy.reloadable_features.dns_cache_filter_unusable_ip_version --------- Signed-off-by: Ali Beyad <abeyad@google.com>
1 parent e523276 commit c0058dd

11 files changed

Lines changed: 281 additions & 113 deletions

File tree

mobile/library/common/internal_engine.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,9 @@ void InternalEngine::handleNetworkChange(const int network_type, const bool has_
381381
envoy_netconf_t configuration =
382382
Network::ConnectivityManagerImpl::setPreferredNetwork(network_type);
383383
if (Runtime::runtimeFeatureEnabled(
384-
"envoy.reloadable_features.dns_cache_set_ip_version_to_remove")) {
384+
"envoy.reloadable_features.dns_cache_set_ip_version_to_remove") ||
385+
Runtime::runtimeFeatureEnabled(
386+
"envoy.reloadable_features.dns_cache_filter_unusable_ip_version")) {
385387
// The IP version to remove flag must be set first before refreshing the DNS cache so that
386388
// the DNS cache will be updated with whether or not the IPv6 addresses will need to be
387389
// removed.

mobile/test/common/integration/client_integration_test.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1452,6 +1452,21 @@ TEST_P(ClientIntegrationTest, OnNetworkChanged) {
14521452
}
14531453
}
14541454

1455+
// This test is simply to test the IPv6 connectivity check and DNS refresh and make sure the code
1456+
// doesn't crash. It doesn't really test the actual network change event, but it does ensure that
1457+
// requests/responses still work in the presence of IP version filtering.
1458+
TEST_P(ClientIntegrationTest, OnNetworkChangedFilterUnsableIps) {
1459+
builder_.addRuntimeGuard("dns_cache_set_ip_version_to_remove", false);
1460+
builder_.addRuntimeGuard("dns_cache_filter_unusable_ip_version", true);
1461+
builder_.setDisableDnsRefreshOnNetworkChange(true);
1462+
initialize();
1463+
internalEngine()->onDefaultNetworkChanged(1);
1464+
basicTest();
1465+
if (upstreamProtocol() == Http::CodecType::HTTP1) {
1466+
ASSERT_EQ(cc_.on_complete_received_byte_count_, 67);
1467+
}
1468+
}
1469+
14551470
// This test is simply to test the IPv6 connectivity check and DNS refresh and make sure the code
14561471
// doesn't crash. It doesn't really test the actual network change event.
14571472
TEST_P(ClientIntegrationTest, OnNetworkChangeEvent) {

source/common/runtime/runtime_features.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ FALSE_RUNTIME_GUARD(envoy_reloadable_features_reresolve_if_no_connections);
139139
FALSE_RUNTIME_GUARD(envoy_restart_features_xds_failover_support);
140140
// TODO(fredyw): evaluate and either make this a config knob or remove.
141141
FALSE_RUNTIME_GUARD(envoy_reloadable_features_dns_cache_set_ip_version_to_remove);
142+
// TODO(abeyad): evaluate and either make this the default or remove.
143+
FALSE_RUNTIME_GUARD(envoy_reloadable_features_dns_cache_filter_unusable_ip_version);
142144
// TODO(alyssawilk): evaluate and make this a config knob or remove.
143145
FALSE_RUNTIME_GUARD(envoy_reloadable_features_reset_brokenness_on_nework_change);
144146
// TODO(alyssawilk): evaluate and make this a config knob or remove.

source/extensions/clusters/dynamic_forward_proxy/cluster.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,17 +288,15 @@ absl::Status Cluster::addOrUpdateHost(
288288
// cluster would create multiple logical hosts based on those addresses.
289289
// We will leave this is a follow up depending on need.
290290
ASSERT(host_info == host_map_it->second.shared_host_info_);
291-
ASSERT(host_map_it->second.shared_host_info_->address() !=
292-
host_map_it->second.logical_host_->address());
293291
ENVOY_LOG(debug, "updating dfproxy cluster host address '{}'", host);
294292
host_map_it->second.logical_host_->setNewAddresses(
295-
host_info->address(), host_info->addressList(), dummy_lb_endpoint_);
293+
host_info->address(), host_info->addressList(/*filtered=*/true), dummy_lb_endpoint_);
296294
return absl::OkStatus();
297295
}
298296

299297
ENVOY_LOG(debug, "adding new dfproxy cluster host '{}'", host);
300298
auto host_or_error = Upstream::LogicalHost::create(
301-
info(), std::string{host}, host_info->address(), host_info->addressList(),
299+
info(), std::string{host}, host_info->address(), host_info->addressList(/*filtered=*/true),
302300
dummy_locality_lb_endpoint_, dummy_lb_endpoint_, nullptr, time_source_);
303301
RETURN_IF_NOT_OK_REF(host_or_error.status());
304302

source/extensions/common/dynamic_forward_proxy/dns_cache.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,13 @@ class DnsHostInfo {
5050
/**
5151
* Returns the host's currently resolved address. These addresses may change periodically due to
5252
* async re-resolution.
53+
*
54+
* If `filtered` is true and the runtime guard
55+
* `envoy.reloadable_features.dns_cache_filter_unusable_ip_version` is true, return a filtered
56+
* list where the IP addresses of IP families unsupported on the current network are removed.
5357
*/
54-
virtual std::vector<Network::Address::InstanceConstSharedPtr> addressList() const PURE;
58+
virtual std::vector<Network::Address::InstanceConstSharedPtr>
59+
addressList(bool filtered) const PURE;
5560

5661
/**
5762
* Returns the host that was actually resolved via DNS. If port was originally specified it will
@@ -282,6 +287,11 @@ class DnsCache {
282287
*/
283288
virtual void setIpVersionToRemove(absl::optional<Network::Address::IpVersion> ip_version) PURE;
284289

290+
/**
291+
* Gets the `IpVersion` addresses to be removed from the DNS response.
292+
*/
293+
virtual absl::optional<Network::Address::IpVersion> getIpVersionToRemove() PURE;
294+
285295
/**
286296
* Stops the DNS cache background tasks by canceling the pending queries and stopping the timeout
287297
* and refresh timers. This function can be useful when the network is unavailable, such as when

source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc

Lines changed: 166 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -345,8 +345,40 @@ void DnsCacheImpl::forceRefreshHosts() {
345345
}
346346

347347
void DnsCacheImpl::setIpVersionToRemove(absl::optional<Network::Address::IpVersion> ip_version) {
348+
bool has_changed = false;
349+
{
350+
absl::MutexLock lock{&ip_version_to_remove_lock_};
351+
has_changed = ip_version_to_remove_ != ip_version;
352+
ip_version_to_remove_ = ip_version;
353+
}
354+
355+
if (has_changed && Runtime::runtimeFeatureEnabled(
356+
"envoy.reloadable_features.dns_cache_filter_unusable_ip_version")) {
357+
// The IP version to remove has changed, so we need to refresh all logical hosts in the DFP
358+
// cluster so they filter out the unsupported/unusable IP addresses from their address list.
359+
absl::ReaderMutexLock reader_lock{&primary_hosts_lock_};
360+
for (auto& primary_host : primary_hosts_) {
361+
for (auto* callbacks : update_callbacks_) {
362+
auto status = callbacks->callbacks_.onDnsHostAddOrUpdate(primary_host.first,
363+
primary_host.second->host_info_);
364+
if (!status.ok()) {
365+
// TODO(abeyad): Do something better with a failure status.
366+
ENVOY_LOG(warn, "Failed to update DFP host after IP version update due to {}",
367+
status.message());
368+
}
369+
}
370+
}
371+
ENVOY_LOG(debug, "refresh all {} logical hosts in host map, unsupported IP version {}",
372+
primary_hosts_.size(),
373+
ip_version.has_value()
374+
? (*ip_version == Network::Address::IpVersion::v4 ? "v4" : "v6")
375+
: "none");
376+
}
377+
}
378+
379+
absl::optional<Network::Address::IpVersion> DnsCacheImpl::getIpVersionToRemove() {
348380
absl::MutexLock lock{&ip_version_to_remove_lock_};
349-
ip_version_to_remove_ = ip_version;
381+
return ip_version_to_remove_;
350382
}
351383

352384
void DnsCacheImpl::stop() {
@@ -464,13 +496,6 @@ void DnsCacheImpl::finishResolve(const std::string& host,
464496
}
465497
}
466498

467-
// If the DNS resolver successfully resolved with an empty response list, the dns cache does not
468-
// update. This ensures that a potentially previously resolved address does not stabilize back to
469-
// 0 hosts.
470-
const auto new_address =
471-
!response.empty() ? Network::Utility::getAddressWithPort(
472-
*(response.front().addrInfo().address_), primary_host_info->port_)
473-
: nullptr;
474499
auto address_list = DnsUtils::generateAddressList(response, primary_host_info->port_);
475500
// Only the change the address if:
476501
// 1) The new address is valid &&
@@ -487,10 +512,14 @@ void DnsCacheImpl::finishResolve(const std::string& host,
487512
}
488513
std::chrono::seconds dns_ttl =
489514
std::chrono::duration_cast<std::chrono::seconds>(refresh_interval_);
490-
if (new_address) {
515+
516+
// If the DNS resolver successfully resolved with an empty response list, the dns cache does not
517+
// update. This ensures that a potentially previously resolved address does not stabilize back to
518+
// 0 hosts.
519+
if (!address_list.empty()) {
491520
// Update the cache entry and staleness any time the ttl changes.
492521
if (!from_cache) {
493-
addCacheEntry(host, new_address, address_list, response.front().addrInfo().ttl_);
522+
addCacheEntry(host, address_list, response.front().addrInfo().ttl_);
494523
}
495524
// Arbitrarily cap DNS re-resolution at min_refresh_interval_ to avoid constant DNS queries.
496525
dns_ttl = std::max<std::chrono::seconds>(
@@ -499,22 +528,23 @@ void DnsCacheImpl::finishResolve(const std::string& host,
499528
primary_host_info->host_info_->updateStale(resolution_time.value(), dns_ttl);
500529
}
501530

502-
bool changed_to_non_null_address =
503-
(new_address != nullptr &&
504-
(current_address == nullptr || *current_address != *new_address ||
505-
DnsUtils::listChanged(address_list, primary_host_info->host_info_->addressList())));
531+
bool should_update_cache =
532+
!address_list.empty() &&
533+
DnsUtils::listChanged(address_list,
534+
primary_host_info->host_info_->addressList(/*filtered=*/false));
506535
// If this was a proxy lookup it's OK to send a null address resolution as
507536
// long as this isn't a transition from non-null to null address.
508-
bool proxying_and_didnt_unresolve = is_proxy_lookup && !current_address;
537+
should_update_cache |= is_proxy_lookup && !current_address;
509538

510-
if (changed_to_non_null_address || proxying_and_didnt_unresolve) {
539+
if (should_update_cache) {
540+
primary_host_info->host_info_->setAddresses(std::move(address_list), details_with_maybe_trace,
541+
status);
511542
ENVOY_LOG_EVENT(debug, "dns_cache_update_address",
512543
"host '{}' address has changed from {} to {}", host,
513544
current_address ? current_address->asStringView() : "<empty>",
514-
new_address ? new_address->asStringView() : "<empty>");
515-
primary_host_info->host_info_->setAddresses(new_address, std::move(address_list));
516-
primary_host_info->host_info_->setDetails(details_with_maybe_trace);
517-
primary_host_info->host_info_->setResolutionStatus(status);
545+
primary_host_info->host_info_->address()
546+
? primary_host_info->host_info_->address()->asStringView()
547+
: "<empty>");
518548

519549
absl::Status host_status = runAddUpdateCallbacks(host, primary_host_info->host_info_);
520550
ENVOY_BUG(host_status.ok(),
@@ -616,8 +646,7 @@ DnsCacheImpl::PrimaryHostInfo::PrimaryHostInfo(DnsCacheImpl& parent,
616646
: parent_(parent), port_(port),
617647
refresh_timer_(parent.main_thread_dispatcher_.createTimer(refresh_timer_cb)),
618648
timeout_timer_(parent.main_thread_dispatcher_.createTimer(timeout_timer_cb)),
619-
host_info_(std::make_shared<DnsHostInfoImpl>(parent.main_thread_dispatcher_.timeSource(),
620-
host_to_resolve, is_ip_address)),
649+
host_info_(std::make_shared<DnsHostInfoImpl>(parent, host_to_resolve, is_ip_address)),
621650
failure_backoff_strategy_(
622651
Config::Utility::prepareDnsRefreshStrategy<
623652
envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig>(
@@ -632,23 +661,18 @@ DnsCacheImpl::PrimaryHostInfo::~PrimaryHostInfo() {
632661
}
633662

634663
void DnsCacheImpl::addCacheEntry(
635-
const std::string& host, const Network::Address::InstanceConstSharedPtr& address,
664+
const std::string& host,
636665
const std::vector<Network::Address::InstanceConstSharedPtr>& address_list,
637666
const std::chrono::seconds ttl) {
638-
if (!key_value_store_) {
667+
if (!key_value_store_ || address_list.empty()) {
639668
return;
640669
}
641670
MonotonicTime now = main_thread_dispatcher_.timeSource().monotonicTime();
642671
uint64_t seconds_since_epoch =
643672
std::chrono::duration_cast<std::chrono::seconds>(now.time_since_epoch()).count();
644-
std::string value;
645-
if (address_list.empty()) {
646-
value = absl::StrCat(address->asString(), "|", ttl.count(), "|", seconds_since_epoch);
647-
} else {
648-
value = absl::StrJoin(address_list, "\n", [&](std::string* out, const auto& addr) {
649-
absl::StrAppend(out, addr->asString(), "|", ttl.count(), "|", seconds_since_epoch);
650-
});
651-
}
673+
std::string value = absl::StrJoin(address_list, "\n", [&](std::string* out, const auto& addr) {
674+
absl::StrAppend(out, addr->asString(), "|", ttl.count(), "|", seconds_since_epoch);
675+
});
652676
key_value_store_->addOrUpdate(host, value, absl::nullopt);
653677
}
654678

@@ -730,6 +754,116 @@ void DnsCacheImpl::loadCacheEntries(
730754
key_value_store_->iterate(load);
731755
}
732756

757+
DnsCacheImpl::DnsHostInfoImpl::DnsHostInfoImpl(DnsCacheImpl& parent,
758+
absl::string_view resolved_host, bool is_ip_address)
759+
: parent_(parent), resolved_host_(resolved_host), is_ip_address_(is_ip_address),
760+
stale_at_time_(parent_.main_thread_dispatcher_.timeSource().monotonicTime()) {
761+
touch();
762+
}
763+
764+
Network::Address::InstanceConstSharedPtr DnsCacheImpl::DnsHostInfoImpl::address() const {
765+
const bool filter_unusable_ips = Runtime::runtimeFeatureEnabled(
766+
"envoy.reloadable_features.dns_cache_filter_unusable_ip_version");
767+
absl::optional<Network::Address::IpVersion> ip_version_to_remove = parent_.getIpVersionToRemove();
768+
absl::ReaderMutexLock lock{&resolve_lock_};
769+
for (const auto& address : address_list_) {
770+
// If not filtering unusable IPs, OR if there is no IP version to remove, OR if the address is
771+
// not of the IP family to remove, use the address. This means if the
772+
// `dns_cache_filter_unusable_ip_version` feature is off OR there is no set IP family to remove,
773+
// the first address in the list will automatically be returned.
774+
if (!filter_unusable_ips || !ip_version_to_remove ||
775+
address->ip()->version() != *ip_version_to_remove) {
776+
return address;
777+
}
778+
}
779+
// If no address was returned yet, return the first address in the list, if any.
780+
return !address_list_.empty() ? address_list_.front() : nullptr;
781+
}
782+
783+
std::vector<Network::Address::InstanceConstSharedPtr>
784+
DnsCacheImpl::DnsHostInfoImpl::addressList(const bool filtered) const {
785+
if (filtered && Runtime::runtimeFeatureEnabled(
786+
"envoy.reloadable_features.dns_cache_filter_unusable_ip_version")) {
787+
auto ip_version_to_remove = parent_.getIpVersionToRemove();
788+
if (ip_version_to_remove.has_value()) {
789+
std::vector<Network::Address::InstanceConstSharedPtr> ret;
790+
absl::ReaderMutexLock lock{&resolve_lock_};
791+
for (const auto& address : address_list_) {
792+
if (address->ip()->version() != *ip_version_to_remove) {
793+
ret.push_back(address);
794+
}
795+
}
796+
return ret;
797+
}
798+
}
799+
std::vector<Network::Address::InstanceConstSharedPtr> ret;
800+
absl::ReaderMutexLock lock{&resolve_lock_};
801+
ret = address_list_;
802+
return ret;
803+
}
804+
805+
const std::string& DnsCacheImpl::DnsHostInfoImpl::resolvedHost() const { return resolved_host_; }
806+
807+
bool DnsCacheImpl::DnsHostInfoImpl::isIpAddress() const { return is_ip_address_; }
808+
809+
void DnsCacheImpl::DnsHostInfoImpl::touch() {
810+
last_used_time_ = parent_.main_thread_dispatcher_.timeSource().monotonicTime().time_since_epoch();
811+
}
812+
813+
void DnsCacheImpl::DnsHostInfoImpl::updateStale(MonotonicTime resolution_time,
814+
std::chrono::seconds ttl) {
815+
stale_at_time_ = resolution_time + ttl;
816+
}
817+
818+
bool DnsCacheImpl::DnsHostInfoImpl::isStale() {
819+
return parent_.main_thread_dispatcher_.timeSource().monotonicTime() >
820+
static_cast<MonotonicTime>(stale_at_time_);
821+
}
822+
823+
void DnsCacheImpl::DnsHostInfoImpl::setAddresses(
824+
std::vector<Network::Address::InstanceConstSharedPtr>&& list, absl::string_view details,
825+
Network::DnsResolver::ResolutionStatus resolution_status) {
826+
absl::WriterMutexLock lock{&resolve_lock_};
827+
address_list_ = std::move(list);
828+
details_ = details;
829+
resolution_status_ = resolution_status;
830+
}
831+
832+
void DnsCacheImpl::DnsHostInfoImpl::setDetails(absl::string_view details) {
833+
absl::WriterMutexLock lock{&resolve_lock_};
834+
details_ = details;
835+
}
836+
837+
std::string DnsCacheImpl::DnsHostInfoImpl::details() {
838+
absl::ReaderMutexLock lock{&resolve_lock_};
839+
return details_;
840+
}
841+
842+
std::chrono::steady_clock::duration DnsCacheImpl::DnsHostInfoImpl::lastUsedTime() const {
843+
return last_used_time_.load();
844+
}
845+
846+
bool DnsCacheImpl::DnsHostInfoImpl::firstResolveComplete() const {
847+
absl::ReaderMutexLock lock{&resolve_lock_};
848+
return first_resolve_complete_;
849+
}
850+
851+
void DnsCacheImpl::DnsHostInfoImpl::setFirstResolveComplete() {
852+
absl::WriterMutexLock lock{&resolve_lock_};
853+
first_resolve_complete_ = true;
854+
}
855+
856+
void DnsCacheImpl::DnsHostInfoImpl::setResolutionStatus(
857+
Network::DnsResolver::ResolutionStatus resolution_status) {
858+
absl::WriterMutexLock lock{&resolve_lock_};
859+
resolution_status_ = resolution_status;
860+
}
861+
862+
Network::DnsResolver::ResolutionStatus DnsCacheImpl::DnsHostInfoImpl::resolutionStatus() const {
863+
absl::WriterMutexLock lock{&resolve_lock_};
864+
return resolution_status_;
865+
}
866+
733867
} // namespace DynamicForwardProxy
734868
} // namespace Common
735869
} // namespace Extensions

0 commit comments

Comments
 (0)