Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -201,18 +201,32 @@ jobs:
uses: reactivecircus/android-emulator-runner@v2
env:
ANDROID_NDK_HOME: ${{ steps.setup-ndk.outputs.ndk-path }}
RUST_LOG: trace
with:
api-level: ${{ matrix.api-level }}
arch: x86_64
target: google_apis
force-avd-creation: false
emulator-options: -no-window -no-audio -no-boot-anim -gpu swiftshader_indirect
# `-dns-server` + `-netfast` + `-no-metrics` are the combination
# reported to give working public-internet connectivity on the
# GH-hosted runner; without them `wlan0` stays NO-CARRIER and
# every connect() to a public IP fails with ENETUNREACH. See
# https://github.com/ReactiveCircus/android-emulator-runner/issues/348#issuecomment-2578082030
emulator-options: -no-window -no-audio -no-boot-anim -gpu swiftshader_indirect -dns-server 8.8.8.8 -netfast -no-metrics
disable-animations: true
# cargo-ndk pushes each test binary to /data/local/tmp on the
# emulator and runs it via adb.
script: |
adb wait-for-device
adb shell 'i=0; while [ -z "$(getprop sys.boot_completed | tr -d "\r")" ]; do i=$((i+1)); if [ $i -gt 300 ]; then echo "boot did not complete within 600s"; exit 1; fi; sleep 2; done'
# `sys.boot_completed=1` only signals that Android finished
# booting, not that the radio/Wi-Fi stack is up. Poll until
# the kernel actually has a route to a public IP, otherwise
# any test that hits the network fails with ENETUNREACH.
adb shell 'i=0; while ! ip route get 8.8.8.8 >/dev/null 2>&1; do i=$((i+1)); if [ $i -gt 60 ]; then echo "no route to 8.8.8.8 after 60s"; ip addr; ip route; exit 1; fi; sleep 1; done'
echo "=== ip addr ===" && adb shell ip addr
echo "=== ip route ===" && adb shell ip route
echo "=== net.dns1 ===" && adb shell getprop net.dns1
cargo ndk test -p iroh-base --all-features
cargo ndk test -p iroh-dns --features tls-ring
cargo ndk test -p iroh-relay --features tls-ring,metrics
Expand Down
46 changes: 32 additions & 14 deletions iroh-dns/tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,13 @@

use std::time::Duration;

#[cfg(target_os = "android")]
use iroh_dns::dns::DnsProtocol;
use iroh_dns::dns::DnsResolver;

const TIMEOUT: Duration = Duration::from_secs(8);
const HOST: &str = "dns.iroh.link";

/// Smoke test: constructing a resolver through `DnsResolver::new` must not panic.
#[tokio::test]
async fn resolver_constructs_without_panic() {
    // The value itself is irrelevant; only construction is exercised.
    drop(DnsResolver::new());
}

// Ignored on Android: in the GitHub-hosted emulator the public DNS
// fallback's hickory connection pool repeatedly returns
// "no connections available" within ~30 ms, well before the 8s
// per-lookup timeout, so a resolution that works locally fails in
// CI. Tracking the actual fix separately; see Frando/android-dns-fix.
#[cfg_attr(
target_os = "android",
ignore = "flaky on emulator (no connections available)"
)]
#[tokio::test]
async fn resolver_resolves_dns_iroh_link() {
let resolver = DnsResolver::new();
Expand Down Expand Up @@ -54,3 +42,33 @@ async fn resolver_resolves_dns_iroh_link() {
);
eprintln!("{HOST} resolved to: {hits:?}");
}

/// Looks up `HOST` through the DNS proxy of the Android emulator's QEMU NAT.
///
/// The emulator exposes its DNS gateway at the well-known address 10.0.2.3, see
/// <https://developer.android.com/studio/run/emulator-networking>.
/// There is no JNI context in this test, so the system DNS configuration
/// cannot be read; configuring the nameserver explicitly works around that.
/// The test therefore covers hickory's pool and sockets as well as our
/// `DnsResolver` plumbing against a nameserver that is always reachable
/// inside the emulator, regardless of whether the runner can reach public DNS.
#[cfg(target_os = "android")]
#[tokio::test]
async fn resolves_via_emulator_dns_proxy() {
    // Resolver that talks plain UDP to the emulator's DNS gateway only.
    let resolver = DnsResolver::builder()
        .with_nameserver("10.0.2.3:53".parse().unwrap(), DnsProtocol::Udp)
        .build();

    let lookup = resolver.lookup_ipv4(HOST, TIMEOUT).await;
    let addrs = lookup
        .expect("IPv4 lookup via 10.0.2.3 should succeed in the emulator")
        .collect::<Vec<_>>();

    assert!(
        !addrs.is_empty(),
        "expected at least one A record for {HOST} via 10.0.2.3",
    );
    eprintln!("{HOST} resolved via 10.0.2.3 to: {addrs:?}");
}
9 changes: 0 additions & 9 deletions iroh/src/socket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2370,15 +2370,6 @@ mod tests {
Ok(())
}

// Skipped on Android: the GitHub-hosted emulator's network stack
// returns EADDRINUSE long enough after force_network_change() that
// the rebind here fails and the subsequent connect() never wakes
// the connection driver. Locally on a real emulator this passes,
// so the test is only ignored under cfg(target_os = "android").
#[cfg_attr(
target_os = "android",
ignore = "rebind flakes against the GitHub Android emulator"
)]
#[tokio::test]
#[traced_test]
async fn test_regression_network_change_rebind_wakes_connection_driver() -> Result {
Expand Down
66 changes: 58 additions & 8 deletions iroh/src/socket/transports/ip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,23 @@ use std::{
net::{IpAddr, SocketAddr, SocketAddrV4, SocketAddrV6},
num::NonZeroUsize,
pin::Pin,
sync::Arc,
sync::{Arc, Mutex},
task::{Context, Poll},
time::Duration,
};

use ipnet::{Ipv4Net, Ipv6Net};
use n0_future::task::AbortOnDropHandle;
use n0_watcher::Watchable;
use netwatch::{UdpSender, UdpSocket};
use pin_project::pin_project;
use tracing::{debug, info, trace};
use tokio::time;
use tracing::{debug, info, trace, warn};

/// Maximum number of retries for a rebind that fails with `EADDRINUSE`.
const REBIND_RETRY_ATTEMPTS: u32 = 12;
/// Delay between rebind attempts that failed with `EADDRINUSE`.
const REBIND_RETRY_DELAY: Duration = Duration::from_millis(250);

use super::{Addr, Transmit};
use crate::metrics::{EndpointMetrics, SocketMetrics};
Expand Down Expand Up @@ -242,6 +250,7 @@ impl IpTransport {
IpNetworkChangeSender {
socket: self.socket.clone(),
local_addr: self.local_addr.clone(),
rebind_task: Default::default(),
}
}

Expand All @@ -259,17 +268,58 @@ impl IpTransport {
/// Handle used to rebind the UDP socket of an `IpTransport`.
///
/// It is built from clones of the transport's socket and local-address
/// watchable.
pub(super) struct IpNetworkChangeSender {
/// UDP socket that is rebound by `rebind`.
socket: Arc<UdpSocket>,
/// Watchable local address, updated after a successful rebind.
local_addr: Watchable<SocketAddr>,
/// Handle of the background task that retries a rebind which failed with
/// `EADDRINUSE`; `None` when no retry has been scheduled.
rebind_task: Mutex<Option<AbortOnDropHandle<()>>>,
}

impl IpNetworkChangeSender {
/// Rebinds the underlying UDP socket and publishes the new local address.
///
/// The rebind is attempted once synchronously:
///
/// - On success the new local address is stored in `local_addr`.
/// - If the attempt fails with `EADDRINUSE`, a background task is spawned
///   that keeps retrying (up to `REBIND_RETRY_ATTEMPTS` retries, sleeping
///   `REBIND_RETRY_DELAY` after each failure) and `Ok(())` is returned
///   right away. The outcome of those retries is only logged.
/// - Any other error of the first attempt is returned to the caller.
///
/// # Errors
///
/// Returns the error of the synchronous rebind unless it is `EADDRINUSE`,
/// or the error of reading the local address after a successful rebind.
///
/// # Panics
///
/// Panics if the mutex guarding the retry task handle is poisoned.
pub(super) fn rebind(&self) -> io::Result<()> {
    let old_addr = self.local_addr.get();
    // Clear any previous rebind task. Dropping the `AbortOnDropHandle` is
    // expected to abort a retry loop left over from an earlier call.
    let mut rebind_task = self.rebind_task.lock().expect("poisoned");
    *rebind_task = None;
    // Try to rebind immediately.
    match self.socket.rebind() {
        Ok(()) => {
            let addr = self.socket.local_addr()?;
            self.local_addr.set(addr).ok();
            trace!("rebound from {} to {}", old_addr, addr);
            Ok(())
        }
        Err(err) if err.kind() == io::ErrorKind::AddrInUse => {
            // The old address is still considered in use; keep retrying in
            // the background instead of failing the network change.
            let socket = self.socket.clone();
            let local_addr = self.local_addr.clone();
            let fut = async move {
                let mut attempt = 0;
                loop {
                    match socket.rebind() {
                        Ok(()) => break,
                        Err(err)
                            if err.kind() == io::ErrorKind::AddrInUse
                                && attempt < REBIND_RETRY_ATTEMPTS =>
                        {
                            attempt += 1;
                            debug!(
                                ?err,
                                attempt, "rebind hit EADDRINUSE on {old_addr}, retrying"
                            );
                            time::sleep(REBIND_RETRY_DELAY).await;
                        }
                        Err(err) => {
                            // Either a different error or the retries are exhausted.
                            warn!("rebinding IP transport failed: {err:#}");
                            return;
                        }
                    }
                }
                match socket.local_addr() {
                    Ok(addr) => {
                        local_addr.set(addr).ok();
                        trace!("rebound from {} to {}", old_addr, addr);
                    }
                    Err(err) => {
                        // Do not swallow this silently: the socket was rebound
                        // but the published address is now stale.
                        warn!("failed to read local address after rebind: {err:#}");
                    }
                }
            };
            *rebind_task = Some(AbortOnDropHandle::new(n0_future::task::spawn(fut)));
            Ok(())
        }
        Err(err) => Err(err),
    }
}

pub(super) fn on_network_change(&self, _info: &crate::socket::Report) {
Expand Down
2 changes: 0 additions & 2 deletions iroh/tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ use wasm_bindgen_test::wasm_bindgen_test as test;

const ECHO_ALPN: &[u8] = b"echo";

// Skipped on Android: Test is flaky in the emulator.
#[cfg_attr(target_os = "android", ignore = "flaky against staging from emulator")]
#[test]
async fn simple_endpoint_id_based_connection_transfer() -> Result {
std::panic::set_hook(Box::new(console_error_panic_hook::hook));
Expand Down