Skip to content

Commit b8f4832

Browse files
ammario and claude committed
fix: use DNAT to intercept all DNS queries
Previously, httpjail attempted to control DNS by manipulating /etc/resolv.conf via /etc/netns/<namespace>/ directories. This approach was broken because:

1. The auto-bind-mount feature of `ip netns` fails when /etc/resolv.conf is a symlink (common on systemd systems)
2. Created persistent resources (/etc/netns/ directories) that could leak
3. Depended on the host's /etc/resolv.conf configuration

This commit removes all DNS file manipulation (~200 lines) and instead uses nftables DNAT to intercept ALL DNS queries at the network layer:

- Add DNAT rule: `udp dport 53 dnat to {host_ip}`
- DNS queries to any nameserver (8.8.8.8, 1.1.1.1, etc.) are transparently redirected to our dummy DNS server
- No mounts, no persistent files, completely independent of host config
- Simple, robust, portable across all Linux systems

Changes:

- nftables.rs: Add DNS DNAT rule in namespace output chain
- mod.rs: Remove fix_systemd_resolved_dns() and ensure_namespace_dns()
- resources.rs: Remove NamespaceConfig resource
- mod.rs: Remove namespace_config field from LinuxJail struct

All 23 integration tests pass on ci-1.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 1fadbda commit b8f4832

3 files changed

Lines changed: 7 additions & 272 deletions

File tree

src/jail/linux/mod.rs

Lines changed: 1 addition & 219 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use super::JailConfig;
1010
use crate::sys_resource::ManagedResource;
1111
use anyhow::{Context, Result};
1212
use dns::DummyDnsServer;
13-
use resources::{NFTable, NamespaceConfig, NetworkNamespace, VethPair};
13+
use resources::{NFTable, NetworkNamespace, VethPair};
1414
use std::process::{Command, ExitStatus};
1515
use std::sync::{Arc, Mutex};
1616
use tracing::{debug, info, warn};
@@ -65,7 +65,6 @@ pub struct LinuxJail {
6565
config: JailConfig,
6666
namespace: Option<ManagedResource<NetworkNamespace>>,
6767
veth_pair: Option<ManagedResource<VethPair>>,
68-
namespace_config: Option<ManagedResource<NamespaceConfig>>,
6968
nftables: Option<ManagedResource<NFTable>>,
7069
dns_server: Option<Arc<Mutex<DummyDnsServer>>>,
7170
// Per-jail computed networking (unique /30 inside 10.99/16)
@@ -83,7 +82,6 @@ impl LinuxJail {
8382
config,
8483
namespace: None,
8584
veth_pair: None,
86-
namespace_config: None,
8785
nftables: None,
8886
dns_server: None,
8987
host_ip,
@@ -210,10 +208,6 @@ impl LinuxJail {
210208
let namespace_name = self.namespace_name();
211209
let veth_ns = self.veth_ns();
212210

213-
// Ensure DNS is properly configured in the namespace
214-
// This is a fallback in case the bind mount didn't work
215-
self.ensure_namespace_dns()?;
216-
217211
// Format the host IP once
218212
let host_ip = format_ip(self.host_ip);
219213

@@ -370,212 +364,6 @@ impl LinuxJail {
370364
Ok(())
371365
}
372366

373-
/// Fix DNS resolution in network namespaces
374-
///
375-
/// ## The DNS Problem
376-
///
377-
/// Network namespaces have isolated network stacks, including their own loopback.
378-
/// When we create a namespace, it gets a copy of /etc/resolv.conf from the host.
379-
///
380-
/// Common issues:
381-
/// 1. **systemd-resolved**: Points to 127.0.0.53 which doesn't exist in the namespace
382-
/// 2. **Local DNS**: Any local DNS resolver (127.0.0.1, etc.) won't be accessible
383-
/// 3. **Corporate DNS**: Internal DNS servers might not be reachable from the namespace
384-
/// 4. **CI environments**: Often have minimal or no DNS configuration
385-
///
386-
/// ## Why We Can't Route Loopback Traffic to the Host
387-
///
388-
/// You might think: "Just route 127.0.0.0/8 from the namespace to the host!"
389-
/// This doesn't work due to Linux kernel security:
390-
///
391-
/// 1. **Martian Packet Protection**: The kernel considers packets with 127.x.x.x
392-
/// addresses coming from non-loopback interfaces as "martian" (impossible/spoofed)
393-
/// 2. **Source Address Validation**: Even with rp_filter=0, the kernel won't accept
394-
/// 127.x.x.x packets from external interfaces
395-
/// 3. **Built-in Security**: This is hardcoded in the kernel's IP stack for security -
396-
/// loopback addresses should NEVER appear on the network
397-
///
398-
/// Even if we tried:
399-
/// - `ip route add 127.0.0.53/32 via 10.99.X.1` - packets get dropped
400-
/// - `nftables DNAT` to rewrite 127.0.0.53 -> host IP - happens too late
401-
/// - Disabling rp_filter - doesn't help with loopback addresses
402-
///
403-
/// ## Our Solution
404-
///
405-
/// Instead of fighting the kernel's security measures, we:
406-
/// 1. Always create a custom resolv.conf for the namespace
407-
/// 2. Use public DNS servers (Google's 8.8.8.8 and 8.8.4.4)
408-
/// 3. These DNS queries go out through our veth pair and work normally
409-
///
410-
/// **IMPORTANT**: `ip netns add` automatically bind-mounts files from
411-
/// /etc/netns/<namespace-name>/ to /etc/ inside the namespace when the namespace
412-
/// is created. We MUST create /etc/netns/<namespace-name>/resolv.conf BEFORE
413-
/// creating the namespace for this to work. This overrides /etc/resolv.conf
414-
/// ONLY for processes running in the namespace. The host's /etc/resolv.conf
415-
/// remains completely untouched.
416-
///
417-
/// This is simpler, more reliable, and doesn't compromise security.
418-
fn fix_systemd_resolved_dns(&mut self) -> Result<()> {
419-
let namespace_name = self.namespace_name();
420-
421-
// Always create namespace config resource and custom resolv.conf
422-
// This ensures DNS works in all environments, not just systemd-resolved
423-
info!(
424-
"Setting up DNS for namespace {} with custom resolv.conf",
425-
namespace_name
426-
);
427-
428-
// Ensure /etc/netns/<namespace>/ directory exists
429-
let netns_namespace_dir = format!("/etc/netns/{}", namespace_name);
430-
431-
// Use mkdir -p to ensure directory exists (more robust than Rust's create_dir_all)
432-
let mkdir_output = Command::new("mkdir")
433-
.args(["-p", &netns_namespace_dir])
434-
.output()
435-
.context("Failed to execute mkdir")?;
436-
437-
if !mkdir_output.status.success() {
438-
anyhow::bail!(
439-
"Failed to create directory {}: {}",
440-
netns_namespace_dir,
441-
String::from_utf8_lossy(&mkdir_output.stderr)
442-
);
443-
}
444-
445-
// Verify directory exists
446-
if !std::path::Path::new(&netns_namespace_dir).is_dir() {
447-
anyhow::bail!(
448-
"Directory {} does not exist after creation",
449-
netns_namespace_dir
450-
);
451-
}
452-
453-
debug!("Created directory: {}", netns_namespace_dir);
454-
455-
// Write custom resolv.conf that will be bind-mounted into the namespace
456-
// Point directly to the host's veth IP where our DNS server listens
457-
let resolv_conf_path = format!("{}/resolv.conf", netns_namespace_dir);
458-
let host_ip = format_ip(self.host_ip);
459-
let resolv_conf_content = format!(
460-
"# Custom DNS for httpjail namespace\n\
461-
# Points to dummy DNS server on host to prevent exfiltration\n\
462-
nameserver {}\n",
463-
host_ip
464-
);
465-
std::fs::write(&resolv_conf_path, &resolv_conf_content)
466-
.context("Failed to write namespace-specific resolv.conf")?;
467-
468-
info!(
469-
"Created namespace-specific resolv.conf at {} pointing to local DNS server",
470-
resolv_conf_path
471-
);
472-
473-
// Verify the file was created
474-
if !std::path::Path::new(&resolv_conf_path).exists() {
475-
anyhow::bail!("Failed to create resolv.conf at {}", resolv_conf_path);
476-
}
477-
478-
// Create namespace config resource for cleanup tracking
479-
// IMPORTANT: Create this AFTER writing resolv.conf to ensure the file exists
480-
self.namespace_config = Some(ManagedResource::<NamespaceConfig>::create(
481-
&self.config.jail_id,
482-
)?);
483-
484-
Ok(())
485-
}
486-
487-
/// Ensure DNS works in the namespace by copying resolv.conf if needed
488-
#[allow(clippy::collapsible_if)]
489-
fn ensure_namespace_dns(&self) -> Result<()> {
490-
let namespace_name = self.namespace_name();
491-
492-
// Check if DNS is already working by testing /etc/resolv.conf in namespace
493-
let check_cmd = Command::new("ip")
494-
.args(["netns", "exec", &namespace_name, "cat", "/etc/resolv.conf"])
495-
.output();
496-
497-
let needs_fix = if let Ok(output) = check_cmd {
498-
if !output.status.success() {
499-
info!("Cannot read /etc/resolv.conf in namespace, will fix DNS");
500-
true
501-
} else {
502-
let content = String::from_utf8_lossy(&output.stdout);
503-
// Check if it's pointing to systemd-resolved or is empty
504-
if content.is_empty() || content.contains("127.0.0.53") {
505-
info!("DNS points to systemd-resolved or is empty in namespace, will fix");
506-
true
507-
} else if content.contains("nameserver") {
508-
info!("DNS already configured in namespace {}", namespace_name);
509-
false
510-
} else {
511-
info!("No nameserver found in namespace resolv.conf, will fix");
512-
true
513-
}
514-
}
515-
} else {
516-
info!("Failed to check DNS in namespace, will attempt fix");
517-
true
518-
};
519-
520-
if !needs_fix {
521-
return Ok(());
522-
}
523-
524-
// DNS not working, try to fix it by copying a working resolv.conf
525-
info!(
526-
"Fixing DNS in namespace {} by copying resolv.conf",
527-
namespace_name
528-
);
529-
530-
// Setup DNS for the namespace
531-
// Create a temporary resolv.conf before running the nsenter command
532-
let temp_dir = crate::jail::get_temp_dir();
533-
std::fs::create_dir_all(&temp_dir).ok();
534-
let temp_resolv = temp_dir
535-
.join(format!("httpjail_resolv_{}.conf", &namespace_name))
536-
.to_string_lossy()
537-
.to_string();
538-
// Use the host veth IP where our dummy DNS server listens
539-
let host_ip = format_ip(self.host_ip);
540-
let dns_content = format!("nameserver {}\n", host_ip);
541-
std::fs::write(&temp_resolv, &dns_content)
542-
.with_context(|| format!("Failed to create temp resolv.conf: {}", temp_resolv))?;
543-
544-
// SAFE FALLBACK: Update the /etc/netns/<name>/resolv.conf file
545-
// This avoids dangerous operations inside the namespace that could escape isolation.
546-
//
547-
// IMPORTANT: We do NOT use bind mounts inside namespaces because:
548-
// 1. `ip netns exec` only enters the network namespace, NOT the mount namespace
549-
// 2. Bind mounting /etc/resolv.conf (which is a symlink) in the host mount namespace
550-
// will follow the symlink and corrupt /run/systemd/resolve/stub-resolv.conf on the HOST
551-
// 3. This breaks DNS for the entire system, not just the namespace
552-
//
553-
// Instead, we update /etc/netns/<name>/resolv.conf which should have been automatically
554-
// bind-mounted by the kernel when the namespace was created.
555-
let netns_resolv_path = format!("/etc/netns/{}/resolv.conf", namespace_name);
556-
557-
match std::fs::write(&netns_resolv_path, &dns_content) {
558-
Ok(_) => {
559-
info!(
560-
"Updated namespace-specific resolv.conf at {}",
561-
netns_resolv_path
562-
);
563-
}
564-
Err(e) => {
565-
warn!(
566-
"Failed to update {}: {}. DNS may not work in namespace. \
567-
This is safe but the namespace will not have working DNS.",
568-
netns_resolv_path, e
569-
);
570-
}
571-
}
572-
573-
// Clean up temp file
574-
let _ = std::fs::remove_file(&temp_resolv);
575-
576-
Ok(())
577-
}
578-
579367
/// Start the dummy DNS server in the namespace
580368
fn start_dns_server(&mut self) -> Result<()> {
581369
let namespace_name = self.namespace_name();
@@ -619,10 +407,6 @@ impl Jail for LinuxJail {
619407
// Check for root access
620408
Self::check_root()?;
621409

622-
// Fix DNS BEFORE creating namespace so bind mount works
623-
// The /etc/netns/<namespace>/ directory must exist before namespace creation
624-
self.fix_systemd_resolved_dns()?;
625-
626410
// Create network namespace
627411
self.create_namespace()?;
628412

@@ -768,7 +552,6 @@ impl Jail for LinuxJail {
768552
// When these go out of scope, they will clean themselves up
769553
let _namespace = ManagedResource::<NetworkNamespace>::for_existing(jail_id);
770554
let _veth = ManagedResource::<VethPair>::for_existing(jail_id);
771-
let _config = ManagedResource::<NamespaceConfig>::for_existing(jail_id);
772555
let _nftables = ManagedResource::<NFTable>::for_existing(jail_id);
773556

774557
Ok(())
@@ -783,7 +566,6 @@ impl Clone for LinuxJail {
783566
config: self.config.clone(),
784567
namespace: None,
785568
veth_pair: None,
786-
namespace_config: None,
787569
nftables: None,
788570
dns_server: None,
789571
host_ip: self.host_ip,

src/jail/linux/nftables.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,14 @@ table ip {table_name} {{
123123
let ruleset = format!(
124124
r#"
125125
table ip {table_name} {{
126-
# NAT output chain: redirect HTTP/HTTPS to host proxy
126+
# NAT output chain: redirect HTTP/HTTPS/DNS to host
127127
chain output {{
128128
type nat hook output priority -100; policy accept;
129129
130+
# Redirect all DNS queries to our dummy DNS server on host
131+
# This works regardless of what nameserver is in /etc/resolv.conf
132+
udp dport 53 dnat to {host_ip}
133+
130134
# Redirect HTTP to proxy running on host
131135
tcp dport 80 dnat to {host_ip}:{http_port}
132136
@@ -141,7 +145,7 @@ table ip {table_name} {{
141145
# Always allow established/related traffic
142146
ct state established,related accept
143147
144-
# Allow DNS traffic directly to the host (UDP only)
148+
# Allow DNS traffic to the host (after DNAT redirection)
145149
ip daddr {host_ip} udp dport 53 accept
146150
147151
# Allow traffic to the host proxy ports after DNAT

src/jail/linux/resources.rs

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -145,57 +145,6 @@ impl SystemResource for VethPair {
145145
}
146146
}
147147

148-
/// Namespace configuration directory (/etc/netns/<namespace>)
149-
pub struct NamespaceConfig {
150-
path: String,
151-
created: bool,
152-
}
153-
154-
impl SystemResource for NamespaceConfig {
155-
fn create(jail_id: &str) -> Result<Self> {
156-
let namespace_name = format!("httpjail_{}", jail_id);
157-
let path = format!("/etc/netns/{}", namespace_name);
158-
159-
// Create directory if needed
160-
if !std::path::Path::new(&path).exists() {
161-
std::fs::create_dir_all(&path)
162-
.context("Failed to create namespace config directory")?;
163-
debug!("Created namespace config directory: {}", path);
164-
}
165-
166-
Ok(Self {
167-
path,
168-
created: true,
169-
})
170-
}
171-
172-
fn cleanup(&mut self) -> Result<()> {
173-
if !self.created {
174-
return Ok(());
175-
}
176-
177-
if std::path::Path::new(&self.path).exists() {
178-
if let Err(e) = std::fs::remove_dir_all(&self.path) {
179-
// Log but don't fail
180-
debug!("Failed to remove namespace config directory: {}", e);
181-
} else {
182-
debug!("Removed namespace config directory: {}", self.path);
183-
}
184-
}
185-
186-
self.created = false;
187-
Ok(())
188-
}
189-
190-
fn for_existing(jail_id: &str) -> Self {
191-
let namespace_name = format!("httpjail_{}", jail_id);
192-
Self {
193-
path: format!("/etc/netns/{}", namespace_name),
194-
created: true,
195-
}
196-
}
197-
}
198-
199148
/// NFTable resource wrapper for a jail
200149
pub struct NFTable {
201150
#[allow(dead_code)]

0 commit comments

Comments
 (0)