Skip to content

Commit 169f75d

Browse files
authored
Provision HTTPS certificates for Core and Proxy (#263)
* acme flow * move acme trigger * send acme logs to core * verify domain before acme * remove dead code * Update proto * build temporary testing image * Update grpc.rs * remove temporary workflow * bump packages
1 parent 5de5433 commit 169f75d

File tree

10 files changed

+1091
-78
lines changed

10 files changed

+1091
-78
lines changed

Cargo.lock

Lines changed: 433 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ tower = "0.5"
5454
futures-util = "0.3"
5555
ammonia = "4.1"
5656
chrono = "0.4"
57+
axum-server = { version = "0.8", features = ["tls-rustls"] }
58+
rustls = { version = "0.23", default-features = false, features = [
59+
"aws-lc-rs",
60+
] }
61+
instant-acme = { version = "0.8", features = ["hyper-rustls", "aws-lc-rs"] }
62+
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "json"] }
5763

5864
[build-dependencies]
5965
tonic-prost-build = "0.14"

proto

src/acme.rs

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
use std::{
2+
collections::HashMap,
3+
sync::{Arc, Mutex},
4+
};
5+
6+
use anyhow::{Context, anyhow};
7+
use axum::{Router, extract::Path, routing::get};
8+
use instant_acme::{
9+
Account, AccountCredentials, ChallengeType, Identifier, LetsEncrypt, NewAccount, NewOrder,
10+
RetryPolicy,
11+
};
12+
use serde::Deserialize;
13+
use tokio::{
14+
net::TcpListener,
15+
sync::{mpsc, oneshot},
16+
};
17+
use tracing::{debug, error, info, warn};
18+
19+
use crate::proto::AcmeStep;
20+
21+
/// Coordinates graceful hand-off of port 80 between the main HTTP server and the ACME task.
22+
///
23+
/// When the proxy's main server is already listening on port 80, the HTTP server loop hands this
24+
/// permit to the ACME task. The task awaits `ready` before it tries to bind port 80 (guaranteeing
25+
/// the main server has fully shut down), and drops `done_tx` (or sends on it) when the temporary
26+
/// challenge listener is closed, so the main server loop can restart.
27+
pub struct Port80Permit {
28+
/// Resolves once the main server has stopped and port 80 is free.
29+
pub ready: oneshot::Receiver<()>,
30+
/// Must be consumed (sent) when the ACME listener has been dropped and port 80 is released.
31+
pub done_tx: oneshot::Sender<()>,
32+
}
33+
34+
/// Everything produced by a completed ACME HTTP-01 issuance.
pub struct AcmeCertResult {
    /// Issued certificate chain in PEM form.
    pub cert_pem: String,
    /// Private key for the certificate in PEM form.
    pub key_pem: String,
    /// `AccountCredentials` serialized to JSON so the same ACME account can be reused on
    /// renewal.
    pub account_credentials_json: String,
}
41+
42+
/// Minimal subset of the Cloudflare DoH JSON response we care about.
43+
#[derive(Debug, Deserialize)]
44+
struct DohResponse {
45+
/// DNS response code (0 = NOERROR).
46+
#[serde(rename = "Status")]
47+
status: u32,
48+
/// Answer records; may be absent on NXDOMAIN.
49+
#[serde(rename = "Answer")]
50+
answer: Option<Vec<serde_json::Value>>,
51+
}
52+
53+
/// Performs a DNS pre-flight check for `domain` using Cloudflare's DoH endpoint.
54+
///
55+
/// Returns `Ok(())` if the domain resolves to at least one A or AAAA record.
56+
/// Returns `Err(...)` if the lookup fails or the
57+
/// domain does not resolve.
58+
async fn check_domain_resolves(domain: &str) -> anyhow::Result<()> {
59+
let client = reqwest::Client::builder()
60+
.timeout(std::time::Duration::from_secs(10))
61+
.build()
62+
.context("Failed to build HTTP client for DoH pre-flight check")?;
63+
64+
// Try both A and AAAA; succeed as long as either has an answer.
65+
for qtype in &["A", "AAAA"] {
66+
let url = format!("https://1.1.1.1/dns-query?name={domain}&type={qtype}");
67+
let response = client
68+
.get(&url)
69+
.header("Accept", "application/dns-json")
70+
.send()
71+
.await;
72+
73+
match response {
74+
Ok(resp) if resp.status().is_success() => match resp.json::<DohResponse>().await {
75+
Ok(doh) if doh.status == 0 => {
76+
let has_answers = doh.answer.as_ref().is_some_and(|a| !a.is_empty());
77+
if has_answers {
78+
info!("DNS pre-flight: domain '{domain}' resolved ({qtype} record found)");
79+
return Ok(());
80+
}
81+
}
82+
Ok(doh) => {
83+
debug!(
84+
"DNS pre-flight: {qtype} lookup for '{domain}' returned status {}",
85+
doh.status
86+
);
87+
}
88+
Err(e) => {
89+
warn!(
90+
"DNS pre-flight: failed to parse DoH response for '{domain}' ({qtype}): {e}"
91+
);
92+
}
93+
},
94+
Ok(resp) => {
95+
warn!(
96+
"DNS pre-flight: DoH request for '{domain}' ({qtype}) returned HTTP {}",
97+
resp.status()
98+
);
99+
}
100+
Err(e) => {
101+
warn!("DNS pre-flight: DoH request for '{domain}' ({qtype}) failed: {e}");
102+
}
103+
}
104+
}
105+
106+
Err(anyhow!(
107+
"Domain '{domain}' does not resolve to any A or AAAA record. \
108+
Make sure your DNS is configured to point '{domain}' to this server's public IP \
109+
address before obtaining a Let's Encrypt certificate."
110+
))
111+
}
112+
113+
/// Run a full ACME HTTP-01 certificate issuance for the given domain.
114+
///
115+
/// - If `existing_credentials_json` is non-empty, the ACME account is restored from it.
116+
/// - Otherwise a fresh account is created.
117+
/// - A temporary axum server is spun up on port 80 to serve the challenge.
118+
/// - If the proxy's main server is already on port 80, pass `port80_permit` so the function
119+
/// waits until the main server has vacated the port before binding.
120+
/// - Progress steps are sent on `progress_tx` as they happen; send errors are silently ignored
121+
/// - On success, returns the certificate chain PEM, private key PEM, and
122+
/// the (potentially refreshed) account credentials JSON.
123+
pub async fn run_acme_http01(
124+
domain: String,
125+
existing_credentials_json: String,
126+
port80_permit: Option<Port80Permit>,
127+
progress_tx: mpsc::UnboundedSender<AcmeStep>,
128+
) -> anyhow::Result<AcmeCertResult> {
129+
info!("Starting ACME HTTP-01 certificate issuance for domain: {domain}");
130+
info!("Using Let's Encrypt production environment");
131+
132+
// DNS pre-flight: verify the domain resolves before attempting ACME.
133+
let _ = progress_tx.send(AcmeStep::CheckingDomain);
134+
info!("DNS pre-flight check for domain: {domain}");
135+
check_domain_resolves(&domain).await?;
136+
137+
let _ = progress_tx.send(AcmeStep::Connecting);
138+
139+
// Restore or create account.
140+
let (account, credentials) = if existing_credentials_json.is_empty() {
141+
info!("No stored ACME account found; creating a new one with Let's Encrypt");
142+
let builder = Account::builder().context("Failed to create ACME account builder")?;
143+
let dir_url = LetsEncrypt::Production.url().to_owned();
144+
info!("Registering account at ACME directory: {dir_url}");
145+
let (account, credentials) = builder
146+
.create(
147+
&NewAccount {
148+
terms_of_service_agreed: true,
149+
contact: &[],
150+
only_return_existing: false,
151+
},
152+
dir_url,
153+
None,
154+
)
155+
.await
156+
.context("Failed to create ACME account")?;
157+
info!("ACME account registered successfully");
158+
(account, credentials)
159+
} else {
160+
info!("Restoring existing ACME account from stored credentials");
161+
let creds: AccountCredentials = serde_json::from_str(&existing_credentials_json)
162+
.context("Failed to deserialize stored ACME account credentials")?;
163+
let builder = Account::builder().context("Failed to create ACME account builder")?;
164+
let account = builder
165+
.from_credentials(creds)
166+
.await
167+
.context("Failed to restore ACME account from credentials")?;
168+
info!("ACME account restored successfully");
169+
// After restoring there are no new credentials returned - re-serialize the same ones.
170+
let restored_creds: AccountCredentials =
171+
serde_json::from_str(&existing_credentials_json)
172+
.context("Failed to re-deserialize ACME credentials for storage")?;
173+
(account, restored_creds)
174+
};
175+
176+
let account_credentials_json =
177+
serde_json::to_string(&credentials).context("Failed to serialize ACME credentials")?;
178+
179+
let mut order = account
180+
.new_order(&NewOrder::new(&[Identifier::Dns(domain.clone())]))
181+
.await
182+
.context("Failed to create ACME order")?;
183+
info!("ACME order placed for domain: {domain}");
184+
185+
// Collect all (token, key_authorization) pairs we need to serve.
186+
let challenge_map: Arc<Mutex<HashMap<String, String>>> = Arc::new(Mutex::new(HashMap::new()));
187+
188+
// Spin up temporary HTTP server on port 80 to serve challenges BEFORE calling
189+
// set_ready(), so the server is already accepting requests when LE attempts validation.
190+
let map_for_server = Arc::clone(&challenge_map);
191+
let app = Router::new().route(
192+
"/.well-known/acme-challenge/{token}",
193+
get(move |Path(token): Path<String>| {
194+
let map = Arc::clone(&map_for_server);
195+
async move {
196+
let map = map.lock().unwrap();
197+
match map.get(&token) {
198+
Some(key_auth) => {
199+
debug!("Serving ACME challenge for token: {token}");
200+
(
201+
axum::http::StatusCode::OK,
202+
[(axum::http::header::CONTENT_TYPE, "text/plain")],
203+
key_auth.clone(),
204+
)
205+
}
206+
None => {
207+
error!("Unknown ACME challenge token: {token}");
208+
(
209+
axum::http::StatusCode::NOT_FOUND,
210+
[(axum::http::header::CONTENT_TYPE, "text/plain")],
211+
String::new(),
212+
)
213+
}
214+
}
215+
}
216+
}),
217+
);
218+
219+
// If the main HTTP server is on port 80, wait for it to vacate before binding.
220+
// We destructure the permit here so `ready` (a oneshot::Receiver) can be consumed.
221+
let (listener, port80_permit) = if let Some(permit) = port80_permit {
222+
info!("Waiting for main HTTP server to release port 80 before ACME challenge bind");
223+
let _ = permit.ready.await;
224+
info!("Port 80 released by main HTTP server; binding for ACME challenge");
225+
let listener = TcpListener::bind("0.0.0.0:80")
226+
.await
227+
.context("Failed to bind port 80 for ACME HTTP-01 challenge server")?;
228+
(listener, Some(permit.done_tx))
229+
} else {
230+
let listener = TcpListener::bind("0.0.0.0:80")
231+
.await
232+
.context("Failed to bind port 80 for ACME HTTP-01 challenge server")?;
233+
(listener, None::<tokio::sync::oneshot::Sender<()>>)
234+
};
235+
info!("ACME challenge server listening on port 80");
236+
237+
let server_handle = tokio::spawn(async move {
238+
if let Err(err) = axum::serve(listener, app).await {
239+
error!("ACME challenge server error: {err}");
240+
}
241+
});
242+
243+
// Now populate the challenge map and notify LE - server is already up.
244+
let mut authorizations = order.authorizations();
245+
246+
while let Some(result) = authorizations.next().await {
247+
let mut authz = result.context("Failed to retrieve ACME authorization")?;
248+
let mut challenge = authz
249+
.challenge(ChallengeType::Http01)
250+
.ok_or_else(|| anyhow!("ACME server did not offer HTTP-01 challenge"))?;
251+
252+
let token = challenge.token.clone();
253+
let key_auth = challenge.key_authorization().as_str().to_owned();
254+
255+
info!("Preparing HTTP-01 challenge for domain: {domain} (token: {token})");
256+
257+
{
258+
let mut map = challenge_map.lock().unwrap();
259+
map.insert(token, key_auth);
260+
}
261+
262+
challenge
263+
.set_ready()
264+
.await
265+
.context("Failed to signal ACME challenge as ready")?;
266+
info!("HTTP-01 challenge signalled as ready; waiting for Let's Encrypt to validate");
267+
}
268+
269+
// LE will now attempt HTTP-01 validation against our challenge server.
270+
let _ = progress_tx.send(AcmeStep::ValidatingDomain);
271+
info!("Polling Let's Encrypt for domain validation result...");
272+
273+
// Wait for the order to become ready for finalization.
274+
let status = order
275+
.poll_ready(&RetryPolicy::default())
276+
.await
277+
.context("ACME order did not become ready")?;
278+
info!("Domain validation complete, order status: {status:?}");
279+
280+
server_handle.abort();
281+
info!("ACME challenge server shut down; port 80 released");
282+
283+
if let Some(done_tx) = port80_permit {
284+
let _ = done_tx.send(());
285+
}
286+
287+
// Domain validated; finalizing order and retrieving the certificate.
288+
let _ = progress_tx.send(AcmeStep::IssuingCertificate);
289+
info!("Finalizing ACME order and requesting certificate issuance...");
290+
291+
let key_pem = order
292+
.finalize()
293+
.await
294+
.context("Failed to finalize ACME order")?;
295+
info!("ACME order finalized; polling for certificate...");
296+
297+
// Poll until the certificate is issued.
298+
let cert_pem = order
299+
.poll_certificate(&RetryPolicy::default())
300+
.await
301+
.context("Failed to retrieve ACME certificate")?;
302+
303+
info!("ACME certificate issued successfully for domain: {domain}");
304+
305+
Ok(AcmeCertResult {
306+
cert_pem,
307+
key_pem,
308+
account_credentials_json,
309+
})
310+
}

src/config.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ pub struct EnvConfig {
7979
default_value = "/etc/defguard/certs"
8080
)]
8181
pub cert_dir: PathBuf,
82+
83+
/// Port for the HTTPS server. When Core sends TLS certificates over gRPC, the HTTP
84+
/// server is restarted on this port using those certificates.
85+
#[arg(long, env = "DEFGUARD_PROXY_HTTPS_PORT", default_value_t = 443)]
86+
pub https_port: u16,
8287
}
8388

8489
#[derive(thiserror::Error, Debug)]

0 commit comments

Comments
 (0)