diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index fec4748..d29b3c1 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -93,4 +93,31 @@ jobs:
         # --no-fail-fast runs every scenario binary even after one fails, so a
         # red run surfaces all failing scenarios at once rather than stopping at
         # the first.
-        run: cargo test --all-features --test '*' --no-fail-fast -- --nocapture
+        #
+        # These tests hit the live production API, so a transient connectivity
+        # blip (the suite has seen TCP connect timeouts to the API host) would
+        # otherwise fail the whole run and need a manual re-run. Retry the suite
+        # up to max_attempts times with linear backoff (30s, 60s, ...) so a brief
+        # outage self-heals; the bounded connect timeout in tests/common/mod.rs
+        # keeps a down-API attempt to seconds rather than minutes. A genuine
+        # failure still goes red after exhausting the attempts.
+        run: |
+          # Single knob for the retry budget: the loop bound, the log text and the
+          # final-attempt check below all derive from it.
+          max_attempts=3
+          for attempt in $(seq 1 "${max_attempts}"); do
+            echo "::group::Integration test attempt ${attempt}/${max_attempts}"
+            if cargo test --all-features --test '*' --no-fail-fast -- --nocapture; then
+              echo "::endgroup::"
+              exit 0
+            fi
+            echo "::endgroup::"
+            # No point sleeping once the last attempt has failed.
+            if [ "${attempt}" -lt "${max_attempts}" ]; then
+              backoff=$((attempt * 30))
+              echo "Attempt ${attempt} failed; retrying in ${backoff}s..."
+              sleep "${backoff}"
+            fi
+          done
+          echo "::error::Integration tests failed after ${max_attempts} attempts"
+          exit 1
diff --git a/Cargo.toml b/Cargo.toml
index 3b12943..b1301c2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -40,6 +40,11 @@ tokio = { version = "^1.46.0", features = ["rt-multi-thread", "macros", "time"] }
 futures = "^0.3"
 wiremock = "0.6"
 uuid = { version = "1", features = ["v4"] }
+# Lets the integration harness build a reqwest client with a bounded connect
+# timeout (see tests/common/mod.rs). default-features = false mirrors the lib;
+# the TLS backend comes from the crate's own `native-tls`/`rustls` features via
+# Cargo feature unification.
+reqwest = { version = "^0.13", default-features = false } [package.metadata.docs.rs] all-features = true diff --git a/tests/auth_missing_token_401.rs b/tests/auth_missing_token_401.rs index f903ecd..16d7e5e 100644 --- a/tests/auth_missing_token_401.rs +++ b/tests/auth_missing_token_401.rs @@ -20,9 +20,12 @@ async fn auth_missing_token_401() { let _client = skip_if_no_creds!(); let env = common::load_env(); - // No bearer token, no workspace header — just the API host. + // No bearer token, no workspace header — just the API host. Share the + // harness client so a down API host fails fast instead of stalling on the + // OS-level connect timeout (see common::test_http_client). let mut config = Configuration::new(); config.base_path = env.api_url.trim_end_matches('/').to_string(); + config.client = common::test_http_client(); let result = workspaces_api::list_workspaces(&config, None).await; match result { diff --git a/tests/auth_unknown_workspace.rs b/tests/auth_unknown_workspace.rs index b4f78fb..aaa5999 100644 --- a/tests/auth_unknown_workspace.rs +++ b/tests/auth_unknown_workspace.rs @@ -27,6 +27,7 @@ async fn auth_unknown_workspace() { .api_token(api_key) .workspace_id(fake_workspace.clone()) .base_url(env.api_url) + .reqwest_client(common::test_http_client()) .build() .expect("Client::build should succeed"); diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 1a27e9c..655ae21 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -11,8 +11,36 @@ #![allow(dead_code)] +use std::time::Duration; + use hotdata::Client; +/// Connect-phase ceiling for the shared test client. +/// +/// `reqwest::Client::default()` (what the SDK uses when no client is supplied) +/// has no connect timeout, so an unreachable API host blocks each call on the +/// OS-level TCP timeout (~60s observed in CI). With ~20 scenario binaries run +/// sequentially by `cargo test`, a transient connectivity blip turns into a +/// ~20-minute red run. 
Bounding the connect phase fails fast — and lets hyper
+/// fall through to the next resolved address — so an outage is cheap to retry.
+const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
+
+/// Overall per-request ceiling. Generous enough for the tiny fixture upload and
+/// each poll request; purely a backstop against a hung socket.
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(60);
+
+/// Build the reqwest client every scenario shares: identical to the SDK default
+/// except for the bounded [`CONNECT_TIMEOUT`]/[`REQUEST_TIMEOUT`]. Pass it via
+/// `ClientBuilder::reqwest_client` (or assign to `Configuration::client`) so a
+/// down API host can't stall the suite. Panics if the client fails to build.
+pub fn test_http_client() -> reqwest::Client {
+    reqwest::Client::builder()
+        .connect_timeout(CONNECT_TIMEOUT)
+        .timeout(REQUEST_TIMEOUT)
+        .build()
+        .expect("building the test reqwest client should not fail")
+}
+
 /// Default API host. The auth-token -> JWT exchange and every endpoint live on
 /// the API host, so the ergonomic `Client` always points here unless overridden
 /// by `HOTDATA_SDK_TEST_API_URL`.
@@ -67,6 +95,7 @@ pub fn client_or_skip() -> Option<Client> {
         .api_token(env.api_key.expect("checked above"))
         .workspace_id(env.workspace_id.expect("checked above"))
         .base_url(env.api_url)
+        .reqwest_client(test_http_client())
         .build()
         .expect("Client::build with valid credentials should not fail");
     Some(client)