Skip to content

Commit af83fa5

Browse files
authored
feat(gax): retry on Unknown and 503 (#5023)
1 parent 7a6dd6f commit af83fa5

2 files changed

Lines changed: 35 additions & 13 deletions

File tree

src/gax/src/retry_policy.rs

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,9 @@ pub struct Aip194Strict;
257257

258258
impl RetryPolicy for Aip194Strict {
259259
fn on_error(&self, state: &RetryState, error: Error) -> RetryResult {
260+
use crate::error::rpc::Code;
261+
use http::StatusCode;
262+
260263
if error.is_transient_and_before_rpc() {
261264
return RetryResult::Continue(error);
262265
}
@@ -266,20 +269,19 @@ impl RetryPolicy for Aip194Strict {
266269
if error.is_io() {
267270
return RetryResult::Continue(error);
268271
}
269-
if let Some(status) = error.status() {
270-
return if status.code == crate::error::rpc::Code::Unavailable {
271-
RetryResult::Continue(error)
272-
} else {
273-
RetryResult::Permanent(error)
274-
};
272+
if error.status().is_some_and(|s| s.code == Code::Unavailable) {
273+
return RetryResult::Continue(error);
275274
}
276-
277-
match error.http_status_code() {
278-
Some(code) if code == http::StatusCode::SERVICE_UNAVAILABLE.as_u16() => {
279-
RetryResult::Continue(error)
280-
}
281-
_ => RetryResult::Permanent(error),
275+
// Some services return a status of "Unknown" and a http status code of 503
276+
// (SERVICE_UNAVAILABLE). That is not how gRPC status codes are supposed to work, but the
277+
// intent is clear: we need to retry.
278+
if error
279+
.http_status_code()
280+
.is_some_and(|code| code == StatusCode::SERVICE_UNAVAILABLE.as_u16())
281+
{
282+
return RetryResult::Continue(error);
282283
}
284+
RetryResult::Permanent(error)
283285
}
284286
}
285287

@@ -617,6 +619,19 @@ pub mod tests {
617619
ThrottleResult::Continue(_)
618620
));
619621

622+
assert!(
623+
p.on_error(&idempotent_state(now), unknown_and_503())
624+
.is_continue()
625+
);
626+
assert!(
627+
p.on_error(&non_idempotent_state(now), unknown_and_503())
628+
.is_permanent()
629+
);
630+
assert!(matches!(
631+
p.on_throttle(&idempotent_state(now), unknown_and_503()),
632+
ThrottleResult::Continue(_)
633+
));
634+
620635
assert!(
621636
p.on_error(&idempotent_state(now), permission_denied())
622637
.is_permanent()
@@ -825,6 +840,14 @@ pub mod tests {
825840
Error::service(status)
826841
}
827842

843+
fn unknown_and_503() -> Error {
844+
use crate::error::rpc::Code;
845+
let status = crate::error::rpc::Status::default()
846+
.set_code(Code::Unknown)
847+
.set_message("UNAVAILABLE");
848+
Error::service_full(status, Some(503), None, Some("source error".into()))
849+
}
850+
828851
fn permission_denied() -> Error {
829852
use crate::error::rpc::Code;
830853
let status = crate::error::rpc::Status::default()

tests/discovery/tests/driver.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ mod compute {
1717
use google_cloud_test_utils::errors::anydump;
1818
use google_cloud_test_utils::tracing::enable_tracing;
1919

20-
#[ignore = "TODO(#5017) - disabled because it was flaky"]
2120
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2221
async fn run_compute_zones() -> anyhow::Result<()> {
2322
let _guard = enable_tracing();

0 commit comments

Comments
 (0)