@@ -586,21 +586,22 @@ impl TunnelService {
586586 } ,
587587 status : None ,
588588 } ;
589- proxy = proxies
590- . create ( & PostParams :: default ( ) , & proxy)
591- . await
592- . map_err ( |err| {
593- warn ! (
594- %project_id,
595- connector = %connector_name,
596- endpoint = %endpoint,
597- "HTTPProxy create failed: {err:#}"
598- ) ;
599- format_quota_error ( & err, "HTTPProxy" ) . unwrap_or_else ( || {
600- format ! ( "Failed to create HTTPProxy: {err}" )
601- } )
602- } )
603- . map_err ( |err| n0_error:: anyerr!( err) ) ?;
589+ let post_params = PostParams :: default ( ) ;
590+ proxy = with_quota_check_retry ( "HTTPProxy create" , || {
591+ proxies. create ( & post_params, & proxy)
592+ } )
593+ . await
594+ . map_err ( |err| {
595+ warn ! (
596+ %project_id,
597+ connector = %connector_name,
598+ endpoint = %endpoint,
599+ "HTTPProxy create failed: {err:#}"
600+ ) ;
601+ format_quota_error ( & err, "HTTPProxy" )
602+ . unwrap_or_else ( || format ! ( "Failed to create HTTPProxy: {err}" ) )
603+ } )
604+ . map_err ( |err| n0_error:: anyerr!( err) ) ?;
604605 let proxy_name = proxy. name_any ( ) ;
605606 debug ! (
606607 %project_id,
@@ -624,20 +625,22 @@ impl TunnelService {
624625 spec : ad_spec,
625626 status : None ,
626627 } ;
627- ads. create ( & PostParams :: default ( ) , & ad)
628- . await
629- . map_err ( |err| {
630- warn ! (
631- %project_id,
632- proxy = %proxy_name,
633- connector = %connector_name,
634- "ConnectorAdvertisement create failed: {err:#}"
635- ) ;
636- format_quota_error ( & err, "ConnectorAdvertisement" ) . unwrap_or_else ( || {
637- format ! ( "Failed to create ConnectorAdvertisement: {err}" )
638- } )
639- } )
640- . map_err ( |err| n0_error:: anyerr!( err) ) ?;
628+ let ad_post = PostParams :: default ( ) ;
629+ with_quota_check_retry ( "ConnectorAdvertisement create" , || {
630+ ads. create ( & ad_post, & ad)
631+ } )
632+ . await
633+ . map_err ( |err| {
634+ warn ! (
635+ %project_id,
636+ proxy = %proxy_name,
637+ connector = %connector_name,
638+ "ConnectorAdvertisement create failed: {err:#}"
639+ ) ;
640+ format_quota_error ( & err, "ConnectorAdvertisement" )
641+ . unwrap_or_else ( || format ! ( "Failed to create ConnectorAdvertisement: {err}" ) )
642+ } )
643+ . map_err ( |err| n0_error:: anyerr!( err) ) ?;
641644 debug ! (
642645 %project_id,
643646 proxy = %proxy_name,
@@ -681,7 +684,10 @@ impl TunnelService {
681684 } ,
682685 status : None ,
683686 } ;
684- tpps. create ( & PostParams :: default ( ) , & tpp)
687+ let tpp_post = PostParams :: default ( ) ;
688+ with_quota_check_retry ( "TrafficProtectionPolicy create" , || {
689+ tpps. create ( & tpp_post, & tpp)
690+ } )
685691 . await
686692 . map_err ( |err| {
687693 warn ! (
@@ -883,9 +889,12 @@ impl TunnelService {
883889 spec : ad_spec,
884890 status : None ,
885891 } ;
886- ads. create ( & PostParams :: default ( ) , & ad)
887- . await
888- . std_context ( "Failed to create ConnectorAdvertisement" ) ?;
892+ let ad_post = PostParams :: default ( ) ;
893+ with_quota_check_retry ( "ConnectorAdvertisement create" , || {
894+ ads. create ( & ad_post, & ad)
895+ } )
896+ . await
897+ . std_context ( "Failed to create ConnectorAdvertisement" ) ?;
889898 }
890899 }
891900 } else if ads
@@ -1157,10 +1166,12 @@ impl TunnelService {
11571166 } ,
11581167 status : None ,
11591168 } ;
1160- connector = connectors
1161- . create ( & PostParams :: default ( ) , & connector)
1162- . await
1163- . std_context ( "Failed to create Connector" ) ?;
1169+ let conn_post = PostParams :: default ( ) ;
1170+ connector = with_quota_check_retry ( "Connector create" , || {
1171+ connectors. create ( & conn_post, & connector)
1172+ } )
1173+ . await
1174+ . std_context ( "Failed to create Connector" ) ?;
11641175
11651176 if let Some ( details) = build_connection_details ( & self . listen ) {
11661177 let details_value = serde_json:: to_value ( details)
@@ -1477,6 +1488,13 @@ async fn patch_device_annotations(api: &Api<Connector>, connector: &mut Connecto
14771488
14781489fn format_quota_error ( err : & dyn std:: error:: Error , resource_type : & str ) -> Option < String > {
14791490 let err_msg = err. to_string ( ) ;
1491+ // Transient quota-check timeout — the error literally says "Please try
1492+ // again in a moment". Don't relabel it as "exceeded"; with the retry
1493+ // wrapper applied at creation sites we'll usually never get here, and
1494+ // when we do the original message is the most accurate signal.
1495+ if err_msg. contains ( "took too long to be checked against your quota" ) {
1496+ return None ;
1497+ }
14801498 if err_msg. contains ( "quota" ) || err_msg. contains ( "Insufficient quota" ) {
14811499 return Some ( format ! (
14821500 "Quota limit exceeded for {resource_type} resources.\n \n \
@@ -1490,6 +1508,57 @@ fn format_quota_error(err: &dyn std::error::Error, resource_type: &str) -> Optio
14901508 None
14911509}
14921510
1511+ /// True if `err` is the operator's transient quota-check timeout (a 403
1512+ /// whose message says "Please try again in a moment"). Distinct from
1513+ /// real quota exhaustion, which produces a different message and
1514+ /// shouldn't be retried.
1515+ fn is_quota_check_timeout ( err : & kube:: Error ) -> bool {
1516+ matches ! (
1517+ err,
1518+ kube:: Error :: Api ( e)
1519+ if e. code == 403
1520+ && e. message. contains( "took too long to be checked against your quota" )
1521+ )
1522+ }
1523+
1524+ /// Retry a kube API call up to ~15 seconds while it keeps tripping the
1525+ /// operator's quota-check timeout. Other errors return immediately so
1526+ /// real failures still surface fast. Prints a one-line stderr notice on
1527+ /// the first retry so the user knows we're waiting on the server.
1528+ async fn with_quota_check_retry < T , F , Fut > ( op_name : & str , mut f : F ) -> kube:: Result < T >
1529+ where
1530+ F : FnMut ( ) -> Fut ,
1531+ Fut : std:: future:: Future < Output = kube:: Result < T > > ,
1532+ {
1533+ let delays = [
1534+ std:: time:: Duration :: from_secs ( 1 ) ,
1535+ std:: time:: Duration :: from_secs ( 2 ) ,
1536+ std:: time:: Duration :: from_secs ( 4 ) ,
1537+ std:: time:: Duration :: from_secs ( 8 ) ,
1538+ ] ;
1539+ for ( i, delay) in delays. iter ( ) . enumerate ( ) {
1540+ match f ( ) . await {
1541+ Ok ( v) => return Ok ( v) ,
1542+ Err ( err) if is_quota_check_timeout ( & err) => {
1543+ if i == 0 {
1544+ eprintln ! (
1545+ " … quota check timed out for {op_name}; retrying for up to 15s"
1546+ ) ;
1547+ }
1548+ warn ! (
1549+ op = op_name,
1550+ attempt = i + 1 ,
1551+ next_delay_s = delay. as_secs( ) ,
1552+ "quota check timed out; retrying"
1553+ ) ;
1554+ tokio:: time:: sleep ( * delay) . await ;
1555+ }
1556+ Err ( err) => return Err ( err) ,
1557+ }
1558+ }
1559+ f ( ) . await
1560+ }
1561+
14931562fn publish_tickets_enabled ( ) -> bool {
14941563 std:: env:: var ( "DATUM_CONNECT_PUBLISH_TICKETS" )
14951564 . map ( |value| matches ! ( value. as_str( ) , "1" | "true" | "TRUE" | "yes" | "YES" ) )
@@ -1678,6 +1747,46 @@ mod tests {
16781747 ) ;
16791748 }
16801749
1750+ fn api_error ( code : u16 , message : & str ) -> kube:: Error {
1751+ kube:: Error :: Api ( kube:: core:: ErrorResponse {
1752+ status : "Failure" . into ( ) ,
1753+ message : message. into ( ) ,
1754+ reason : if code == 403 { "Forbidden" . into ( ) } else { "Unknown" . into ( ) } ,
1755+ code,
1756+ } )
1757+ }
1758+
#[test]
fn quota_check_timeout_classifier_matches_transient_403() {
    // Fixture 1: the phrase the operator emits when the quota check itself
    // times out. The message asks the caller to "try again in a moment",
    // which is what separates it from real quota exhaustion.
    let transient = api_error(
        403,
        "connectoradvertisements.networking.datumapis.com \" tunnel-x\" is forbidden: \
         Your request took too long to be checked against your quota. Please try again \
         in a moment — if this keeps happening, contact support.",
    );
    // Fixture 2: genuine exhaustion, same status code but a different message.
    let out_of_quota = api_error(403, "Insufficient quota for ConnectorAdvertisement");
    // Fixture 3: the timeout text under a different status code.
    let wrong_status = api_error(401, "took too long to be checked against your quota");

    // The classifier accepts only the 403 + timeout-phrase combination.
    assert!(is_quota_check_timeout(&transient));
    assert!(!is_quota_check_timeout(&out_of_quota));
    assert!(!is_quota_check_timeout(&wrong_status));

    // The formatter must leave the timeout message alone instead of
    // rewriting it into a misleading "Quota limit exceeded" string.
    assert!(
        format_quota_error(&transient, "ConnectorAdvertisement").is_none(),
        "transient timeout must propagate verbatim, not become 'exceeded'"
    );
    // Real exhaustion still gets the friendly formatted message.
    assert!(format_quota_error(&out_of_quota, "ConnectorAdvertisement").is_some());
}
1789+
16811790 #[ test]
16821791 fn progress_pending_when_status_is_stale_for_current_generation ( ) {
16831792 // `tunnel listen --id` PATCHes the HTTPProxy spec to re-point the
0 commit comments