|
85 | 85 | # Full scan upload retry policy. Production gateways occasionally drop an upload mid-request |
86 | 86 | # (a backend pod stalls and stops reading the body; the client then sees a 502/408 or a reset |
87 | 87 | # connection). Those episodes are transient and pod-local: a retried upload routed to another |
88 | | -# backend almost always succeeds, and because the server never finished reading the request |
89 | | -# body, no scan was created, so retrying cannot duplicate a scan. |
| 88 | +# backend almost always succeeds. In this failure mode the server never finished reading the |
| 89 | +# request body, so no scan was created and a retry does not duplicate one. (A duplicate is |
| 90 | +# possible only if a gateway timeout races a request the server later completes; that is |
| 91 | +# benign - the retried scan supersedes the orphaned one, same as running the CLI twice.) |
90 | 92 | FULL_SCAN_UPLOAD_MAX_ATTEMPTS = 3 |
91 | 93 | # Wait before retry attempt 2 and attempt 3 respectively (plus a little jitter so a fleet of |
92 | 94 | # CI jobs hitting the same episode doesn't retry in lock-step). |
|
104 | 106 | def _is_transient_full_scan_upload_error(error: Exception) -> bool: |
105 | 107 | """Whether a full-scan upload failure is transient and safe to retry. |
106 | 108 |
|
107 | | - Transient means the failure happened at the gateway/connection level before the server |
108 | | - finished reading the request body (so no scan was created server-side): HTTP 502/503/504/408, |
109 | | - client-side timeouts, and dropped/reset connections. 4xx client errors (400/401/403/404/429) |
110 | | - and success responses carrying an error payload are never retried. |
| 109 | + Transient means the failure happened at the gateway/connection level, normally before the |
| 110 | + server finished reading the request body (so no scan was created server-side): HTTP |
| 111 | + 502/503/504/408, client-side timeouts, and dropped/reset connections. Other 4xx client errors |
| 112 | + (400/401/403/404/429) and success responses carrying an error payload are never retried. |
111 | 113 | """ |
112 | 114 | if isinstance(error, (APIBadGateway, APIConnectionError, APITimeout)): |
113 | 115 | # 502 / connection reset-dropped / request timeout - the SDK raises dedicated classes. |
@@ -835,11 +837,12 @@ def create_full_scan(self, files: List[str], params: FullScanParams, base_paths: |
835 | 837 | try: |
836 | 838 | # Retry transient gateway/timeout failures (502/503/504/408, dropped connections, |
837 | 839 | # timeouts) with increasing waits; a stalled backend pod recovers or gets routed |
838 | | - # around within minutes, and since it never finished reading the request body no |
839 | | - # scan was created, so a retry cannot duplicate one. fullscans.post() rebuilds its |
840 | | - # lazy file loaders from the plain paths in upload_files on every call, so simply |
841 | | - # calling it again per attempt is safe. The loop must stay inside this try so the |
842 | | - # temp .br files (cleaned up in the finally below) outlive every attempt. |
| 840 | + # around within minutes, and in this failure mode the server never finished reading |
| 841 | + # the request body, so no scan was created and a retry does not duplicate one (see |
| 842 | + # the retry-policy comment above FULL_SCAN_UPLOAD_MAX_ATTEMPTS). fullscans.post() |
| 843 | + # rebuilds its lazy file loaders from the plain paths in upload_files on every call, |
| 844 | + # so simply calling it again per attempt is safe. The loop must stay inside this try |
| 845 | + # so the temp .br files (cleaned up in the finally below) outlive every attempt. |
843 | 846 | for attempt in range(1, FULL_SCAN_UPLOAD_MAX_ATTEMPTS + 1): |
844 | 847 | try: |
845 | 848 | res = self.sdk.fullscans.post(upload_files, params, use_types=True, use_lazy_loading=True, max_open_files=50, base_paths=base_paths) |
|
0 commit comments