|
27 | 27 | import java.util.List; |
28 | 28 | import java.util.Map; |
29 | 29 | import java.util.Properties; |
30 | | -import java.util.function.Supplier; |
31 | 30 | import java.util.regex.Pattern; |
32 | 31 | import lombok.Getter; |
33 | 32 | import lombok.Setter; |
@@ -196,38 +195,36 @@ private Response getHostIpInternal() { |
196 | 195 |
|
197 | 196 | /** |
198 | 197 | * Check the pipeline service status with an exception backoff to make sure we don't raise any |
199 | | - * false positives. |
| 198 | + * false positives. Delegates to {@link #getServiceStatus()}, which already retries transient |
| 199 | + * failures. |
200 | 200 | */ |
201 | 201 | public String getServiceStatusBackoff() { |
| 202 | + PipelineServiceClientResponse status = getServiceStatus(); |
| 203 | + return status.getCode() != 200 ? UNHEALTHY_STATUS : HEALTHY_STATUS; |
| 204 | + } |
| 205 | + |
| 206 | + /** |
| 207 | + * Check the status of pipeline service to ensure it is healthy. Retries up to {@link |
| 208 | + * #MAX_ATTEMPTS} times with {@link #BACKOFF_TIME_SECONDS}-second backoff so that a single |
| 209 | + * transient failure (e.g. Airflow restart, network blip) does not permanently mark the agent as |
| 210 | + * unavailable. |
| 211 | + */ |
| 212 | + public PipelineServiceClientResponse getServiceStatus() { |
| 213 | + if (!pipelineServiceClientEnabled) { |
| 214 | + return buildHealthyStatus(DISABLED_STATUS).withPlatform(DISABLED_STATUS); |
| 215 | + } |
| 216 | + |
202 | 217 | RetryConfig retryConfig = |
203 | | - RetryConfig.<String>custom() |
| 218 | + RetryConfig.<PipelineServiceClientResponse>custom() |
204 | 219 | .maxAttempts(MAX_ATTEMPTS) |
205 | 220 | .waitDuration(Duration.ofMillis(BACKOFF_TIME_SECONDS * 1_000L)) |
206 | | - .retryOnResult(response -> !HEALTHY_STATUS.equals(response)) |
| 221 | + .retryOnResult(response -> response.getCode() != 200) |
207 | 222 | .failAfterMaxAttempts(false) |
208 | 223 | .build(); |
209 | 224 |
|
210 | 225 | Retry retry = Retry.of("getServiceStatus", retryConfig); |
211 | 226 |
|
212 | | - Supplier<String> responseSupplier = |
213 | | - () -> { |
214 | | - try { |
215 | | - PipelineServiceClientResponse status = getServiceStatus(); |
216 | | - return status.getCode() != 200 ? UNHEALTHY_STATUS : HEALTHY_STATUS; |
217 | | - } catch (Exception e) { |
218 | | - throw new RuntimeException(e); |
219 | | - } |
220 | | - }; |
221 | | - |
222 | | - return retry.executeSupplier(responseSupplier); |
223 | | - } |
224 | | - |
225 | | - /* Check the status of pipeline service to ensure it is healthy */ |
226 | | - public PipelineServiceClientResponse getServiceStatus() { |
227 | | - if (pipelineServiceClientEnabled) { |
228 | | - return getServiceStatusInternal(); |
229 | | - } |
230 | | - return buildHealthyStatus(DISABLED_STATUS).withPlatform(DISABLED_STATUS); |
| 227 | + return retry.executeSupplier(this::getServiceStatusInternal); |
231 | 228 | } |
232 | 229 |
|
233 | 230 | public List<PipelineStatus> getQueuedPipelineStatus(IngestionPipeline ingestionPipeline) { |
|
0 commit comments