|
1 | 1 | use chrono::{Local, Utc}; |
2 | | -use rand::Rng; |
3 | 2 | use std::collections::HashMap; |
4 | 3 | use std::fs::{self, File, OpenOptions}; |
5 | 4 | use std::io::Write; |
6 | 5 | use std::path::{Path, PathBuf}; |
7 | 6 | use std::time::{Duration, Instant}; |
8 | 7 | use tokio::process::Command; |
9 | | -use tokio::time::sleep; |
10 | 8 | use tracing::{debug, error, info, warn}; |
11 | 9 |
|
12 | | -use crate::config::{Job, RetryConfig, RunnerConfig, TimezoneConfig}; |
| 10 | +use crate::config::{Job, RunnerConfig, TimezoneConfig}; |
13 | 11 | use crate::env; |
14 | 12 | use crate::git; |
15 | 13 | use crate::webhook::{self, BuildFailure, JobFailure}; |
16 | 14 |
|
17 | | -/// Default jitter ratio when not explicitly configured (25% of base delay) |
18 | | -const AUTO_JITTER_RATIO: u32 = 25; |
19 | | - |
20 | 15 | /// Grace period to wait after SIGTERM before sending SIGKILL |
21 | 16 | const GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(10); |
22 | 17 |
|
@@ -329,79 +324,47 @@ pub async fn execute_job(job: &Job, sot_path: &Path, runner: &RunnerConfig) -> b |
329 | 324 | .as_ref() |
330 | 325 | .and_then(|p| create_log_file(&job_dir, p, job.log_max_size)); |
331 | 326 |
|
332 | | - let max_attempts = job.retry.as_ref().map(|r| r.max + 1).unwrap_or(1); |
333 | | - let mut last_result: Option<CommandResult> = None; |
334 | | - |
335 | | - for attempt in 0..max_attempts { |
336 | | - if attempt > 0 { |
337 | | - if let Some(retry) = job.retry.as_ref() { |
338 | | - let delay = calculate_backoff(retry, attempt - 1); |
339 | | - info!( |
340 | | - target: "rollcron::job", |
341 | | - job_id = %job.id, |
342 | | - attempt, |
343 | | - max_retries = max_attempts - 1, |
344 | | - delay = ?delay, |
345 | | - "Retrying" |
346 | | - ); |
347 | | - sleep(delay).await; |
348 | | - } |
349 | | - } |
350 | | - |
351 | | - info!( |
352 | | - target: "rollcron::job", |
353 | | - job_id = %job.id, |
354 | | - name = %job.name, |
355 | | - command = %job.command, |
356 | | - "Starting job" |
357 | | - ); |
358 | | - |
359 | | - if let Some(ref mut file) = log_file { |
360 | | - let marker = if attempt > 0 { |
361 | | - format!("Job started (retry {}/{})", attempt, max_attempts - 1) |
362 | | - } else { |
363 | | - "Job started".to_string() |
364 | | - }; |
365 | | - write_log_marker(file, &runner.timezone, job.timezone.as_ref(), &marker); |
366 | | - } |
367 | | - |
368 | | - let start_time = Instant::now(); |
369 | | - let result = run_command(job, &work_dir, sot_path, runner).await; |
370 | | - let duration = start_time.elapsed(); |
371 | | - let success = handle_result(job, &result, log_file.as_mut(), &runner.timezone, duration); |
| 327 | + info!( |
| 328 | + target: "rollcron::job", |
| 329 | + job_id = %job.id, |
| 330 | + name = %job.name, |
| 331 | + command = %job.command, |
| 332 | + "Starting job" |
| 333 | + ); |
372 | 334 |
|
373 | | - if success { |
374 | | - return true; |
375 | | - } |
| 335 | + if let Some(ref mut file) = log_file { |
| 336 | + write_log_marker(file, &runner.timezone, job.timezone.as_ref(), "Job started"); |
| 337 | + } |
376 | 338 |
|
377 | | - last_result = Some(result); |
| 339 | + let start_time = Instant::now(); |
| 340 | + let result = run_command(job, &work_dir, sot_path, runner).await; |
| 341 | + let duration = start_time.elapsed(); |
| 342 | + let success = handle_result(job, &result, log_file.as_mut(), &runner.timezone, duration); |
378 | 343 |
|
379 | | - if attempt + 1 < max_attempts { |
380 | | - debug!(target: "rollcron::job", job_id = %job.id, "Will retry..."); |
381 | | - } |
| 344 | + if success { |
| 345 | + return true; |
382 | 346 | } |
383 | 347 |
|
384 | | - // All retries exhausted - send webhook notifications if configured |
| 348 | + // Job failed - send webhook notifications if configured |
385 | 349 | if !job.webhook.is_empty() { |
386 | | - let (error, stderr) = match &last_result { |
387 | | - Some(CommandResult::Completed(output)) => { |
| 350 | + let (error, stderr) = match &result { |
| 351 | + CommandResult::Completed(output) => { |
388 | 352 | let err = format!("exit code {:?}", output.status.code()); |
389 | 353 | let stderr = String::from_utf8_lossy(&output.stderr).to_string(); |
390 | 354 | (err, stderr) |
391 | 355 | } |
392 | | - Some(CommandResult::ExecError(e)) => (format!("exec error: {}", e), String::new()), |
393 | | - Some(CommandResult::Timeout) => { |
| 356 | + CommandResult::ExecError(e) => (format!("exec error: {}", e), String::new()), |
| 357 | + CommandResult::Timeout => { |
394 | 358 | (format!("timeout after {:?}", job.timeout), String::new()) |
395 | 359 | } |
396 | | - None => ("unknown error".to_string(), String::new()), |
397 | 360 | }; |
398 | 361 |
|
399 | 362 | let failure = JobFailure { |
400 | 363 | job_id: &job.id, |
401 | 364 | job_name: &job.name, |
402 | 365 | error, |
403 | 366 | stderr, |
404 | | - attempts: max_attempts, |
| 367 | + attempts: 1, |
405 | 368 | }; |
406 | 369 |
|
407 | 370 | let runner_env = load_runner_env_vars(sot_path, runner); |
@@ -714,27 +677,6 @@ fn handle_result(job: &Job, result: &CommandResult, log_file: Option<&mut File>, |
714 | 677 | } |
715 | 678 | } |
716 | 679 |
|
717 | | -// === Backoff === |
718 | | - |
719 | | -fn calculate_backoff(retry: &RetryConfig, attempt: u32) -> Duration { |
720 | | - let base_delay = retry.delay.saturating_mul(2u32.saturating_pow(attempt)); |
721 | | - let jitter_max = |
722 | | - retry.jitter.unwrap_or_else(|| retry.delay.saturating_mul(AUTO_JITTER_RATIO) / 100); |
723 | | - base_delay.saturating_add(generate_jitter(jitter_max)) |
724 | | -} |
725 | | - |
726 | | -fn generate_jitter(max: Duration) -> Duration { |
727 | | - if max.is_zero() { |
728 | | - return Duration::ZERO; |
729 | | - } |
730 | | - let millis = max.as_millis(); |
731 | | - if millis == 0 { |
732 | | - return Duration::ZERO; |
733 | | - } |
734 | | - let jitter_millis = rand::thread_rng().gen_range(0..=millis); |
735 | | - Duration::from_millis(jitter_millis as u64) |
736 | | -} |
737 | | - |
738 | 680 | // === Logging === |
739 | 681 |
|
740 | 682 | fn rotate_log_file(path: &Path, max_size: u64) { |
@@ -819,7 +761,6 @@ mod tests { |
819 | 761 | command: cmd.to_string(), |
820 | 762 | timeout: Duration::from_secs(timeout_secs), |
821 | 763 | concurrency: Concurrency::Skip, |
822 | | - retry: None, |
823 | 764 | working_dir: None, |
824 | 765 | enabled: true, |
825 | 766 | timezone: None, |
@@ -858,37 +799,6 @@ mod tests { |
858 | 799 | execute_job(&job, &dir.path().to_path_buf(), &runner).await; |
859 | 800 | } |
860 | 801 |
|
861 | | - #[test] |
862 | | - fn exponential_backoff_calculation() { |
863 | | - let retry = RetryConfig { |
864 | | - max: 5, |
865 | | - delay: Duration::from_secs(1), |
866 | | - jitter: None, |
867 | | - }; |
868 | | - let backoff_0 = calculate_backoff(&retry, 0); |
869 | | - assert!(backoff_0 >= Duration::from_secs(1)); |
870 | | - assert!(backoff_0 <= Duration::from_millis(1250)); |
871 | | - |
872 | | - let backoff_1 = calculate_backoff(&retry, 1); |
873 | | - assert!(backoff_1 >= Duration::from_secs(2)); |
874 | | - assert!(backoff_1 <= Duration::from_millis(2250)); |
875 | | - } |
876 | | - |
877 | | - #[test] |
878 | | - fn generate_jitter_bounds() { |
879 | | - let max = Duration::from_millis(100); |
880 | | - for _ in 0..10 { |
881 | | - let jitter = generate_jitter(max); |
882 | | - assert!(jitter <= max); |
883 | | - } |
884 | | - } |
885 | | - |
886 | | - #[test] |
887 | | - fn generate_jitter_zero() { |
888 | | - let jitter = generate_jitter(Duration::ZERO); |
889 | | - assert_eq!(jitter, Duration::ZERO); |
890 | | - } |
891 | | - |
892 | 802 | #[test] |
893 | 803 | fn format_duration_milliseconds() { |
894 | 804 | assert_eq!(format_duration(Duration::from_millis(0)), "0ms"); |
|
0 commit comments