-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
fix: Core SDK architectural gaps - retry jitter, timeout enforcement, exception handling #1554
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fd4296b
3002112
9f170d6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -940,6 +940,7 @@ def workflow(self): | |
| stacklevel=3 | ||
| ) | ||
| current_iter = 0 # Track how many times we've looped | ||
| workflow_start = time.monotonic() # For timeout enforcement | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Timeout does not cover pre-loop CSV/file expansion.
Consider adding an early timeout check immediately before entering the main loop, or moving the `workflow_start = time.monotonic()` initialization so that the pre-loop CSV/file expansion is also covered by the timeout. Also applies to: 1072-1078 🤖 Prompt for AI Agents |
||
| # Build workflow relationships first | ||
| for task in self.tasks.values(): | ||
| if task.next_tasks: | ||
|
|
@@ -1068,6 +1069,14 @@ def workflow(self): | |
| logging.info(f"Max iteration limit {self.max_iter} reached, ending workflow.") | ||
| break | ||
|
|
||
| # Enforce workflow timeout if set | ||
| if self.workflow_timeout is not None: | ||
| elapsed = time.monotonic() - workflow_start | ||
| if elapsed > self.workflow_timeout: | ||
| logging.warning(f"Workflow timeout ({self.workflow_timeout}s) exceeded after {elapsed:.1f}s, ending workflow.") | ||
| self.workflow_cancelled = True | ||
| break | ||
|
|
||
| # ADDED: Check workflow finished flag at the start of each cycle | ||
| if self.workflow_finished: | ||
| logging.info("Workflow finished early as all tasks are completed.") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| """ | ||
| Test for retry jitter in error classifier (Issue #1553 Gap 2) | ||
| """ | ||
| import pytest | ||
| from praisonaiagents.llm.error_classifier import ErrorCategory, get_retry_delay | ||
|
|
||
|
|
||
| def test_rate_limit_jitter(): | ||
| """Test that RATE_LIMIT errors use jitter with minimum floor""" | ||
| delays = [] | ||
| for _ in range(20): | ||
| delay = get_retry_delay(ErrorCategory.RATE_LIMIT, attempt=1) | ||
| delays.append(delay) | ||
|
|
||
| # All delays should be in valid range [base_delay=1.0, max_delay=3.0] | ||
| assert all(1.0 <= delay <= 3.0 for delay in delays), f"Some delays out of range: {delays}" | ||
|
|
||
| # Should have some variation (jitter working) | ||
| unique_delays = len(set(delays)) | ||
| assert unique_delays >= 5, f"Not enough variation in delays (got {unique_delays} unique out of 20)" | ||
|
|
||
| # Should have minimum floor (no zero delays) | ||
| assert all(delay >= 1.0 for delay in delays), f"Some delays below minimum: {min(delays)}" | ||
|
|
||
|
|
||
| def test_transient_jitter(): | ||
| """Test that TRANSIENT errors use jitter with minimum floor""" | ||
| delays = [] | ||
| for _ in range(20): | ||
| delay = get_retry_delay(ErrorCategory.TRANSIENT, attempt=1) | ||
| delays.append(delay) | ||
|
|
||
| # All delays should be in valid range [base_delay=1.0, max_delay=2.0] | ||
| assert all(1.0 <= delay <= 2.0 for delay in delays), f"Some delays out of range: {delays}" | ||
|
|
||
| # Should have some variation | ||
| unique_delays = len(set(delays)) | ||
| assert unique_delays >= 5, f"Not enough variation in delays (got {unique_delays} unique out of 20)" | ||
|
|
||
| # Should have minimum floor | ||
| assert all(delay >= 1.0 for delay in delays), f"Some delays below minimum: {min(delays)}" | ||
|
|
||
|
|
||
| def test_context_limit_deterministic(): | ||
| """Test that CONTEXT_LIMIT delays remain deterministic (no jitter needed)""" | ||
| delay1 = get_retry_delay(ErrorCategory.CONTEXT_LIMIT, attempt=1) | ||
| delay2 = get_retry_delay(ErrorCategory.CONTEXT_LIMIT, attempt=1) | ||
| delay3 = get_retry_delay(ErrorCategory.CONTEXT_LIMIT, attempt=2) | ||
|
|
||
| # Context limits should be deterministic | ||
| assert delay1 == delay2, "Context limit delays should be deterministic" | ||
| assert delay1 == 0.5, f"Context limit delay should be 0.5, got {delay1}" | ||
| assert delay3 == 0.5, f"Context limit delay should be 0.5 regardless of attempt, got {delay3}" | ||
|
|
||
|
|
||
| def test_exponential_backoff_with_jitter(): | ||
| """Test that exponential backoff still works with jitter""" | ||
| # Test increasing attempts for rate limits | ||
| delay_attempt1 = get_retry_delay(ErrorCategory.RATE_LIMIT, attempt=1) # range: [1.0, 3.0] | ||
| delay_attempt2 = get_retry_delay(ErrorCategory.RATE_LIMIT, attempt=2) # range: [1.0, 9.0] | ||
| delay_attempt3 = get_retry_delay(ErrorCategory.RATE_LIMIT, attempt=3) # range: [1.0, 27.0] | ||
|
|
||
| # Higher attempts should generally produce higher maximum possible delays | ||
| # (though jitter means specific values may vary) | ||
| assert delay_attempt1 <= 3.0, f"Attempt 1 delay should be <= 3.0, got {delay_attempt1}" | ||
| assert delay_attempt2 <= 9.0, f"Attempt 2 delay should be <= 9.0, got {delay_attempt2}" | ||
| assert delay_attempt3 <= 60.0, f"Attempt 3 delay should be <= 60.0 (capped), got {delay_attempt3}" | ||
|
|
||
|
|
||
| def test_no_retry_categories(): | ||
| """Test that AUTH and other non-retryable categories return 0""" | ||
| assert get_retry_delay(ErrorCategory.AUTH, attempt=1) == 0 | ||
| assert get_retry_delay(ErrorCategory.AUTH, attempt=5) == 0 |
Uh oh!
There was an error while loading. Please reload this page.