Skip to content

Commit dd5474e

Browse files
bgagentclaude
andcommitted
fix(linear): address PR #87 must-fix review items
- linear_reactions: guard auth-circuit globals with `_auth_state_lock` so the daemon sweep thread and the main thread can't race the read-modify-write on `_consecutive_auth_failures` / `_auth_circuit_open`. - linear_reactions: wrap the daemon sweep target in `_sweep_stale_reactions_safe` so an unexpected exception logs at ERROR instead of dying silently (stderr from a daemon thread doesn't reliably reach CloudWatch). - linear_reactions: only increment the sweep delete counter when `_graphql(_DELETE_MUTATION, ...)` actually returns a non-None response — previously the summary log overstated success. - config: hoist `import boto3` out of the catch-narrowed try/except so an `ImportError` (boto3 missing from the image) degrades to a WARN log instead of crashing the agent. - orchestrate-task: wrap `notifyLinearOnConcurrencyCap` in a defensive try/catch — durable-execution retries the entire admission-control step on throw, which would re-fire `failTask` + `emitTaskEvent` and produce duplicate events. - tests: 1 new throw-propagation test for `notifyLinearOnConcurrencyCap`, 3 new tests for `resolve_linear_api_token` (cached env, no-arn, ImportError fallback). Auto-reset fixture in `test_linear_reactions.py` now also resets the circuit-breaker globals between tests so future cases don't leak state. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 09ace05 commit dd5474e

6 files changed

Lines changed: 114 additions & 17 deletions

File tree

agent/src/config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,14 @@ def resolve_linear_api_token() -> str:
6060
try:
6161
import boto3
6262
from botocore.exceptions import BotoCoreError, ClientError
63+
except ImportError as e:
64+
# boto3 missing from the container image — degrade gracefully rather
65+
# than hard-crashing the agent. The Linear MCP will fail on first
66+
# call with a clear auth error.
67+
log("WARN", f"resolve_linear_api_token: boto3 unavailable ({e}); skipping")
68+
return ""
6369

70+
try:
6471
region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
6572
client = boto3.client("secretsmanager", region_name=region)
6673
resp = client.get_secret_value(SecretId=secret_arn)

agent/src/linear_reactions.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,12 @@
9595
#: Linear's quota. After ``_AUTH_FAILURE_THRESHOLD`` consecutive 401/403
9696
#: responses, ``_auth_circuit_open`` flips to True and all later calls
9797
#: short-circuit (return None) without hitting the network. A successful
98-
#: 2xx response resets the counter.
98+
#: 2xx response resets the counter. The lock guards the read-modify-write
99+
#: against the daemon sweep thread.
99100
_AUTH_FAILURE_THRESHOLD = 3
100101
_consecutive_auth_failures = 0
101102
_auth_circuit_open = False
103+
_auth_state_lock = threading.Lock()
102104

103105

104106
def _enabled(channel_source: str, channel_metadata: dict[str, str] | None) -> str | None:
@@ -126,8 +128,9 @@ def _graphql(query: str, variables: dict[str, Any]) -> dict[str, Any] | None:
126128
"""
127129
global _consecutive_auth_failures, _auth_circuit_open
128130

129-
if _auth_circuit_open:
130-
return None
131+
with _auth_state_lock:
132+
if _auth_circuit_open:
133+
return None
131134

132135
token = os.environ.get("LINEAR_API_TOKEN", "")
133136
if not token:
@@ -149,13 +152,19 @@ def _graphql(query: str, variables: dict[str, Any]) -> dict[str, Any] | None:
149152
return None
150153

151154
if resp.status_code in (401, 403):
152-
_consecutive_auth_failures += 1
153-
if _consecutive_auth_failures >= _AUTH_FAILURE_THRESHOLD and not _auth_circuit_open:
154-
_auth_circuit_open = True
155+
with _auth_state_lock:
156+
_consecutive_auth_failures += 1
157+
opened = (
158+
_consecutive_auth_failures >= _AUTH_FAILURE_THRESHOLD and not _auth_circuit_open
159+
)
160+
if opened:
161+
_auth_circuit_open = True
162+
failures = _consecutive_auth_failures
163+
if opened:
155164
log(
156165
"ERROR",
157166
"linear_reactions: auth circuit OPEN after "
158-
f"{_consecutive_auth_failures} consecutive {resp.status_code}s — "
167+
f"{failures} consecutive {resp.status_code}s — "
159168
"API token likely revoked. Suppressing further Linear calls "
160169
"for this container.",
161170
)
@@ -169,7 +178,8 @@ def _graphql(query: str, variables: dict[str, Any]) -> dict[str, Any] | None:
169178

170179
# Successful 2xx — reset the auth failure counter so transient blips don't
171180
# accumulate toward the threshold.
172-
_consecutive_auth_failures = 0
181+
with _auth_state_lock:
182+
_consecutive_auth_failures = 0
173183

174184
body = resp.json() if resp.content else {}
175185
if body.get("errors"):
@@ -199,6 +209,23 @@ def _get_viewer_id() -> str | None:
199209
return None
200210

201211

212+
def _sweep_stale_reactions_safe(issue_id: str, exclude_id: str | None = None) -> None:
213+
"""Top-level wrapper for the sweep daemon thread.
214+
215+
Catches everything so an unexpected ``TypeError`` / ``AttributeError``
216+
inside ``_sweep_stale_reactions`` doesn't kill the thread silently —
217+
stderr from a daemon thread may not reach CloudWatch in containerized
218+
environments.
219+
"""
220+
try:
221+
_sweep_stale_reactions(issue_id, exclude_id=exclude_id)
222+
except Exception as e:
223+
log(
224+
"ERROR",
225+
f"linear_reactions: sweep thread crashed ({type(e).__name__}): {e}",
226+
)
227+
228+
202229
def _sweep_stale_reactions(issue_id: str, exclude_id: str | None = None) -> None:
203230
"""Delete bgagent-owned 👀/✅/❌ reactions on the issue.
204231
@@ -252,8 +279,8 @@ def _sweep_stale_reactions(issue_id: str, exclude_id: str | None = None) -> None
252279
if exclude_id is not None and rid == exclude_id:
253280
# The 👀 we just posted — skip, it's the new marker.
254281
continue
255-
_graphql(_DELETE_MUTATION, {"id": rid})
256-
deletes += 1
282+
if _graphql(_DELETE_MUTATION, {"id": rid}) is not None:
283+
deletes += 1
257284
deletes_ms = int((time.monotonic() - deletes_start) * 1000)
258285
total_ms = int((time.monotonic() - sweep_start) * 1000)
259286
log(
@@ -311,7 +338,7 @@ def react_task_started(
311338
# status. The sweep filters out the just-posted reaction id so it
312339
# never deletes itself.
313340
threading.Thread(
314-
target=_sweep_stale_reactions,
341+
target=_sweep_stale_reactions_safe,
315342
args=(issue_id,),
316343
kwargs={"exclude_id": rid},
317344
daemon=True,

agent/tests/test_config.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
"""Unit tests for config.py — build_config and constants."""
22

3+
import sys
4+
from unittest.mock import patch
5+
36
import pytest
47

5-
from config import PR_TASK_TYPES, build_config
8+
from config import PR_TASK_TYPES, build_config, resolve_linear_api_token
69
from models import TaskConfig
710

811

@@ -85,3 +88,35 @@ def test_auto_generated_task_id(self):
8588
)
8689
assert config.task_id
8790
assert len(config.task_id) == 12
91+
92+
93+
class TestResolveLinearApiToken:
94+
"""Coverage for the secrets-manager + boto3 fallback paths."""
95+
96+
def test_returns_cached_env_var_without_calling_boto(self, monkeypatch):
97+
monkeypatch.setenv("LINEAR_API_TOKEN", "lin_cached")
98+
monkeypatch.setenv("LINEAR_API_TOKEN_SECRET_ARN", "arn:aws:sm:::secret/linear")
99+
# boto3 must not be touched if the env var is already set.
100+
with patch("config.log") as mock_log:
101+
assert resolve_linear_api_token() == "lin_cached"
102+
mock_log.assert_not_called()
103+
104+
def test_returns_empty_when_no_secret_arn(self, monkeypatch):
105+
monkeypatch.delenv("LINEAR_API_TOKEN", raising=False)
106+
monkeypatch.delenv("LINEAR_API_TOKEN_SECRET_ARN", raising=False)
107+
assert resolve_linear_api_token() == ""
108+
109+
def test_import_error_degrades_gracefully(self, monkeypatch):
110+
"""If boto3 is missing from the container image, log WARN and return ''
111+
rather than crashing the agent."""
112+
monkeypatch.delenv("LINEAR_API_TOKEN", raising=False)
113+
monkeypatch.setenv("LINEAR_API_TOKEN_SECRET_ARN", "arn:aws:sm:::secret/linear")
114+
# Force `import boto3` (executed inside resolve_linear_api_token) to
115+
# raise ImportError by removing it from sys.modules and shadowing it.
116+
monkeypatch.setitem(sys.modules, "boto3", None)
117+
with patch("config.log") as mock_log:
118+
assert resolve_linear_api_token() == ""
119+
# WARN logged, no exception escaped.
120+
assert mock_log.call_count == 1
121+
assert mock_log.call_args[0][0] == "WARN"
122+
assert "boto3 unavailable" in mock_log.call_args[0][1]

agent/tests/test_linear_reactions.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,17 @@
1919

2020

2121
@pytest.fixture(autouse=True)
22-
def _reset_viewer_cache():
23-
"""Reset the module-level viewer-id cache between tests so one test's
24-
successful viewer fetch doesn't leak into another test that asserts the
25-
sweep no-ops on viewer-fetch failure."""
22+
def _reset_module_state():
23+
"""Reset module-level caches and the auth circuit breaker between tests
24+
so one test's state never leaks into another (viewer cache, consecutive
25+
auth-failure counter, circuit-open flag)."""
2626
linear_reactions._viewer_id_cache = None
27+
linear_reactions._consecutive_auth_failures = 0
28+
linear_reactions._auth_circuit_open = False
2729
yield
2830
linear_reactions._viewer_id_cache = None
31+
linear_reactions._consecutive_auth_failures = 0
32+
linear_reactions._auth_circuit_open = False
2933

3034

3135
def _viewer_response(viewer_id: str = "viewer-bot") -> MagicMock:

cdk/src/handlers/orchestrate-task.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,16 @@ const durableHandler: DurableExecutionHandler<OrchestrateTaskEvent, void> = asyn
7575
if (!result) {
7676
await failTask(taskId, current.status, 'User concurrency limit reached', task.user_id, false);
7777
await emitTaskEvent(taskId, 'admission_rejected', { reason: 'concurrency_limit' });
78-
await notifyLinearOnConcurrencyCap(task);
78+
// Linear feedback is non-fatal: a throw here would re-run failTask +
79+
// emitTaskEvent on the durable-execution retry, producing duplicate events.
80+
try {
81+
await notifyLinearOnConcurrencyCap(task);
82+
} catch (err) {
83+
logger.warn('Linear concurrency-cap feedback failed (non-fatal)', {
84+
task_id: taskId,
85+
error: err instanceof Error ? err.message : String(err),
86+
});
87+
}
7988
}
8089
return result;
8190
});

cdk/test/handlers/orchestrate-task-feedback.test.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,4 +128,19 @@ describe('notifyLinearOnConcurrencyCap', () => {
128128
process.env.LINEAR_API_TOKEN_SECRET_ARN = saved;
129129
}
130130
});
131+
132+
test('reportIssueFailure rejection propagates (caller must catch)', async () => {
133+
// The helper itself swallows network errors internally, but we contract
134+
// for callers to wrap the call defensively because durable-execution
135+
// retries the entire step on throw, producing duplicate failTask +
136+
// emitTaskEvent. This test asserts the rejection actually propagates so
137+
// the orchestrate-task try-catch is load-bearing, not redundant.
138+
reportIssueFailureMock.mockRejectedValue(new Error('boom'));
139+
await expect(
140+
notifyLinearOnConcurrencyCap(task({
141+
channel_source: 'linear',
142+
channel_metadata: { linear_issue_id: 'lin-issue-1' },
143+
})),
144+
).rejects.toThrow('boom');
145+
});
131146
});

0 commit comments

Comments
 (0)