Skip to content

Commit fdd980d

Browse files
sjarmak and claude committed
feat: [US-004] - Verify all 20 suites at Grade A
- Add missing test.sh wrappers to 12 org tasks (R.1 critical fix)
- Fix T.10 shared-state issues in 6 tasks:
  - ccx-onboard-search-201/202/203: /tmp/verify.py → mktemp
  - etcd-grpc-api-upgrade-001: /tmp/fallback_patch.diff → mktemp
  - python-http-class-naming-refac-001: /tmp/fallback_patch.diff → mktemp
  - flipt-ecr-auth-oci-fix-001: extract test patch heredoc to test_patch.diff

All 20 suites now at Grade A with zero critical or important failures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6cc28c7 commit fdd980d

File tree

19 files changed

+350
-237
lines changed
  • benchmarks
    • csb_org_compliance
    • csb_org_crossorg
    • csb_org_crossrepo_tracing/ccx-dep-trace-293/tests
    • csb_org_migration
    • csb_org_onboarding
      • ccx-onboard-search-201/tests
      • ccx-onboard-search-202/tests
      • ccx-onboard-search-203/tests
    • csb_org_org/ccx-agentic-290/tests
    • csb_org_platform
    • csb_org_security
    • csb_sdlc_design/etcd-grpc-api-upgrade-001/tests
    • csb_sdlc_fix/flipt-ecr-auth-oci-fix-001/tests
    • csb_sdlc_refactor/python-http-class-naming-refac-001/tests

19 files changed

+350
-237
lines changed
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
# test.sh — Harbor compatibility wrapper
3+
# Harbor requires tests/test.sh for task discovery (TaskPaths.is_valid() check).
4+
# The actual evaluation logic lives in eval.sh (SWE-Factory exit-code-first pattern).
5+
6+
# sg_only_env: restore full repo before verification (no-op for regular runs)
7+
[ -f /tmp/.sg_only_mode ] && [ -f /tests/sgonly_verifier_wrapper.sh ] && source /tests/sgonly_verifier_wrapper.sh
8+
9+
exec bash "$(dirname "$0")/eval.sh" "$@"
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
# test.sh — Harbor compatibility wrapper
3+
# Harbor requires tests/test.sh for task discovery (TaskPaths.is_valid() check).
4+
# The actual evaluation logic lives in eval.sh (SWE-Factory exit-code-first pattern).
5+
6+
# sg_only_env: restore full repo before verification (no-op for regular runs)
7+
[ -f /tmp/.sg_only_mode ] && [ -f /tests/sgonly_verifier_wrapper.sh ] && source /tests/sgonly_verifier_wrapper.sh
8+
9+
exec bash "$(dirname "$0")/eval.sh" "$@"
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
# test.sh — Harbor compatibility wrapper
3+
# Harbor requires tests/test.sh for task discovery (TaskPaths.is_valid() check).
4+
# The actual evaluation logic lives in eval.sh (SWE-Factory exit-code-first pattern).
5+
6+
# sg_only_env: restore full repo before verification (no-op for regular runs)
7+
[ -f /tmp/.sg_only_mode ] && [ -f /tests/sgonly_verifier_wrapper.sh ] && source /tests/sgonly_verifier_wrapper.sh
8+
9+
exec bash "$(dirname "$0")/eval.sh" "$@"
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
# test.sh — Harbor compatibility wrapper
3+
# Harbor requires tests/test.sh for task discovery (TaskPaths.is_valid() check).
4+
# The actual evaluation logic lives in eval.sh (SWE-Factory exit-code-first pattern).
5+
6+
# sg_only_env: restore full repo before verification (no-op for regular runs)
7+
[ -f /tmp/.sg_only_mode ] && [ -f /tests/sgonly_verifier_wrapper.sh ] && source /tests/sgonly_verifier_wrapper.sh
8+
9+
exec bash "$(dirname "$0")/eval.sh" "$@"
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
# test.sh — Harbor compatibility wrapper
3+
# Harbor requires tests/test.sh for task discovery (TaskPaths.is_valid() check).
4+
# The actual evaluation logic lives in eval.sh (SWE-Factory exit-code-first pattern).
5+
6+
# sg_only_env: restore full repo before verification (no-op for regular runs)
7+
[ -f /tmp/.sg_only_mode ] && [ -f /tests/sgonly_verifier_wrapper.sh ] && source /tests/sgonly_verifier_wrapper.sh
8+
9+
exec bash "$(dirname "$0")/eval.sh" "$@"
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
# test.sh — Harbor compatibility wrapper
3+
# Harbor requires tests/test.sh for task discovery (TaskPaths.is_valid() check).
4+
# The actual evaluation logic lives in eval.sh (SWE-Factory exit-code-first pattern).
5+
6+
# sg_only_env: restore full repo before verification (no-op for regular runs)
7+
[ -f /tmp/.sg_only_mode ] && [ -f /tests/sgonly_verifier_wrapper.sh ] && source /tests/sgonly_verifier_wrapper.sh
8+
9+
exec bash "$(dirname "$0")/eval.sh" "$@"
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
# test.sh — Harbor compatibility wrapper
3+
# Harbor requires tests/test.sh for task discovery (TaskPaths.is_valid() check).
4+
# The actual evaluation logic lives in eval.sh (SWE-Factory exit-code-first pattern).
5+
6+
# sg_only_env: restore full repo before verification (no-op for regular runs)
7+
[ -f /tmp/.sg_only_mode ] && [ -f /tests/sgonly_verifier_wrapper.sh ] && source /tests/sgonly_verifier_wrapper.sh
8+
9+
exec bash "$(dirname "$0")/eval.sh" "$@"

benchmarks/csb_org_onboarding/ccx-onboard-search-201/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
2626
exit 0
2727
fi
2828

29-
cat > /tmp/verify.py << 'PYEOF'
29+
VERIFY_SCRIPT=$(mktemp /tmp/verify_XXXXXX.py)
30+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3031
import json, sys, re
3132
sys.path.insert(0, "/tests")
3233
from verifiers import SemanticRetrievalQAVerifier
@@ -64,5 +65,6 @@ except Exception as e:
6465
f.write("0.0")
6566
PYEOF
6667

67-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
68+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
69+
rm -f "$VERIFY_SCRIPT"
6870
exit 0

benchmarks/csb_org_onboarding/ccx-onboard-search-202/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
2626
exit 0
2727
fi
2828

29-
cat > /tmp/verify.py << 'PYEOF'
29+
VERIFY_SCRIPT=$(mktemp /tmp/verify_XXXXXX.py)
30+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3031
import json, sys, re
3132
sys.path.insert(0, "/tests")
3233
from verifiers import SemanticRetrievalQAVerifier
@@ -64,5 +65,6 @@ except Exception as e:
6465
f.write("0.0")
6566
PYEOF
6667

67-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
68+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
69+
rm -f "$VERIFY_SCRIPT"
6870
exit 0

benchmarks/csb_org_onboarding/ccx-onboard-search-203/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
2626
exit 0
2727
fi
2828

29-
cat > /tmp/verify.py << 'PYEOF'
29+
VERIFY_SCRIPT=$(mktemp /tmp/verify_XXXXXX.py)
30+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3031
import json, sys, re
3132
sys.path.insert(0, "/tests")
3233
from verifiers import SemanticRetrievalQAVerifier
@@ -64,5 +65,6 @@ except Exception as e:
6465
f.write("0.0")
6566
PYEOF
6667

67-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
68+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
69+
rm -f "$VERIFY_SCRIPT"
6870
exit 0

0 commit comments

Comments
 (0)