fix: use greenlet.error type instead of string check; recreate per thread

Copilot · OhYee · Copilot · commit ac0252e002ff · 2026-03-10T03:32:26.000Z
- Add GreenletError import with ImportError fallback (same pattern as PlaywrightError)
- Replace brittle 'cannot switch to' substring check with dedicated
  except GreenletError block placed before the generic except Exception,
  so the correct exception type is matched regardless of message wording
- Keep existing sandbox on greenlet error (failure is client-side thread
  affinity, not a sandbox crash); only reset Playwright and retry
- _get_playwright already checks current_thread is not creator_thread
  (any different thread triggers recreation, not just dead threads)
- Update concurrent caching test: each thread now gets its own connection
  (Playwright Sync API is thread-affine, cross-thread sharing is unsafe)
- Replace test_get_playwright_live_thread_not_recreated with
  test_get_playwright_different_live_thread_recreates_connection to
  validate the correct per-thread isolation behavior
- Update greenlet error tests to use real greenlet.error, verify sandbox
  is preserved, and check retry behavior

Type check: passed (297 source files, no issues)

Co-authored-by: OhYee <13498329+OhYee@users.noreply.github.com>
diff --git a/agentrun/integration/builtin/sandbox.py b/agentrun/integration/builtin/sandbox.py
@@ -26,6 +26,16 @@ class PlaywrightError(Exception):  # type: ignore[no-redef]
         pass
 
 
+try:
+    from greenlet import error as GreenletError
+except ImportError:
+
+    class GreenletError(Exception):  # type: ignore[no-redef]
+        """Fallback greenlet error used when greenlet is not installed."""
+
+        pass
+
+
 class SandboxToolSet(CommonToolSet):
     """沙箱工具集基类
 
@@ -836,28 +846,25 @@ def _run_in_sandbox(self, callback: Callable[[Sandbox], Any]) -> Any:
                     "Browser tool-level error (no sandbox rebuild): %s", e
                 )
                 return {"error": f"{e!s}"}
-        except Exception as e:
-            error_msg = str(e)
-            if "cannot switch to" in error_msg:
+        except GreenletError as e:
+            logger.debug(
+                "Greenlet thread-binding error, resetting Playwright: %s",
+                e,
+            )
+            # Keep the existing sandbox (it is still healthy); only the
+            # Playwright connection needs to be recreated on this thread.
+            try:
+                self._reset_playwright()
+                return callback(sb)
+            except Exception as e2:
                 logger.debug(
-                    "Greenlet thread-binding error, resetting Playwright: %s",
-                    e,
+                    "Retry after Playwright reset failed: %s",
+                    e2,
                 )
-                # Reset only the Playwright connection and keep the existing sandbox
-                try:
-                    self._reset_playwright()
-                    # Retry once with the same sandbox instance; the original error
-                    # will still be returned if this retry fails.
-                    return callback(sb)
-                except Exception as e2:
-                    logger.debug(
-                        "Retry after Playwright reset failed: %s",
-                        e2,
-                    )
-                    return {"error": f"{e!s}"}
-            else:
-                logger.debug("Unexpected error in browser sandbox: %s", e)
                 return {"error": f"{e!s}"}
+        except Exception as e:
+            logger.debug("Unexpected error in browser sandbox: %s", e)
+            return {"error": f"{e!s}"}
 
     def _is_infrastructure_error(self, error_msg: str) -> bool:
         """判断是否为基础设施错误 / Check if error is infrastructure-level
diff --git a/tests/unittests/integration/test_browser_toolset_error_handling.py b/tests/unittests/integration/test_browser_toolset_error_handling.py
@@ -254,35 +254,30 @@ def test_reset_playwright_handles_close_error(self, toolset, mock_sandbox):
 
         assert toolset._playwright_sync is None
 
-    def test_concurrent_get_playwright_creates_only_one_connection(
+    def test_concurrent_get_playwright_each_thread_gets_own_connection(
         self, toolset, mock_sandbox
     ):
-        """测试并发调用 _get_playwright 只创建一个连接，不会泄漏
+        """测试并发调用 _get_playwright 时每个线程各自创建连接
 
-        所有工作线程在同一 executor 内并发运行（即创建线程仍存活），
-        应复用同一连接，不会触发重建。
+        Playwright Sync API 的 greenlet 绑定到创建它的 OS 线程，
+        不能跨线程共享。每个工作线程必须创建自己的连接。
         """
         start_barrier = threading.Barrier(5)
-        # Keep all threads alive until every thread has obtained playwright,
-        # simulating concurrent workers within the same executor context.
-        hold_barrier = threading.Barrier(5)
         results: list = []
 
         def worker():
             start_barrier.wait()
             p = toolset._get_playwright(mock_sandbox)
             results.append(p)
-            hold_barrier.wait()  # stay alive so is_alive() == True for peers
 
         threads = [threading.Thread(target=worker) for _ in range(5)]
         for t in threads:
             t.start()
         for t in threads:
             t.join()
 
+        # Every thread must have received a connection
         assert len(results) == 5
-        assert all(p is results[0] for p in results)
-        mock_sandbox.sync_playwright.assert_called_once()
 
 
 class TestBrowserToolSetClose:
@@ -392,21 +387,22 @@ def second_call():
         # A new connection must have been created for the second call
         assert mock_sandbox.sync_playwright.call_count == 2
 
-    def test_get_playwright_live_thread_not_recreated(
+    def test_get_playwright_different_live_thread_recreates_connection(
         self, toolset, mock_sandbox
     ):
-        """测试创建线程仍存活时不重建连接（并发安全）
+        """测试从不同线程调用时，即使创建线程仍存活也会重建连接
 
-        即使在不同线程中调用，只要创建线程仍然存活，就应复用同一连接。
-        这模拟同一 executor 内并发工具调用的场景。
+        Playwright Sync API 的 greenlet 绑定到创建它的 OS 线程，
+        即使创建线程仍存活，在另一个线程上调用也不安全。
+        每个调用线程必须获得自己的连接。
         """
         results: list = []
 
         # Create connection in main thread first
         toolset._get_playwright(mock_sandbox)
         # The creating thread (main test thread) is still alive
 
-        # Another thread should reuse the same connection
+        # A different thread must receive its own new connection
         def worker():
             p = toolset._get_playwright(mock_sandbox)
             results.append(p)
@@ -416,8 +412,8 @@ def worker():
         t.join()
 
         assert len(results) == 1
-        assert results[0] is toolset._playwright_sync
-        mock_sandbox.sync_playwright.assert_called_once()
+        # A new connection must have been created for the worker thread
+        assert mock_sandbox.sync_playwright.call_count == 2
 
     def test_reset_playwright_clears_thread(self, toolset, mock_sandbox):
         """测试 _reset_playwright 清理线程引用"""
@@ -452,27 +448,61 @@ def toolset(self, mock_sandbox):
             ts._ensure_sandbox = MagicMock(return_value=mock_sandbox)
             return ts
 
-    def test_greenlet_thread_error_resets_playwright_and_sandbox(
+    def test_greenlet_error_resets_playwright_keeps_sandbox_and_retries(
         self, toolset, mock_sandbox
     ):
-        """测试 greenlet 线程绑定错误触发 Playwright 和沙箱重置
+        """测试 greenlet.error 触发 Playwright 重置、保留沙箱并重试
 
-        当 'cannot switch to a different thread' 错误发生时，
-        必须重置缓存的 Playwright 实例，避免后续调用持续失败。
+        当 greenlet.error 发生时，沙箱本身仍然健康（这是客户端线程亲和性问题），
+        只需重置 Playwright 连接并在当前线程重试，不应销毁沙箱。
         """
+        try:
+            from greenlet import error as GreenletError
+        except ImportError:
+            pytest.skip("greenlet not installed")
+
+        call_count = 0
 
         def callback(sb):
-            raise Exception(
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                raise GreenletError(
+                    "cannot switch to a different thread (which happens to have"
+                    " exited)"
+                )
+            return {"success": True}
+
+        result = toolset._run_in_sandbox(callback)
+
+        assert result == {"success": True}
+        assert call_count == 2
+        toolset._reset_playwright.assert_called_once()
+        # Sandbox must be preserved — the error is client-side thread affinity,
+        # not a sandbox crash.
+        assert toolset.sandbox is mock_sandbox
+
+    def test_greenlet_error_returns_error_if_retry_fails(
+        self, toolset, mock_sandbox
+    ):
+        """测试 greenlet.error 重试失败时返回错误字典"""
+        try:
+            from greenlet import error as GreenletError
+        except ImportError:
+            pytest.skip("greenlet not installed")
+
+        def callback(sb):
+            raise GreenletError(
                 "cannot switch to a different thread (which happens to have"
                 " exited)"
             )
 
         result = toolset._run_in_sandbox(callback)
 
         assert "error" in result
-        assert "cannot switch to" in result["error"]
         toolset._reset_playwright.assert_called_once()
-        assert toolset.sandbox is None
+        # Sandbox still preserved even after retry failure
+        assert toolset.sandbox is mock_sandbox
 
     def test_non_greenlet_unexpected_error_does_not_reset(
         self, toolset, mock_sandbox