Skip to content

Commit ea60789

Browse files
Copilot and fruch committed
Add tests for libev atexit cleanup bug
- Added test_libevreactor_shutdown.py to demonstrate the bug
- Tests show that the atexit callback captures None instead of the actual loop

Co-authored-by: fruch <340979+fruch@users.noreply.github.com>
1 parent d83adab commit ea60789

1 file changed

Lines changed: 250 additions & 0 deletions

File tree

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
# Copyright DataStax, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Test to demonstrate the libevwrapper atexit cleanup issue.
17+
18+
This test demonstrates the problem where the atexit callback is registered
19+
with _global_loop=None at import time, causing it to receive None during
20+
shutdown instead of the actual loop instance.
21+
"""
22+
23+
import unittest
24+
import atexit
25+
import sys
26+
import subprocess
27+
import tempfile
28+
import os
29+
from pathlib import Path
30+
31+
from cassandra import DependencyException
32+
33+
try:
34+
from cassandra.io.libevreactor import LibevConnection
35+
except (ImportError, DependencyException):
36+
LibevConnection = None
37+
38+
from tests import is_monkey_patched
39+
40+
41+
class LibevAtexitCleanupTest(unittest.TestCase):
42+
"""
43+
Test case to demonstrate the atexit cleanup bug in libevreactor.
44+
45+
The bug: atexit.register(partial(_cleanup, _global_loop)) is called when
46+
_global_loop is None, so the cleanup function receives None at shutdown
47+
instead of the actual LibevLoop instance that was created later.
48+
"""
49+
50+
def setUp(self):
51+
if is_monkey_patched():
52+
raise unittest.SkipTest("Can't test libev with monkey patching")
53+
if LibevConnection is None:
54+
raise unittest.SkipTest('libev does not appear to be installed correctly')
55+
56+
def test_atexit_callback_registered_with_none(self):
57+
"""
58+
Test that demonstrates the atexit callback bug.
59+
60+
The atexit.register(partial(_cleanup, _global_loop)) line is executed
61+
when _global_loop is None. This means the partial function captures
62+
None as the argument, and when atexit calls it during shutdown, it
63+
passes None to _cleanup instead of the actual loop instance.
64+
65+
@since 3.29
66+
@jira_ticket PYTHON-XXX
67+
@expected_result The test demonstrates that atexit cleanup is broken
68+
69+
@test_category connection
70+
"""
71+
from cassandra.io import libevreactor
72+
from functools import partial
73+
74+
# Check the current atexit handlers
75+
# Note: atexit._exithandlers is an implementation detail but useful for debugging
76+
if hasattr(atexit, '_exithandlers'):
77+
# Find our cleanup handler
78+
cleanup_handler = None
79+
for handler in atexit._exithandlers:
80+
func = handler[0]
81+
# Check if this is our partial(_cleanup, _global_loop) handler
82+
if isinstance(func, partial):
83+
if func.func.__name__ == '_cleanup':
84+
cleanup_handler = func
85+
break
86+
87+
if cleanup_handler:
88+
# The problem: the partial was created with _global_loop=None
89+
# So even if _global_loop is later set to a LibevLoop instance,
90+
# the atexit callback will still call _cleanup(None)
91+
captured_arg = cleanup_handler.args[0] if cleanup_handler.args else None
92+
93+
# This assertion will fail after LibevConnection.initialize_reactor()
94+
# is called and _global_loop is set to a LibevLoop instance
95+
LibevConnection.initialize_reactor()
96+
97+
# At this point, libevreactor._global_loop is not None
98+
self.assertIsNotNone(libevreactor._global_loop,
99+
"Global loop should be initialized")
100+
101+
# But the atexit handler still has None captured!
102+
self.assertIsNone(captured_arg,
103+
"The atexit handler captured None, not the actual loop instance. "
104+
"This is the BUG: cleanup will receive None at shutdown!")
105+
106+
def test_shutdown_crash_scenario_subprocess(self):
107+
"""
108+
Test that simulates a Python shutdown crash scenario in a subprocess.
109+
110+
This test creates a minimal script that:
111+
1. Imports the driver
112+
2. Creates a connection (which starts the event loop)
113+
3. Exits without explicit cleanup
114+
115+
The expected behavior is that atexit should clean up the loop, but
116+
because of the bug, the cleanup receives None and doesn't actually
117+
stop the loop or its watchers. This can lead to crashes if callbacks
118+
fire during shutdown.
119+
120+
@since 3.29
121+
@jira_ticket PYTHON-XXX
122+
@expected_result The subprocess demonstrates the cleanup issue
123+
124+
@test_category connection
125+
"""
126+
# Create a test script that demonstrates the issue
127+
test_script = '''
128+
import sys
129+
import os
130+
131+
# Add the driver path
132+
sys.path.insert(0, {driver_path!r})
133+
134+
# Import and setup
135+
from cassandra.io.libevreactor import LibevConnection, _global_loop
136+
import atexit
137+
138+
# Initialize the reactor (creates the global loop)
139+
LibevConnection.initialize_reactor()
140+
141+
print("Global loop initialized:", _global_loop is not None)
142+
143+
# Check what atexit will actually call
144+
if hasattr(atexit, '_exithandlers'):
145+
from functools import partial
146+
for handler in atexit._exithandlers:
147+
func = handler[0]
148+
if isinstance(func, partial) and func.func.__name__ == '_cleanup':
149+
captured_arg = func.args[0] if func.args else None
150+
print("Atexit will call _cleanup with:", captured_arg)
151+
print("But _global_loop is:", _global_loop)
152+
print("BUG: Cleanup will receive None instead of the loop!")
153+
break
154+
155+
# Exit without explicit cleanup - atexit should handle it, but won't!
156+
print("Exiting...")
157+
'''
158+
159+
driver_path = str(Path(__file__).parent.parent.parent.parent)
160+
script_content = test_script.format(driver_path=driver_path)
161+
162+
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
163+
f.write(script_content)
164+
script_path = f.name
165+
166+
try:
167+
result = subprocess.run(
168+
[sys.executable, script_path],
169+
capture_output=True,
170+
text=True,
171+
timeout=5
172+
)
173+
174+
output = result.stdout
175+
print("\n=== Subprocess Output ===")
176+
print(output)
177+
print("=== End Output ===\n")
178+
179+
# Verify the output shows the bug
180+
self.assertIn("Global loop initialized: True", output)
181+
self.assertIn("Atexit will call _cleanup with: None", output)
182+
self.assertIn("BUG: Cleanup will receive None instead of the loop!", output)
183+
184+
finally:
185+
os.unlink(script_path)
186+
187+
188+
class LibevShutdownRaceConditionTest(unittest.TestCase):
189+
"""
190+
Tests to analyze potential race conditions and crashes during shutdown.
191+
"""
192+
193+
def setUp(self):
194+
if is_monkey_patched():
195+
raise unittest.SkipTest("Can't test libev with monkey patching")
196+
if LibevConnection is None:
197+
raise unittest.SkipTest('libev does not appear to be installed correctly')
198+
199+
def test_callback_during_shutdown_scenario(self):
200+
"""
201+
Test to document the potential crash scenario.
202+
203+
When Python is shutting down:
204+
1. Various modules are being torn down
205+
2. The libev event loop may still be running
206+
3. If a callback (io_callback, timer_callback, prepare_callback) fires:
207+
- It calls PyGILState_Ensure()
208+
- It tries to call Python functions (PyObject_CallFunction)
209+
- If Python objects have been deallocated, this can crash
210+
211+
The root cause: The atexit cleanup doesn't actually run because it
212+
receives None instead of the loop instance, so it never:
213+
- Sets _shutdown flag
214+
- Stops watchers
215+
- Joins the event loop thread
216+
217+
@since 3.29
218+
@jira_ticket PYTHON-XXX
219+
@expected_result Documents the crash scenario
220+
221+
@test_category connection
222+
"""
223+
from cassandra.io.libevreactor import _global_loop, _cleanup
224+
225+
# This test documents the issue - we can't easily reproduce a crash
226+
# in a unit test without actually tearing down Python, but we can
227+
# verify the conditions that lead to it
228+
229+
LibevConnection.initialize_reactor()
230+
231+
# Verify the loop exists
232+
self.assertIsNotNone(_global_loop)
233+
234+
# Simulate what atexit would call (with the bug)
235+
_cleanup(None) # BUG: receives None instead of _global_loop
236+
237+
# The loop is still running because cleanup did nothing!
238+
self.assertFalse(_global_loop._shutdown,
239+
"Loop should NOT be shut down when cleanup receives None")
240+
241+
# Now call it correctly
242+
_cleanup(_global_loop)
243+
244+
# Now it should be shut down
245+
self.assertTrue(_global_loop._shutdown,
246+
"Loop should be shut down when cleanup receives the actual loop")
247+
248+
249+
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)