Skip to content

Commit 0d8c0a7

Browse files
committed
Add memory logging hooks
1 parent 72b62f8 commit 0d8c0a7

File tree

1 file changed

+126
-0
lines changed

1 file changed

+126
-0
lines changed

dpnp/tests/conftest.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,45 @@ def normalize_test_name(nodeid):
8080
return normalized_nodeid
8181

8282

83+
def get_device_memory_info(device=None):
    """
    Safely retrieve memory-related properties of a device.

    Parameters
    ----------
    device : object, optional
        Device object to query. It must expose the attributes
        ``global_mem_size``, ``max_mem_alloc_size``, ``local_mem_size``,
        ``name``, ``device_type`` and ``backend``. When ``None`` (default),
        the result of ``dpctl.select_default_device()`` is used.

    Returns
    -------
    dict or None
        Dictionary with the keys ``'global_mem_size'``,
        ``'max_mem_alloc_size'``, ``'local_mem_size'``, ``'device_name'``,
        ``'device_type'`` and ``'backend'``. The three size entries are
        passed through unchanged from the device; ``'device_type'`` and
        ``'backend'`` are converted with ``str()``. ``None`` is returned
        when the information cannot be retrieved, in which case a warning
        is emitted instead of raising.
    """
    # Deliberately best-effort: this helper only feeds diagnostic logging,
    # so any failure while talking to the device is downgraded to a warning.
    try:
        if device is None:
            device = dpctl.select_default_device()

        return {
            "global_mem_size": device.global_mem_size,
            "max_mem_alloc_size": device.max_mem_alloc_size,
            "local_mem_size": device.local_mem_size,
            "device_name": device.name,
            "device_type": str(device.device_type),
            "backend": str(device.backend),
        }
    except Exception as e:
        # stacklevel=2 attributes the warning to the caller of this helper,
        # which is the location a reader of the test log needs to see.
        warnings.warn(f"Failed to get device memory info: {e}", stacklevel=2)
        return None
105+
106+
107+
def format_memory_size(size_bytes):
    """
    Format a memory size in human-readable form.

    Parameters
    ----------
    size_bytes : int or float or None
        Size in bytes. ``None`` means the size is unknown.

    Returns
    -------
    str
        ``"N/A"`` for ``None``; otherwise the value scaled to the largest
        binary unit (TB, GB, MB, KB) it reaches, with two decimals, or the
        raw value followed by ``"bytes"`` when it is below 1024.
    """
    if size_bytes is None:
        return "N/A"

    # Ordered from the largest unit down so the first threshold reached wins.
    # TB is included so that very large sizes (e.g. host RAM on big servers)
    # do not show up as four-digit GB values.
    for exponent, suffix in ((4, "TB"), (3, "GB"), (2, "MB"), (1, "KB")):
        threshold = 1024**exponent
        if size_bytes >= threshold:
            return f"{size_bytes / threshold:.2f} {suffix}"

    return f"{size_bytes} bytes"
120+
121+
83122
def pytest_configure(config):
84123
# By default, tests marked as slow will be deselected.
85124
# To run all tests, use -m "slow or not slow".
@@ -141,6 +180,9 @@ def pytest_collection_modifyitems(config, items):
141180
support_fp64 = dev.has_aspect_fp64
142181
is_cuda = dpnp.is_cuda_backend(dev)
143182

183+
# Get device memory information
184+
mem_info = get_device_memory_info(dev)
185+
144186
print("")
145187
print(
146188
f"DPNP Test scope includes all integer dtypes: {bool(dtype_config.all_int_types)}"
@@ -153,6 +195,23 @@ def pytest_collection_modifyitems(config, items):
153195
print(f"DPNP version: {dpnp.__version__}, location: {dpnp}")
154196
print(f"NumPy version: {numpy.__version__}, location: {numpy}")
155197
print(f"Python version: {sys.version}")
198+
199+
# Log device memory information
200+
if mem_info:
201+
print("")
202+
print("Device Memory Information:")
203+
print(f" Device: {mem_info['device_name']}")
204+
print(f" Backend: {mem_info['backend']}")
205+
print(
206+
f" Global Memory Size: {format_memory_size(mem_info['global_mem_size'])}"
207+
)
208+
print(
209+
f" Max Allocation Size: {format_memory_size(mem_info['max_mem_alloc_size'])}"
210+
)
211+
print(
212+
f" Local Memory Size: {format_memory_size(mem_info['local_mem_size'])}"
213+
)
214+
156215
print("")
157216
if is_gpu or os.getenv("DPNP_QUEUE_GPU") == "1":
158217
excluded_tests.extend(get_excluded_tests(test_exclude_file_gpu))
@@ -239,3 +298,70 @@ def suppress_divide_invalid_numpy_warnings(
239298
suppress_divide_numpy_warnings, suppress_invalid_numpy_warnings
240299
):
241300
yield
301+
302+
303+
# Memory logging hooks
304+
# Set DPNP_TEST_LOG_MEMORY=1 to enable per-test memory logging
305+
def pytest_runtest_setup(item):
    """
    Emit a ``[MEMORY BEFORE]`` line for the test that is about to run.

    Nothing is logged unless the environment variable
    ``DPNP_TEST_LOG_MEMORY`` is set to ``"1"`` and the device information
    could be retrieved.

    Parameters
    ----------
    item : pytest.Item
        Test item being set up; its ``nodeid`` labels the log line.
    """
    if os.getenv("DPNP_TEST_LOG_MEMORY") != "1":
        return

    info = get_device_memory_info()
    if not info:
        return

    # NOTE(review): the values come from size attributes of the device
    # object; these look like fixed capacities rather than live usage, so
    # the BEFORE/AFTER lines will presumably be identical -- confirm.
    global_mem = format_memory_size(info["global_mem_size"])
    max_alloc = format_memory_size(info["max_mem_alloc_size"])
    local_mem = format_memory_size(info["local_mem_size"])

    # Written through pytest's terminal writer instead of print(); the
    # intent is for the line to stay visible under output capture.
    writer = item.config.get_terminal_writer()
    writer.line()
    writer.write(
        f"[MEMORY BEFORE] {item.nodeid}: "
        f"Global={global_mem}, "
        f"MaxAlloc={max_alloc}, "
        f"Local={local_mem}"
    )
    writer.line()
320+
321+
322+
def pytest_runtest_teardown(item):
    """
    Emit a ``[MEMORY AFTER]`` line for the test that has just run.

    Nothing is logged unless the environment variable
    ``DPNP_TEST_LOG_MEMORY`` is set to ``"1"`` and the device information
    could be retrieved.

    Parameters
    ----------
    item : pytest.Item
        Test item being torn down; its ``nodeid`` labels the log line.
    """
    if os.getenv("DPNP_TEST_LOG_MEMORY") != "1":
        return

    info = get_device_memory_info()
    if not info:
        return

    global_mem = format_memory_size(info["global_mem_size"])
    max_alloc = format_memory_size(info["max_mem_alloc_size"])
    local_mem = format_memory_size(info["local_mem_size"])

    # Unlike the setup hook, no blank line is written before the message.
    writer = item.config.get_terminal_writer()
    writer.write(
        f"[MEMORY AFTER] {item.nodeid}: "
        f"Global={global_mem}, "
        f"MaxAlloc={max_alloc}, "
        f"Local={local_mem}"
    )
    writer.line()
335+
336+
337+
def pytest_runtest_makereport(item, call):
    """
    Print device details when a test fails with a device-looking error.

    The hook only acts on the ``"call"`` phase and only when an exception
    was recorded. An exception counts as device-related when the string
    form of its type contains ``"sycl"`` or the string form of its value
    contains ``"device"`` (both compared case-insensitively). The hook
    returns ``None`` on every path and only prints; it does not build or
    modify a report itself.

    Parameters
    ----------
    item : pytest.Item
        Test item the report is being made for.
    call : pytest.CallInfo
        Result of the phase that just ran; ``when`` and ``excinfo`` are read.
    """
    failure = call.excinfo
    if call.when != "call" or failure is None:
        return

    # Heuristic match; the value is only stringified when the type check
    # did not already match.
    if (
        "sycl" not in str(failure.type).lower()
        and "device" not in str(failure.value).lower()
    ):
        return

    # Diagnostics must never turn into a second failure, so anything that
    # goes wrong while collecting them is reported and swallowed.
    try:
        info = get_device_memory_info()
        if info:
            report = (
                f"\n[DEVICE ERROR] Test: {item.nodeid}"
                f"\n Device: {info['device_name']}"
                f"\n Backend: {info['backend']}"
                f"\n Global Memory: {format_memory_size(info['global_mem_size'])}"
            )
            print(report)
    except Exception as e:
        print(
            f"\n[DEVICE ERROR] Failed to retrieve device info during error: {e}"
        )

0 commit comments

Comments
 (0)