Skip to content

Commit d6a1659

Browse files
committed
fix(replay): match capture GPU on multi-GPU systems
Two fixes for multi-GPU replay crashes:

1. _is_arm_studio_dir() falsely detected upstream renderdoc builds as ARM Performance Studio, triggering an RTLD_GLOBAL preload of librenderdoc.so that corrupted Vulkan replay. It now requires "arm-performance-studio" in the path.
2. ReplayOptions now pins forceGPUVendor/forceGPUDeviceID to the GPU that was used for capture, by matching the deviceName from structured data against GetAvailableGPUs(). This prevents replay from picking a different GPU from the one that captured the frame.

Also resolves a relative RENDERDOC_PYTHON_PATH to an absolute path before it is used.
1 parent 72801bc commit d6a1659

4 files changed

Lines changed: 65 additions & 8 deletions

File tree

src/rdc/daemon_server.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,35 @@ def _cleanup_temp(state: DaemonState) -> None:
166166
_log = logging.getLogger("rdc.daemon")
167167

168168

169+
def _match_capture_gpu(cap: Any, sd: Any = None) -> Any | None:
170+
"""Find the GPU used for capture by matching structured data against available GPUs."""
171+
try:
172+
gpus = cap.GetAvailableGPUs()
173+
if not gpus:
174+
return None
175+
if len(gpus) == 1:
176+
return gpus[0]
177+
if sd is None:
178+
return gpus[0]
179+
for i in range(len(sd.chunks)):
180+
c = sd.chunks[i]
181+
if c.name == "vkEnumeratePhysicalDevices":
182+
for j in range(c.NumChildren()):
183+
child = c.GetChild(j)
184+
if child.name == "physProps":
185+
for k in range(child.NumChildren()):
186+
prop = child.GetChild(k)
187+
if prop.name == "deviceName":
188+
name = prop.AsString()
189+
for g in gpus:
190+
if g.name == name:
191+
return g
192+
break
193+
except Exception: # noqa: BLE001
194+
pass
195+
return gpus[0] if gpus else None
196+
197+
169198
def _load_replay(state: DaemonState) -> str | None:
170199
"""Load renderdoc module and open capture. Returns error string or None."""
171200
from rdc.discover import find_renderdoc
@@ -189,7 +218,13 @@ def _load_replay(state: DaemonState) -> str | None:
189218
cap.Shutdown()
190219
return "local replay not supported on this platform"
191220

192-
result, controller = cap.OpenCapture(rd.ReplayOptions(), None)
221+
opts = rd.ReplayOptions()
222+
gpu = _match_capture_gpu(cap, cap.GetStructuredData())
223+
if gpu is not None:
224+
opts.forceGPUVendor = gpu.vendor
225+
opts.forceGPUDeviceID = gpu.deviceID
226+
_log.info("replay GPU: %s (vendor=%d id=%d)", gpu.name, gpu.vendor, gpu.deviceID)
227+
result, controller = cap.OpenCapture(opts, None)
193228
if result != rd.ResultCode.Succeeded:
194229
cap.Shutdown()
195230
return f"OpenCapture failed: {result}"
@@ -321,8 +356,18 @@ def _load_remote_replay(state: DaemonState, remote_url: str) -> str | None:
321356
state.local_capture_path = str(local_tmp)
322357
state.local_capture_is_temp = True
323358

359+
remote_opts = rd.ReplayOptions()
360+
if state.local_capture_path:
361+
tmp_cap = rd.OpenCaptureFile()
362+
if tmp_cap.OpenFile(state.local_capture_path, "", None) == rd.ResultCode.Succeeded:
363+
gpu = _match_capture_gpu(tmp_cap)
364+
if gpu is not None:
365+
remote_opts.forceGPUVendor = gpu.vendor
366+
remote_opts.forceGPUDeviceID = gpu.deviceID
367+
tmp_cap.Shutdown()
368+
324369
result, controller = remote.OpenCapture(
325-
rd.RemoteServer.NoPreference, remote_path, rd.ReplayOptions(), None
370+
rd.RemoteServer.NoPreference, remote_path, remote_opts, None
326371
)
327372
if result != rd.ResultCode.Succeeded:
328373
_cleanup_temp_capture(state)

src/rdc/discover.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,12 @@ def _get_diagnostic() -> ProbeOutcome | None:
4848

4949

5050
def _is_arm_studio_dir(directory: str) -> bool:
    """Tell whether *directory* looks like an ARM Performance Studio renderdoc install.

    Both ``librenderdoc.so`` and ``renderdoc.so`` must exist as files in the
    directory, and at least one component of its resolved path must contain
    ``arm-performance-studio`` (compared case-insensitively).
    """
    root = Path(directory)
    has_both_libs = all(
        (root / lib_name).is_file() for lib_name in ("librenderdoc.so", "renderdoc.so")
    )
    if not has_both_libs:
        return False
    # The two files alone are not enough; the path itself must name the ARM
    # Performance Studio install.
    for component in root.resolve().parts:
        if "arm-performance-studio" in component.lower():
            return True
    return False
5457

5558

5659
def _preload_librenderdoc(directory: str) -> None:
@@ -136,7 +139,7 @@ def find_renderdoc() -> ModuleType | None:
136139

137140
env_path = os.environ.get("RENDERDOC_PYTHON_PATH")
138141
if env_path:
139-
candidates.append(env_path)
142+
candidates.append(os.path.abspath(env_path))
140143

141144
try:
142145
candidates.extend(_platform.renderdoc_search_paths())
@@ -203,7 +206,7 @@ def find_renderdoccmd() -> Path | None:
203206
env_path = os.environ.get("RENDERDOC_PYTHON_PATH")
204207
if env_path:
205208
name = "renderdoccmd.exe" if sys.platform == "win32" else "renderdoccmd"
206-
candidate = Path(env_path) / name
209+
candidate = Path(os.path.abspath(env_path)) / name
207210
if candidate.exists():
208211
return candidate
209212

tests/mocks/mock_renderdoc.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1824,7 +1824,9 @@ def CreateHeadlessWindowingData(width: int, height: int) -> Any:
18241824

18251825

18261826
class ReplayOptions:
    """Mock replay options exposing the GPU-override fields with neutral defaults."""

    # Class-level defaults; assigning on an instance shadows them per object.
    forceGPUVendor: int = 0
    forceGPUDeviceID: int = 0
    forceGPUDriverName: str = ""
18281830

18291831

18301832
def ExecuteAndInject(

tests/unit/test_discover.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,9 +239,16 @@ class TestArmStudioDir:
239239
"""_is_arm_studio_dir detects ARM PS directory layout."""
240240

241241
def test_both_files_present(self, tmp_path: Path) -> None:
    """Both libraries inside an arm-performance-studio path are detected."""
    install_dir = tmp_path / "arm-performance-studio" / "renderdoc" / "lib"
    install_dir.mkdir(parents=True)
    for lib_name in ("librenderdoc.so", "renderdoc.so"):
        (install_dir / lib_name).write_text("fake")
    assert _is_arm_studio_dir(str(install_dir)) is True
247+
248+
def test_non_arm_dir_with_both_files(self, tmp_path: Path) -> None:
    """Both libraries outside an arm-performance-studio path are not detected."""
    for lib_name in ("librenderdoc.so", "renderdoc.so"):
        (tmp_path / lib_name).write_text("fake")
    assert _is_arm_studio_dir(str(tmp_path)) is False
245252

246253
def test_missing_librenderdoc(self, tmp_path: Path) -> None:
247254
(tmp_path / "renderdoc.so").write_text("fake")

0 commit comments

Comments
 (0)