Skip to content

Commit b5fb2b8

Browse files
authored
[Annotations] Address cuda ordinal matching (#28623)
This pull request improves the accuracy and safety of device matching logic for GPU execution providers, especially when using numeric GPU indices (e.g., `gpu:1`) in layering rules. The changes ensure that matching by index only occurs when a runtime device ordinal is available (from `device_memory_info`), preventing accidental matches with hardware PCI device IDs. The update also enhances logging and expands test coverage for these scenarios. **Device matching logic improvements:** * Added a `has_device_ordinal` flag to `EpDeviceView` and updated matching logic so that index-based GPU matching only occurs when the device ordinal is known to be a runtime ordinal (from `device_memory_info`), not a hardware PCI ID. [[1]](diffhunk://#diff-a8f614056d63b5b3325eea1d855afc96550c977c16d8fdba641012a79194b7b5R169) [[2]](diffhunk://#diff-a8f614056d63b5b3325eea1d855afc96550c977c16d8fdba641012a79194b7b5L193-R198) [[3]](diffhunk://#diff-a8f614056d63b5b3325eea1d855afc96550c977c16d8fdba641012a79194b7b5L288-R299) [[4]](diffhunk://#diff-a8f614056d63b5b3325eea1d855afc96550c977c16d8fdba641012a79194b7b5R324) * Updated log messages to provide clearer error information when a layering rule with a numeric GPU index cannot be mapped, including guidance for troubleshooting. **Test improvements:** * Updated and expanded unit tests to cover correct and incorrect GPU index matching, including cases where only hardware IDs are present and should not match, and added a new test for execution providers with specific GPU ordinals. [[1]](diffhunk://#diff-37d64a2aa66018cc6a40ca2227432eae6c33dd6c1456d19ef539e869ee9d4f72L364-R366) [[2]](diffhunk://#diff-37d64a2aa66018cc6a40ca2227432eae6c33dd6c1456d19ef539e869ee9d4f72L375-R387) [[3]](diffhunk://#diff-37d64a2aa66018cc6a40ca2227432eae6c33dd6c1456d19ef539e869ee9d4f72R566-R584)
1 parent c267f8e commit b5fb2b8

2 files changed

Lines changed: 50 additions & 13 deletions

File tree

onnxruntime/core/framework/layering_annotations.cc

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ struct EpDeviceView {
166166
OrtDevice::DeviceType device_type; // OrtDevice::CPU, GPU, NPU, FPGA, or kDeviceTypeUnknown
167167
uint32_t vendor_id;
168168
OrtDevice::DeviceId device_id;
169+
bool has_device_ordinal; // true when device_id is a runtime ordinal (from device_memory_info)
169170
std::string_view vendor_string; // from OrtHardwareDevice::vendor (empty if unavailable)
170171
};
171172

@@ -190,7 +191,11 @@ bool MatchEpDevice(const EpDeviceView& ep,
190191
if (ep.device_type == OrtDevice::GPU) {
191192
uint32_t index = std::numeric_limits<uint32_t>::max();
192193
if (TryParseIndex(std::string(target_specifier), index)) {
193-
return ep.device_id == static_cast<OrtDevice::DeviceId>(index);
194+
// Only match by ordinal index when the device_id is known to be a runtime
195+
// ordinal (sourced from device_memory_info). OrtHardwareDevice::device_id is
196+
// a PCI hardware-type identifier, not a device instance ordinal.
197+
return ep.has_device_ordinal &&
198+
ep.device_id == static_cast<OrtDevice::DeviceId>(index);
194199
}
195200
// gpu:<vendor>
196201
if (!ep.vendor_string.empty() && CaseInsensitiveCompare(ep.vendor_string, target_specifier)) {
@@ -285,13 +290,13 @@ std::optional<std::string> EpLayeringMatcher::Match(gsl::span<const OrtEpDevice*
285290
ep_device.ep_name,
286291
device_type,
287292
has_hw ? ep_device.device->vendor_id : 0u,
288-
// Prefer the device ordinal from device_memory_info (set by the EP factory to
289-
// a runtime device ordinal such as a CUDA ordinal) over the OrtHardwareDevice::device_id
290-
// which is a hardware-type identifier and not guaranteed to be a stable runtime ordinal.
293+
// Use the device ordinal from device_memory_info (set by the EP factory to
294+
// a runtime device ordinal such as a CUDA ordinal). OrtHardwareDevice::device_id
295+
// is a PCI hardware-type identifier and must not be used for index-based matching.
291296
ep_device.device_memory_info
292297
? ep_device.device_memory_info->device.Id()
293-
: (has_hw ? static_cast<OrtDevice::DeviceId>(ep_device.device->device_id)
294-
: OrtDevice::DeviceId{}),
298+
: OrtDevice::DeviceId{},
299+
/*has_device_ordinal=*/ep_device.device_memory_info != nullptr,
295300
has_hw ? std::string_view(ep_device.device->vendor) : std::string_view{}};
296301

297302
if (MatchEpDevice(view, target_type_str, target_specifier, rule.device)) {
@@ -316,7 +321,8 @@ std::optional<std::string> EpLayeringMatcher::Match(const ExecutionProviders& pr
316321
device.Type(),
317322
device.Vendor(),
318323
device.Id(),
319-
{}}; // no vendor string available from IExecutionProvider
324+
/*has_device_ordinal=*/true, // IExecutionProvider sets device Id to a runtime ordinal
325+
{}}; // no vendor string available from IExecutionProvider
320326

321327
if (MatchEpDevice(view, target_type_str, target_specifier, rule.device)) {
322328
return std::string(ep.Type());
@@ -386,8 +392,10 @@ Status LayeringIndex::Create(const Graph& graph,
386392
LOGS(logger, VERBOSE) << "Layering Rule " << i << " (" << rule.device << " -> " << rule.annotation
387393
<< ") mapped to EP: " << ep_type;
388394
} else {
389-
LOGS(logger, WARNING) << "Layering Rule " << i << " (" << rule.device << " -> " << rule.annotation
390-
<< ") could not be mapped to any available Execution Provider.";
395+
LOGS(logger, ERROR) << "Layering rule " << i << " (device='" << rule.device << "', annotation='" << rule.annotation
396+
<< "') could not be mapped to any available Execution Provider. "
397+
<< "If a numeric gpu index was specified (e.g. gpu:0), ensure an EP with a matching "
398+
<< "device ordinal is registered and reports device_memory_info.";
391399
}
392400
}
393401

onnxruntime/test/framework/layering_annotations_test.cc

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -361,9 +361,9 @@ TEST(EpLayeringMatcherTest, MatchSpecificGPU_Heuristic) {
361361
TEST(EpLayeringMatcherTest, MatchSpecificGPU_Index) {
362362
LayerAnnotation rule = {"gpu:1", "Anno1", false};
363363

364-
// Case 1: ID Match
364+
// Case 1: ID Match via device_memory_info (runtime ordinal)
365365
{
366-
auto test_ep = CreateHwEp("GPU1", OrtHardwareDeviceType_GPU, 0, 1);
366+
auto test_ep = CreateMemEp("GPU1", OrtDevice::GPU, 1);
367367
OrtEpDevice ep_device = test_ep.Get();
368368
std::vector<const OrtEpDevice*> devices = {&ep_device};
369369

@@ -372,9 +372,19 @@ TEST(EpLayeringMatcherTest, MatchSpecificGPU_Index) {
372372
EXPECT_EQ(*result, "GPU1");
373373
}
374374

375-
// Case 2: ID Mismatch
375+
// Case 2: ID Mismatch via device_memory_info
376376
{
377-
auto test_ep = CreateHwEp("GPU0", OrtHardwareDeviceType_GPU, 0, 0);
377+
auto test_ep = CreateMemEp("GPU0", OrtDevice::GPU, 0);
378+
OrtEpDevice ep_device = test_ep.Get();
379+
std::vector<const OrtEpDevice*> devices = {&ep_device};
380+
auto result = EpLayeringMatcher::Match(devices, rule);
381+
EXPECT_FALSE(result.has_value());
382+
}
383+
384+
// Case 3: HW-only device (no device_memory_info) must NOT match by index.
385+
// OrtHardwareDevice::device_id is a PCI hardware-type ID, not an ordinal.
386+
{
387+
auto test_ep = CreateHwEp("GPU_HW_Only", OrtHardwareDeviceType_GPU, 0, 1);
378388
OrtEpDevice ep_device = test_ep.Get();
379389
std::vector<const OrtEpDevice*> devices = {&ep_device};
380390
auto result = EpLayeringMatcher::Match(devices, rule);
@@ -553,6 +563,25 @@ TEST(EpLayeringMatcherTest, MatchExecutionProviders_GPU_Specific) {
553563
EXPECT_EQ(*result, kCudaExecutionProvider);
554564
}
555565

566+
TEST(EpLayeringMatcherTest, MatchExecutionProviders_GPU_Index) {
567+
LayerAnnotation rule = {"gpu:1", "Anno1", false};
568+
ExecutionProviders providers;
569+
570+
// Add GPU provider with ordinal 0 (should not match gpu:1)
571+
auto gpu0_ep = std::make_shared<MockExecutionProvider>("GPU_EP_0",
572+
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NVIDIA, 0));
573+
ASSERT_STATUS_OK(providers.Add("GPU_EP_0", gpu0_ep));
574+
575+
// Add GPU provider with ordinal 1 (should match gpu:1)
576+
auto gpu1_ep = std::make_shared<MockExecutionProvider>("GPU_EP_1",
577+
OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, OrtDevice::VendorIds::NVIDIA, 1));
578+
ASSERT_STATUS_OK(providers.Add("GPU_EP_1", gpu1_ep));
579+
580+
auto result = EpLayeringMatcher::Match(providers, rule);
581+
ASSERT_TRUE(result.has_value());
582+
EXPECT_EQ(*result, "GPU_EP_1");
583+
}
584+
556585
TEST(EpLayeringMatcherTest, MatchExecutionProviders_NoMatch) {
557586
LayerAnnotation rule = {"GPU", "Anno1", false};
558587
ExecutionProviders providers;

0 commit comments

Comments
 (0)