|
16 | 16 | from packaging.version import InvalidVersion, parse |
17 | 17 |
|
18 | 18 | from devito.logger import warning |
19 | | -from devito.tools import all_equal, as_tuple, memoized_func |
| 19 | +from devito.tools import all_equal, as_tuple, frozendict, memoized_func |
20 | 20 |
|
21 | 21 | __all__ = [ # noqa: RUF022 |
22 | 22 | 'platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_visible_devices', |
@@ -493,37 +493,31 @@ def parse_product_arch(): |
493 | 493 | return None |
494 | 494 |
|
495 | 495 |
|
@memoized_func
def _get_uuid_to_index_map():
    """
    Build a frozen mapping from GPU UUID/unique-ID strings to integer device indices.

    Each known vendor tool is invoked in turn and its output is scanned line
    by line. Tools that cannot be launched are skipped, and lines that do not
    match the expected format are ignored. Entries from all tools that could
    be queried are merged into a single mapping.

    Returns
    -------
    frozendict
        Maps each discovered UUID/unique-ID string to its integer device
        index. Empty if no tool could be run or no line matched.

    Notes
    -----
    NOTE(review): decorated with ``memoized_func``, presumably so that the
    vendor tools are only queried once per process -- confirm.
    """
    # (command, pattern) where group(1)=index, group(2)=uuid
    # nvidia-smi -L output: "GPU 0: <name> (UUID: GPU-xxxx-...)"
    # rocm-smi --showuniqueid output: "GPU[0] : Unique ID: 0x<hex>"
    queries = [
        (['nvidia-smi', '-L'], r'GPU\s+(\d+):.*\(UUID:\s*([\w-]+)\)'),
        (['rocm-smi', '--showuniqueid'], r'GPU\[(\d+)\].*Unique ID:\s*([\w]+)'),
    ]
    mapper = {}
    for cmd, pattern in queries:
        try:
            # The context manager closes the stdout pipe and waits for the
            # child on exit, so neither a file descriptor nor an unreaped
            # process is left behind
            with Popen(cmd, stdout=PIPE, stderr=DEVNULL) as proc:
                out, _ = proc.communicate()
        except OSError:
            # Command not available
            continue

        raw = out.decode()
        for line in raw.splitlines():
            if m := re.match(pattern, line):
                mapper[m.group(2)] = int(m.group(1))

    return frozendict(mapper)
527 | 521 |
|
528 | 522 |
|
529 | 523 | def get_visible_devices(): |
@@ -556,13 +550,17 @@ def get_visible_devices(): |
556 | 550 | return v, ids |
557 | 551 |
|
558 | 552 | # Try UUID → device index resolution |
559 | | - ids = _resolve_uuids_to_indices(entries) |
560 | | - if ids is not None: |
| 553 | + mapper = _get_uuid_to_index_map() |
| 554 | + try: |
| 555 | + ids = tuple(mapper[u] for u in entries) |
561 | 556 | return v, ids |
| 557 | + except KeyError: |
| 558 | + pass |
562 | 559 |
|
563 | | - raise RuntimeError( |
564 | | - f"Cannot resolve device specifiers in {v}={os.environ[v]!r}." |
565 | | - ) |
| 560 | + warning("Unresolvable visible devices environment variables encountered:" |
| 561 | + f" {v}={os.environ[v]} ignored.") |
| 562 | + |
| 563 | + return None, None |
566 | 564 |
|
567 | 565 | return None, None |
568 | 566 |
|
|
0 commit comments