implement DirectML support (#211)

zbear0808 · web-flow · commit de321c50287f · 2025-05-19T22:32:34.000-04:00
* add directml gpu support

* fix bug that doesn't allow models to be loaded on gpu

* fix lock and import
diff --git a/README.md b/README.md
@@ -490,6 +490,10 @@ or
 ```sh
 poetry install --extras "gpu"
 ```
+or
+```sh
+poetry install --extras "dml"
+```
 
 ### Running the Command-Line Interface Locally
 
diff --git a/audio_separator/separator/architectures/mdxc_separator.py b/audio_separator/separator/architectures/mdxc_separator.py
@@ -105,7 +105,10 @@ def load_model(self):
             else:
                 self.logger.debug("Loading TFC_TDF_net model...")
                 self.model_run = TFC_TDF_net(self.model_data_cfgdict, device=self.torch_device)
-                self.model_run.load_state_dict(torch.load(self.model_path, map_location=self.torch_device))
+                self.logger.debug("Loading model onto cpu")
+                # Loading the state dict directly onto a hardware-accelerated device can cause issues (cause unknown),
+                # so we load it onto the CPU first and then move the model to the target device.
+                self.model_run.load_state_dict(torch.load(self.model_path, map_location="cpu"))
                 self.model_run.to(self.torch_device).eval()
 
         except RuntimeError as e:
diff --git a/audio_separator/separator/architectures/vr_separator.py b/audio_separator/separator/architectures/vr_separator.py
@@ -144,7 +144,7 @@ def separate(self, audio_file_path, custom_output_names=None):
             self.logger.debug("Determining model capacity...")
             self.model_run = nets.determine_model_capacity(self.model_params.param["bins"] * 2, nn_arch_size)
 
-        self.model_run.load_state_dict(torch.load(self.model_path, map_location=self.torch_device_cpu))
+        self.model_run.load_state_dict(torch.load(self.model_path, map_location="cpu"))
         self.model_run.to(self.torch_device)
         self.logger.debug("Model loaded and moved to device.")
 
diff --git a/audio_separator/separator/separator.py b/audio_separator/separator/separator.py
@@ -93,6 +93,7 @@ def __init__(
         sample_rate=44100,
         use_soundfile=False,
         use_autocast=False,
+        use_directml=False,
         mdx_params={"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False},
         vr_params={"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False},
         demucs_params={"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True},
@@ -179,6 +180,7 @@ def __init__(
 
         self.use_soundfile = use_soundfile
         self.use_autocast = use_autocast
+        self.use_directml = use_directml
 
         # These are parameters which users may want to configure so we expose them to the top-level Separator class,
         # even though they are specific to a single model architecture
@@ -246,20 +248,24 @@ def log_onnxruntime_packages(self):
         onnxruntime_gpu_package = self.get_package_distribution("onnxruntime-gpu")
         onnxruntime_silicon_package = self.get_package_distribution("onnxruntime-silicon")
         onnxruntime_cpu_package = self.get_package_distribution("onnxruntime")
+        onnxruntime_dml_package = self.get_package_distribution("onnxruntime-directml")
 
         if onnxruntime_gpu_package is not None:
             self.logger.info(f"ONNX Runtime GPU package installed with version: {onnxruntime_gpu_package.version}")
         if onnxruntime_silicon_package is not None:
             self.logger.info(f"ONNX Runtime Silicon package installed with version: {onnxruntime_silicon_package.version}")
         if onnxruntime_cpu_package is not None:
             self.logger.info(f"ONNX Runtime CPU package installed with version: {onnxruntime_cpu_package.version}")
+        if onnxruntime_dml_package is not None:
+            self.logger.info(f"ONNX Runtime DirectML package installed with version: {onnxruntime_dml_package.version}")
 
     def setup_torch_device(self, system_info):
         """
         This method sets up the PyTorch and/or ONNX Runtime inferencing device, using GPU hardware acceleration if available.
         """
         hardware_acceleration_enabled = False
         ort_providers = ort.get_available_providers()
+        has_torch_dml_installed = self.get_package_distribution("torch_directml")
 
         self.torch_device_cpu = torch.device("cpu")
 
@@ -269,6 +275,11 @@ def setup_torch_device(self, system_info):
         elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available() and system_info.processor == "arm":
             self.configure_mps(ort_providers)
             hardware_acceleration_enabled = True
+        elif self.use_directml and has_torch_dml_installed:
+            import torch_directml
+            if torch_directml.is_available():
+                self.configure_dml(ort_providers)
+                hardware_acceleration_enabled = True
 
         if not hardware_acceleration_enabled:
             self.logger.info("No hardware acceleration could be configured, running in CPU mode")
@@ -302,6 +313,21 @@ def configure_mps(self, ort_providers):
         else:
             self.logger.warning("CoreMLExecutionProvider not available in ONNXruntime, so acceleration will NOT be enabled")
 
+    def configure_dml(self, ort_providers):
+        """
+        Set the Torch device to the torch_directml device and, if "DmlExecutionProvider" is in ort_providers, select it as the ONNX Runtime execution provider.
+        """
+        import torch_directml  # local import: torch_directml is declared as an optional dependency in pyproject.toml
+        self.logger.info("DirectML is available in Torch, setting Torch device to DirectML")
+        self.torch_device_dml = torch_directml.device() 
+        self.torch_device = self.torch_device_dml
+
+        if "DmlExecutionProvider" in ort_providers:
+            self.logger.info("ONNXruntime has DmlExecutionProvider available, enabling acceleration")
+            self.onnx_execution_provider = ["DmlExecutionProvider"]
+        else:  # self.torch_device stays on DirectML; only the ONNX Runtime provider is left unset by this method
+            self.logger.warning("DmlExecutionProvider not available in ONNXruntime, so acceleration will NOT be enabled")
+
     def get_package_distribution(self, package_name):
         """
         This method returns the package distribution for a given package name if installed, or None otherwise.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -37,13 +37,15 @@ librosa = ">=0.10"
 samplerate = "0.1.0"
 six = ">=1.16"
 torch = ">=2.3"
+torch_directml = {version = "*", optional = true}
 tqdm = "*"
 pydub = ">=0.25"
 audioop-lts = { version = ">=0.2.1", python = "^3.13" }
 onnx-weekly = { version = "*" }
 onnx2torch-py313 = ">=1.6"
 onnxruntime = { version = ">=1.17", optional = true }
 onnxruntime-gpu = { version = ">=1.17", optional = true }
+onnxruntime-directml = { version = ">=1.17", optional = true } # not tested against other versions; assumes the same >=1.17 minimum as the other onnxruntime packages
 julius = ">=0.2"
 diffq-fixed = { version = ">=0.2", platform = "win32" }
 diffq = { version = ">=0.2", platform = "!=win32" }
@@ -58,6 +60,7 @@ scipy = "^1.13.0"
 [tool.poetry.extras]
 cpu = ["onnxruntime"]
 gpu = ["onnxruntime-gpu"]
+dml = ["onnxruntime-directml", "torch_directml"]
 
 [tool.poetry.scripts]
 audio-separator = 'audio_separator.utils.cli:main'