apache
diff --git a/src/main/python/systemds/scuro/dataloader/audio_loader.py b/src/main/python/systemds/scuro/dataloader/audio_loader.py
Lines changed: 2 additions & 1 deletion
diff --git a/src/main/python/systemds/scuro/dataloader/timeseries_loader.py b/src/main/python/systemds/scuro/dataloader/timeseries_loader.py
Lines changed: 2 additions & 0 deletions
diff --git a/src/main/python/systemds/scuro/dataloader/video_loader.py b/src/main/python/systemds/scuro/dataloader/video_loader.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/main/python/systemds/scuro/drsearch/node_executor.py b/src/main/python/systemds/scuro/drsearch/node_executor.py
Lines changed: 4 additions & 1 deletion
diff --git a/src/main/python/systemds/scuro/drsearch/node_scheduler.py b/src/main/python/systemds/scuro/drsearch/node_scheduler.py
Lines changed: 7 additions & 0 deletions
diff --git a/src/main/python/systemds/scuro/modality/transformed.py b/src/main/python/systemds/scuro/modality/transformed.py
Lines changed: 6 additions & 0 deletions
diff --git a/src/main/python/systemds/scuro/representations/concatenation.py b/src/main/python/systemds/scuro/representations/concatenation.py
Lines changed: 7 additions & 0 deletions
diff --git a/src/main/python/systemds/scuro/representations/covarep_audio_features.py b/src/main/python/systemds/scuro/representations/covarep_audio_features.py
Lines changed: 28 additions & 22 deletions
diff --git a/src/main/python/systemds/scuro/representations/hadamard.py b/src/main/python/systemds/scuro/representations/hadamard.py
Lines changed: 7 additions & 0 deletions
diff --git a/src/main/python/systemds/scuro/representations/spectrogram.py b/src/main/python/systemds/scuro/representations/spectrogram.py
Lines changed: 45 additions & 0 deletions
@@ -34,6 +34,7 @@ class AudioStats:
     max_length: int
     avg_length: float
     num_instances: int
+    output_shape_is_known: bool
 
     @property
     def output_shape(self):
@@ -91,4 +92,4 @@ def get_stats(self, source_path: str):
             max_length = max(max_length, audio.shape[0])
             avg_length += audio.shape[0]
         avg_length /= num_instances
-        return AudioStats(sampling_rate, max_length, avg_length, num_instances)
+        return AudioStats(sampling_rate, max_length, avg_length, num_instances, True)
@@ -34,6 +34,8 @@ class TimeseriesStats:
     max_length: int
     num_instances: int
     num_signals: int
+    output_shape: tuple
+    output_shape_is_known: bool
 
 
 class TimeseriesLoader(BaseLoader):
 
@@ -35,7 +35,7 @@ class VideoStats:
     max_length: int
     max_width: int
     max_height: int
-    max_num_channels: int
+    max_channels: int
     num_instances: int
 
     @property
@@ -48,7 +48,7 @@ def output_shape(self):
         the per-instance tensor shape. For videos we approximate this as
         (max_length, max_height, max_width, max_num_channels).
         """
-        return (self.max_length, self.max_height, self.max_width, self.max_num_channels)
+        return (self.max_length, self.max_height, self.max_width, self.max_channels)
 
 
 class VideoLoader(BaseLoader):
 
@@ -47,7 +47,6 @@
 from systemds.scuro.representations.representation import RepresentationStats
 from systemds.scuro.representations.unimodal import UnimodalRepresentation
 from systemds.scuro.utils.checkpointing import CheckpointManager
-import sys
 import threading
 import time
 import psutil
@@ -371,6 +370,10 @@ def submit_new_ready_nodes():
                     try:
                         result = future.result()
                     except Exception as e:
+                        err_cls = type(e)
+                        err_mod = err_cls.__module__
+                        if err_mod.startswith("torch"):
+                            torch.cuda.empty_cache()
                         print(f"Error executing node {node_id}: {e}")
                         self.scheduler.add_failed_node(node_id)
                         continue
 
@@ -81,6 +81,7 @@ def __init__(
                 for info in self.gpu_memory_info
             },
         }
+        self._initialized = False
 
     def update_cpu_memory_in_use(self, delta_bytes: int):
         self.memory_stats["cpu_in_use"] += delta_bytes
@@ -145,8 +146,13 @@ def complete_node(self, node_id: str):
                 self.remaining_children[parent_id] -= 1
 
     def is_finished(self) -> bool:
+        if not self._initialized:
+            self._initialized = True
+            return False
+
         if self._is_deadlock():
             self.deadlock = True
+            self.success = False
             return True
 
         if self._is_success():
@@ -258,6 +264,7 @@ def _check_memory_constraints(self, node_id: str) -> bool:
         if cpu_mem > self.memory_budget["cpu"] - self.memory_stats["cpu_in_use"]:
             if cpu_mem > self.memory_budget["cpu"]:
                 self.blocked_memory_nodes_perm.append(node_id)
+                self.topo_order.remove(node_id)
             return False, None
 
         if gpu_mem > 0.0 and self.n_gpu > 0:
 
@@ -70,6 +70,12 @@ def __init__(
                 if "attention_masks" in v:
                     del self.metadata[k]["attention_masks"]
 
+    def copy_from_instance(self):
+        """Create a copy of the transformed modality instance."""
+        # Calls this instance's own class with (self, None, self.modality_type).
+        # NOTE(review): whether data is copied or shared depends on __init__ - confirm.
+        return type(self)(self, None, self.modality_type)
+
     def calculate_memory_usage(self):
         data_bytes = 0
         for instance in self.data:
 
@@ -96,3 +96,10 @@ def get_output_stats(self, input_stats_list) -> RepresentationStats:
             output_shape = stats_list[0].output_shape
 
         return RepresentationStats(num_instances, output_shape)
+
+    def estimate_peak_memory_bytes(self, input_stats) -> dict:
+        # TODO: placeholder - reports zero CPU/GPU peak bytes and ignores input_stats
+        return {
+            "cpu_peak_bytes": 0,
+            "gpu_peak_bytes": 0,
+        }
@@ -78,13 +78,6 @@ def transform(self, modality, aggregation=None):
         return transformed_modality
 
     def get_output_stats(self, input_stats) -> RepresentationStats:
-        """
-        Estimate output shape of Spectral features.
-
-        We compute 4 spectral feature sequences (centroid, bandwidth,
-        rolloff, flatness), each over frames of length ``hop_length``.
-        The resulting tensors have shape (num_frames, 4).
-        """
         num_instances = getattr(input_stats, "num_instances", 0)
 
         # Try to infer signal length from stats
@@ -103,6 +96,13 @@ def get_output_stats(self, input_stats) -> RepresentationStats:
 
         return RepresentationStats(num_instances, (num_frames, 4))
 
+    def estimate_peak_memory_bytes(self, input_stats) -> dict:
+        # TODO: placeholder - reports zero CPU/GPU peak bytes and ignores input_stats
+        return {
+            "cpu_peak_bytes": 0,
+            "gpu_peak_bytes": 0,
+        }
+
 
 @register_representation(ModalityType.AUDIO)
 class ZeroCrossing(UnimodalRepresentation):
@@ -130,13 +130,6 @@ def transform(self, modality, aggregation=None):
         return transformed_modality
 
     def get_output_stats(self, input_stats) -> RepresentationStats:
-        """
-        Estimate output shape of ZeroCrossing features.
-
-        ``librosa.feature.zero_crossing_rate`` returns an array of shape
-        (1, num_frames), so each instance is treated as a sequence of
-        scalar features over frames.
-        """
         num_instances = getattr(input_stats, "num_instances", 0)
 
         if hasattr(input_stats, "max_length"):
@@ -152,9 +145,15 @@ def get_output_stats(self, input_stats) -> RepresentationStats:
             num_frames = 1 + max(int((signal_length - 1) // self.hop_length), 0)
             num_frames = max(int(num_frames), 1)
 
-        # shape (num_frames, 1): one scalar feature per frame
         return RepresentationStats(num_instances, (num_frames, 1))
 
+    def estimate_peak_memory_bytes(self, input_stats) -> dict:
+        # TODO: placeholder - reports zero CPU/GPU peak bytes and ignores input_stats
+        return {
+            "cpu_peak_bytes": 0,
+            "gpu_peak_bytes": 0,
+        }
+
 
 @register_representation(ModalityType.AUDIO)
 class RMSE(UnimodalRepresentation):
@@ -183,12 +182,6 @@ def transform(self, modality, aggregation=None):
         return transformed_modality
 
     def get_output_stats(self, input_stats) -> RepresentationStats:
-        """
-        Estimate output shape of RMSE features.
-
-        ``librosa.feature.rms`` returns an array of shape (1, num_frames),
-        so each instance is a sequence of scalar RMS values over frames.
-        """
         num_instances = getattr(input_stats, "num_instances", 0)
 
         if hasattr(input_stats, "max_length"):
@@ -201,12 +194,18 @@ def get_output_stats(self, input_stats) -> RepresentationStats:
         if signal_length <= 0:
             num_frames = 1
         else:
-            # librosa.rms uses frame_length and hop_length; approximate
             num_frames = 1 + max(int((signal_length - 1) // self.hop_length), 0)
             num_frames = max(int(num_frames), 1)
 
         return RepresentationStats(num_instances, (num_frames, 1))
 
+    def estimate_peak_memory_bytes(self, input_stats) -> dict:
+        # TODO: placeholder - reports zero CPU/GPU peak bytes and ignores input_stats
+        return {
+            "cpu_peak_bytes": 0,
+            "gpu_peak_bytes": 0,
+        }
+
 
 @register_representation(ModalityType.AUDIO)
 class Pitch(UnimodalRepresentation):
@@ -253,3 +252,10 @@ def get_output_stats(self, input_stats) -> RepresentationStats:
             num_frames = max(int(num_frames), 1)
 
         return RepresentationStats(num_instances, (num_frames, 1))
+
+    def estimate_peak_memory_bytes(self, input_stats) -> dict:
+        # TODO: placeholder - reports zero CPU/GPU peak bytes and ignores input_stats
+        return {
+            "cpu_peak_bytes": 0,
+            "gpu_peak_bytes": 0,
+        }
@@ -63,3 +63,10 @@ def num_elements(stats: RepresentationStats) -> int:
 
         largest = max(stats_list, key=num_elements)
         return RepresentationStats(largest.num_instances, largest.output_shape)
+
+    def estimate_peak_memory_bytes(self, input_stats) -> dict:
+        # TODO: placeholder - reports zero CPU/GPU peak bytes and ignores input_stats
+        return {
+            "cpu_peak_bytes": 0,
+            "gpu_peak_bytes": 0,
+        }
@@ -30,6 +30,11 @@
     register_representation,
     register_context_representation_operator,
 )
+from systemds.scuro.utils.static_variables import (
+    NP_ARRAY_HEADER_BYTES,
+    PY_LIST_HEADER_BYTES,
+    PY_LIST_SLOT_BYTES,
+)
 
 
 @register_representation(ModalityType.AUDIO)
@@ -60,6 +65,46 @@ def compute_feature(self, instance):
         )
         return librosa.amplitude_to_db(np.abs(spectrogram)).T
 
+    def estimate_peak_memory_bytes(self, input_stats) -> dict:
+        """
+        Estimate the peak memory needed to compute all spectrograms.
+
+        The CPU peak is the retained output of every instance (payload, array
+        header and list slot, plus one list header) plus the transient buffers
+        of a single instance (input copy, complex STFT, magnitude, dB result and
+        FFT workspace). ``input_stats`` supplies ``num_instances`` and
+        ``output_shape``, whose first entry is used as the signal length.
+        Returns a dict with ``cpu_peak_bytes`` and ``gpu_peak_bytes`` (always 0).
+        """
+        # TODO: validate this function
+        # NOTE(review): frames are counted as for un-centered framing, while
+        # librosa.stft defaults to center=True (1 + length // hop_length) - confirm.
+        n = int(getattr(input_stats, "num_instances", 0))
+        output_shape = getattr(input_stats, "output_shape", None)
+        # Stats without a usable shape are treated as an empty signal.
+        signal_length = output_shape[0] if output_shape else 0
+        if signal_length < self.n_fft:
+            num_frames = 1
+        else:
+            num_frames = 1 + (signal_length - self.n_fft) // self.hop_length
+        num_cells = max(int(num_frames), 1) * (1 + self.n_fft // 2)
+
+        float32_bytes = np.dtype(np.float32).itemsize
+        output_payload_bytes = num_cells * float32_bytes
+        retained_output_bytes = PY_LIST_HEADER_BYTES + n * (
+            output_payload_bytes + NP_ARRAY_HEADER_BYTES + PY_LIST_SLOT_BYTES
+        )
+        input_copy_bytes = max(signal_length, 1) * float32_bytes
+        stft_bytes = num_cells * np.dtype(np.complex64).itemsize
+        # FFT scratch space: at least 2 MiB, otherwise half the STFT buffer.
+        fft_workspace_bytes = max(2 * 1024 * 1024, stft_bytes // 2)
+        # The magnitude and dB arrays each match the output payload size.
+        transient_one_instance_bytes = (
+            input_copy_bytes + stft_bytes + fft_workspace_bytes
+        ) + 2 * output_payload_bytes
+        cpu_peak = retained_output_bytes + transient_one_instance_bytes
+        return {"cpu_peak_bytes": int(cpu_peak), "gpu_peak_bytes": 0}
+
     def get_output_stats(self, input_stats) -> RepresentationStats:
         num_instances = getattr(input_stats, "num_instances", 0)