apache
diff --git a/src/main/python/systemds/scuro/dataloader/image_loader.py b/src/main/python/systemds/scuro/dataloader/image_loader.py
Lines changed: 16 additions & 16 deletions
diff --git a/src/main/python/systemds/scuro/dataloader/json_loader.py b/src/main/python/systemds/scuro/dataloader/json_loader.py
Lines changed: 26 additions & 1 deletion
diff --git a/src/main/python/systemds/scuro/dataloader/video_loader.py b/src/main/python/systemds/scuro/dataloader/video_loader.py
Lines changed: 16 additions & 0 deletions
diff --git a/src/main/python/systemds/scuro/drsearch/dag_group_executor.py b/src/main/python/systemds/scuro/drsearch/dag_group_executor.py
Lines changed: 170 additions & 0 deletions
diff --git a/src/main/python/systemds/scuro/drsearch/dag_group_scheduler.py b/src/main/python/systemds/scuro/drsearch/dag_group_scheduler.py
Lines changed: 6 additions & 0 deletions
@@ -82,22 +82,22 @@ def get_stats(self, source_path: str):
 
         for file in self.indices:
             path = os.path.join(source_path, f"{file}{self._ext}")
-            if self.chunk_size is None:
-                self.extract(path)
-                md = self.metadata[path]
-                max_width = max(max_width, md["width"])
-                max_height = max(max_height, md["height"])
-                max_channels = max(max_channels, md["num_channels"])
-                num_instances += 1
-            else:
-                self.file_sanity_check(path)
-                image = cv2.imread(path, cv2.IMREAD_COLOR)
-                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-                height, width, channels = image.shape
-                max_width = max(max_width, width)
-                max_height = max(max_height, height)
-                max_channels = max(max_channels, channels)
-                num_instances += 1
+            # if self.chunk_size is None:
+            #     self.extract(path)
+            #     md = self.metadata[path]
+            #     max_width = max(max_width, md["width"])
+            #     max_height = max(max_height, md["height"])
+            #     max_channels = max(max_channels, md["num_channels"])
+            #     num_instances += 1
+            # else:
+            self.file_sanity_check(path)
+            image = cv2.imread(path, cv2.IMREAD_COLOR)
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+            height, width, channels = image.shape
+            max_width = max(max_width, width)
+            max_height = max(max_height, height)
+            max_channels = max(max_channels, channels)
+            num_instances += 1
         return ImageStats(
             max_width,
             max_height,
 
@@ -95,8 +95,33 @@ def get_stats(self, source_path: str):
 
                     text = " ".join(text) if isinstance(text, list) else text
                     num_instances += 1
-                    max_length = max(max_length, len(text))
+                    max_length = max(max_length, len(text))  # number of characters
                     avg_length += len(text)
 
             avg_length /= num_instances
         return JSONStats(num_instances, max_length, avg_length, (max_length,))
+
+    def estimate_peak_memory_bytes(self) -> dict:
+        """Estimate the peak memory needed for the loaded JSON text.
+
+        The figure is derived from ``self.stats`` (instance count and text
+        lengths in characters) combined with fixed per-object size constants.
+        NOTE(review): the constants look like 64-bit CPython object sizes;
+        confirm before relying on the absolute numbers.
+
+        Returns:
+            A dict with ``"cpu_peak_bytes"`` (int) and ``"gpu_peak_bytes"``,
+            which is always 0 here.
+        """
+        stats = self.stats
+        # Clamp so an empty dataset still yields a small non-zero estimate.
+        count = max(1, stats.num_instances)
+
+        # An "average" above the maximum cannot be a mean; presumably it is
+        # an un-normalised sum in that case, so divide it by the count.
+        if stats.avg_length > stats.max_length:
+            mean_chars = stats.avg_length / count
+        else:
+            mean_chars = stats.avg_length
+        mean_chars = max(1.0, mean_chars)
+
+        # Sizing constants, all in bytes except the over-allocation factor.
+        char_bytes = 2
+        string_base = 49
+        pointer_bytes = 8
+        list_base = 56
+        list_growth = 1.125
+        per_instance_metadata = 192
+
+        # Memory that stays alive once loading has finished.
+        strings_bytes = count * (string_base + char_bytes * mean_chars)
+        list_bytes = list_base + int(pointer_bytes * count * list_growth)
+        metadata_bytes = count * per_instance_metadata
+        resident_bytes = strings_bytes + list_bytes + metadata_bytes
+
+        # Short-lived overhead while parsing, as a multiple of the raw text.
+        parse_multiplier = 2.0
+        parsing_bytes = parse_multiplier * (count * char_bytes * mean_chars)
+
+        return {
+            "cpu_peak_bytes": int(resident_bytes + parsing_bytes),
+            "gpu_peak_bytes": 0,
+        }
@@ -133,3 +133,19 @@ def get_stats(self, source_path: str):
         return VideoStats(
             fps, max_length, max_width, max_height, max_num_channels, num_instances
         )
+
+    def estimate_peak_memory_bytes(self) -> dict:
+        """Estimate the peak memory needed to hold the loaded video data.
+
+        The instance count is ``self.chunk_size`` when chunking is enabled,
+        otherwise the total number of instances from ``self.stats``. It is
+        multiplied by the first four entries of ``stats.output_shape`` and by
+        4 (presumably bytes per float32 element - TODO confirm).
+
+        Returns:
+            A dict with ``"cpu_peak_bytes"`` and ``"gpu_peak_bytes"``, the
+            latter always 0 here.
+        """
+        stats = self.stats
+        count = stats.num_instances if self.chunk_size is None else self.chunk_size
+
+        shape = stats.output_shape
+        cpu_bytes = count * shape[0] * shape[1] * shape[2] * shape[3] * 4
+        return {"cpu_peak_bytes": cpu_bytes, "gpu_peak_bytes": 0}
@@ -0,0 +1,170 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+from concurrent.futures import ProcessPoolExecutor, as_completed
+import multiprocessing as mp
+import os
+import pickle
+
+import time
+from typing import Any, Dict, List, Optional
+from systemds.scuro import Modality
+from systemds.scuro.drsearch.representation_dag import (
+    LRUCache,
+    RepresentationDag,
+    group_dags_by_dependencies,
+)
+from systemds.scuro.utils.checkpointing import CheckpointManager
+from systemds.scuro.drsearch.dag_group_scheduler import DAGGroupScheduler
+
+
+def _process_dag_group(
+    dag_group_pickle: bytes,
+    modality_pickle: bytes,
+    tasks_pickle: bytes,
+    modality_id: int,
+    dag_group_idx: int,
+) -> Dict[str, Any]:
+    """Execute one DAG group and evaluate every task on each representation.
+
+    This function is what ``DAGGroupExecutor.run`` submits to the process
+    pool, which is why its inputs arrive as pickled bytes.
+
+    Args:
+        dag_group_pickle: Pickled iterable of DAGs to execute.
+        modality_pickle: Pickled modality handed to every DAG.
+        tasks_pickle: Pickled iterable of tasks run on each representation.
+        modality_id: Identifier stored in each result and passed to the
+            checkpoint manager.
+        dag_group_idx: Index of this group; used in the checkpoint prefix.
+
+    Returns:
+        ``{"results": [...]}`` with one entry per (DAG, task) pair holding
+        scores, timings, the task name, the DAG and the modality id.
+    """
+    # Per-group checkpoints are written to the worker's current directory.
+    manager = CheckpointManager(
+        checkpoint_dir=os.getcwd(),
+        prefix=f"unimodal_checkpoint_group_{modality_id}_{dag_group_idx}_",
+        checkpoint_every=1,
+        resume=False,
+    )
+    collected = []
+
+    group = pickle.loads(dag_group_pickle)
+    modality = pickle.loads(modality_pickle)
+    task_list = pickle.loads(tasks_pickle)
+
+    # One cache instance is shared by all DAGs of this group.
+    shared_cache = LRUCache(max_size=6)
+
+    for dag in group:
+        representation = dag.execute([modality], external_cache=shared_cache)
+
+        for task in task_list:
+            started = time.perf_counter()
+            scores = task.run(representation.data)
+            elapsed = time.perf_counter() - started
+
+            entry = {
+                "scores": scores,
+                "transform_time": representation.transform_time,
+                "task_name": task.model.name,
+                "task_time": elapsed,
+                "dag": dag,
+                "modality_id": modality_id,
+            }
+            collected.append(entry)
+
+            # Record progress after each task, then checkpoint if one is due.
+            manager.increment(modality_id, 1, dag_group_idx=dag_group_idx)
+            manager.checkpoint_if_due(collected)
+
+    return {"results": collected}
+
+
+class DAGGroupExecutor:
+    """Run groups of representation DAGs in worker processes.
+
+    DAGs are grouped with ``group_dags_by_dependencies``. A
+    ``DAGGroupScheduler`` supplies per-group resource figures and decides
+    which pending groups may start. Each admitted group is executed by
+    ``_process_dag_group`` in a spawned worker process.
+
+    Only ``modalities[0]`` is currently used.
+    """
+
+    def __init__(
+        self,
+        dags: List[RepresentationDag],
+        modalities: List[Modality],
+        tasks: List[Any],
+        checkpoint_manager: Optional[CheckpointManager] = None,
+        max_workers: Optional[int] = None,
+    ):
+        """Group the DAGs and set up the scheduler.
+
+        Args:
+            dags: DAGs to execute; grouped via ``group_dags_by_dependencies``.
+            modalities: Input modalities; only the first one is used.
+            tasks: Tasks evaluated on every produced representation.
+            checkpoint_manager: Stored on the instance; not used by ``run``.
+            max_workers: Upper bound on worker processes; defaults to the
+                CPU count when omitted.
+        """
+        self.dags = dags
+        self.dag_groups = group_dags_by_dependencies(dags)
+        self.modalities = modalities
+        self.tasks = tasks
+        self.max_workers = max_workers or mp.cpu_count()
+        self.checkpoint_manager = checkpoint_manager
+        self.scheduler = DAGGroupScheduler(
+            dag_groups=self.dag_groups, modality=modalities[0]
+        )
+
+    def run(self):
+        """Execute all DAG groups and collect their results.
+
+        Returns:
+            A tuple ``(results, all_groups_succeeded)``. ``results`` is a
+            flat list of result dicts from every group that finished without
+            error. ``all_groups_succeeded`` is False if any group raised or
+            could not be scheduled at all.
+        """
+        results = []
+        all_groups_succeeded = True
+
+        # ProcessPoolExecutor rejects max_workers=0, so handle "no work" here.
+        if not self.dag_groups:
+            return results, all_groups_succeeded
+
+        ctx = mp.get_context("spawn")
+        max_workers = min(len(self.dag_groups), self.max_workers)
+
+        modality = self.modalities[0]  # TODO: handle multiple modalities
+        modality_id = modality.modality_id
+        # Serialise the shared inputs once instead of once per group.
+        modality_pickle = pickle.dumps(modality)
+        tasks_pickle = pickle.dumps(self.tasks)
+
+        pending_dag_groups = set(range(len(self.dag_groups)))
+        running_dag_groups = {}
+        with ProcessPoolExecutor(max_workers=max_workers, mp_context=ctx) as executor:
+            while pending_dag_groups or running_dag_groups:
+                pending_resources = [
+                    (
+                        i,
+                        self.scheduler.group_resources[i][0],
+                        self.scheduler.group_resources[i][1],
+                    )
+                    for i in pending_dag_groups
+                ]
+                ready_to_execute = self.scheduler.get_runnable(
+                    pending_resources, max_concurrent=max_workers
+                )
+                for group_id, gpu_id in ready_to_execute:
+                    pending_dag_groups.remove(group_id)
+                    cpu_mem, gpu_mem = self.scheduler.group_resources[group_id]
+
+                    future = executor.submit(
+                        _process_dag_group,
+                        pickle.dumps(self.dag_groups[group_id]),
+                        modality_pickle,
+                        tasks_pickle,
+                        modality_id,
+                        group_id,
+                    )
+                    running_dag_groups[future] = (group_id, cpu_mem, gpu_mem, gpu_id)
+
+                if not running_dag_groups:
+                    # Nothing is running and the scheduler admitted nothing,
+                    # so the remaining groups can never start. Report that
+                    # instead of silently claiming success.
+                    all_groups_succeeded = False
+                    print(
+                        f"Unable to schedule DAG groups {sorted(pending_dag_groups)} for modality {modality_id}"
+                    )
+                    break
+
+                # Block until any one of the running groups has finished.
+                done = next(as_completed(running_dag_groups), None)
+                if done is None:
+                    break
+                group_id, cpu_mem, gpu_mem, gpu_id = running_dag_groups.pop(done)
+                self.scheduler.release(cpu_mem, gpu_mem, gpu_id)
+
+                # Harvest the future that just completed. Reading it here,
+                # inside the loop, keeps the results of every group rather
+                # than only those of the last submitted one.
+                try:
+                    result_dict = done.result()
+
+                    for result_entry in result_dict["results"]:
+                        results.append(
+                            {
+                                "scores": result_entry["scores"],
+                                "transform_time": result_entry["transform_time"],
+                                "task_name": result_entry["task_name"],
+                                "task_time": result_entry["task_time"],
+                                "dag": result_entry["dag"],
+                                "modality_id": modality_id,
+                            }
+                        )
+                except Exception as e:
+                    # Best effort: one failing group must not abort the rest.
+                    all_groups_succeeded = False
+                    print(
+                        f"Error processing DAG group {group_id} for modality {modality_id}: {e}"
+                    )
+        return results, all_groups_succeeded
@@ -50,6 +50,8 @@ def __init__(
         gpu_margin: float = 0.8,
         shared_state: Optional[Dict[str, Any]] = None,
         lock=None,
+        dag_groups: List[List[RepresentationDag]] = None,
+        modality: Modality = None,
     ):
         self._margin = (cpu_margin, gpu_margin)
         self._n_gpu = (
@@ -65,6 +67,10 @@ def __init__(
         else:
             self._shared.setdefault("cpu_in_use", 0.0)
             self._shared.setdefault("gpu_in_use", {})
+        self.group_resources = []
+        for dag_group in dag_groups:
+            cpu_mem, gpu_mem = get_peak_memory_from_dag_group(dag_group, modality)
+            self.group_resources.append((cpu_mem, gpu_mem))
 
     def _avail_cpu(self) -> float:
         available_memory = (psutil.virtual_memory().available) if psutil else 4096.0