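batch fps: support per-feature fps (int or dict) in batch encoding and order streams so the visualization feature comes first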

KeplerC · KeplerC · commit be1919171c1e · 2025-06-19T01:13:04.000Z
diff --git a/robodm/backend/pyav_backend.py b/robodm/backend/pyav_backend.py
@@ -16,7 +16,7 @@
 import pickle
 import logging
 from fractions import Fraction
-from typing import Any, Dict, List, Tuple, Optional
+from typing import Any, Dict, List, Tuple, Optional, Union
 
 import av
 import numpy as np
@@ -822,17 +822,20 @@ def create_streams_for_batch_data(
         self,
         sample_data: Dict[str, Any],
         codec_config: Any,
-        feature_name_separator: str = "/"
+        feature_name_separator: str = "/",
+        visualization_feature: Optional[str] = None
     ) -> Dict[str, int]:
         """Create optimized streams for batch data processing.
         
         Analyzes sample data to determine optimal codec for each feature
-        and creates streams with target codec directly.
+        and creates streams with target codec directly. Respects visualization_feature
+        ordering to prioritize visualization streams first.
         
         Args:
             sample_data: Sample data dict to analyze feature types
             codec_config: Codec configuration
             feature_name_separator: Separator for nested feature names
+            visualization_feature: Optional feature name to prioritize as first stream for visualization
             
         Returns:
             Dict mapping feature names to stream indices
@@ -846,9 +849,30 @@ def create_streams_for_batch_data(
         # Flatten the sample data
         flattened_data = _flatten_dict(sample_data, sep=feature_name_separator)
         
+        # Sort features to prioritize visualization feature
+        def get_feature_priority(item):
+            feature_name, sample_value = item
+            
+            # Highest priority: specified visualization_feature
+            if visualization_feature and feature_name == visualization_feature:
+                return (0, feature_name)
+            
+            # Second priority: features that will become video-encoded (images/visualizations)
+            feature_type = FeatureType.from_data(sample_value)
+            target_codec = codec_config.get_codec_for_feature(feature_type, feature_name)
+            container_codec = codec_config.get_container_codec(target_codec)
+            if container_codec in {"ffv1", "libaom-av1", "libx264", "libx265"}:
+                return (1, feature_name)
+            
+            # Third priority: everything else
+            return (2, feature_name)
+        
+        # Sort features by priority
+        sorted_features = sorted(flattened_data.items(), key=get_feature_priority)
+        
         feature_to_stream_idx = {}
         
-        for feature_name, sample_value in flattened_data.items():
+        for feature_name, sample_value in sorted_features:
             # Determine feature type from sample
             feature_type = FeatureType.from_data(sample_value)
             
@@ -866,7 +890,7 @@ def create_streams_for_batch_data(
             
             feature_to_stream_idx[feature_name] = stream.index
             
-            logger.debug(f"Created stream for '{feature_name}' with codec '{container_codec}' (target: '{target_codec}')")
+            logger.debug(f"Created stream for '{feature_name}' with codec '{container_codec}' (target: '{target_codec}') at index {stream.index}")
         
         return feature_to_stream_idx
 
@@ -876,7 +900,7 @@ def encode_batch_data_directly(
         feature_to_stream_idx: Dict[str, int],
         codec_config: Any,
         feature_name_separator: str = "/",
-        fps: int = 10
+        fps: Union[int, Dict[str, int]] = 10
     ) -> None:
         """Encode a batch of data directly to target codecs without intermediate transcoding.
         
@@ -885,12 +909,32 @@ def encode_batch_data_directly(
             feature_to_stream_idx: Mapping of feature names to stream indices
             codec_config: Codec configuration
             feature_name_separator: Separator for nested feature names
-            fps: Frames per second for timestamp calculation
+            fps: Frames per second for timestamp calculation. Can be an int (same fps for all features) or Dict[str, int] (per-feature fps)
         """
         from robodm.utils.flatten import _flatten_dict
         
-        time_interval_ms = 1000 / fps
-        current_timestamp = 0
+        # Handle fps parameter - can be int or dict
+        if isinstance(fps, int):
+            # Use same fps for all features
+            default_fps = fps
+            feature_fps = {}
+        else:
+            # Per-feature fps specified
+            feature_fps = fps
+            default_fps = 10  # Fallback default
+        
+        # Initialize per-feature timestamps and time intervals
+        feature_timestamps = {}
+        feature_time_intervals = {}
+        
+        # Get all feature names from first sample to initialize timestamps
+        if data_batch:
+            first_sample = _flatten_dict(data_batch[0], sep=feature_name_separator)
+            for feature_name in first_sample.keys():
+                if feature_name in feature_to_stream_idx:
+                    fps_for_feature = feature_fps.get(feature_name, default_fps)
+                    feature_timestamps[feature_name] = 0
+                    feature_time_intervals[feature_name] = 1000.0 / fps_for_feature
         
         for step_data in data_batch:
             flattened_data = _flatten_dict(step_data, sep=feature_name_separator)
@@ -899,6 +943,9 @@ def encode_batch_data_directly(
                 if feature_name in feature_to_stream_idx:
                     stream_idx = feature_to_stream_idx[feature_name]
                     
+                    # Get current timestamp for this feature
+                    current_timestamp = feature_timestamps.get(feature_name, 0)
+                    
                     # Encode directly to target format
                     packet_infos = self.encode_data_to_packets(
                         data=value,
@@ -911,5 +958,7 @@ def encode_batch_data_directly(
                     # Mux packets immediately
                     for packet_info in packet_infos:
                         self.mux_packet_info(packet_info)
-            
-            current_timestamp += time_interval_ms 
+                    
+                    # Update timestamp for this feature
+                    time_interval = feature_time_intervals.get(feature_name, 1000.0 / default_fps)
+                    feature_timestamps[feature_name] = current_timestamp + time_interval 
diff --git a/robodm/trajectory.py b/robodm/trajectory.py
@@ -783,7 +783,7 @@ def from_list_of_dicts(
         video_codec: str = "auto",
         codec_options: Optional[Dict[str, Any]] = None,
         visualization_feature: Optional[Text] = None,
-        fps: Optional[int] = 10,
+        fps: Optional[Union[int, Dict[str, int]]] = 10,
         raw_codec: Optional[str] = None,
     ) -> "Trajectory":
         """
@@ -795,6 +795,7 @@ def from_list_of_dicts(
             video_codec (str, optional): Video codec to use for video/image features. Defaults to "auto".
             codec_options (Dict[str, Any], optional): Additional codec-specific options.
             visualization_feature: Optional feature name to prioritize as first stream for visualization.
+            fps: Optional fps for features. Can be an int (same fps for all features) or Dict[str, int] (per-feature fps).
             raw_codec (str, optional): Raw codec to use for non-image features. Defaults to None.
 
         Example:
@@ -822,7 +823,8 @@ def from_list_of_dicts(
         feature_to_stream_idx = traj.backend.create_streams_for_batch_data(
             sample_data=sample_data,
             codec_config=traj.codec_config,
-            feature_name_separator=traj.feature_name_separator
+            feature_name_separator=traj.feature_name_separator,
+            visualization_feature=visualization_feature
         )
         
         # Update feature type tracking for consistency
@@ -854,7 +856,7 @@ def from_dict_of_lists(
         video_codec: str = "auto",
         codec_options: Optional[Dict[str, Any]] = None,
         visualization_feature: Optional[Text] = None,
-        fps: Optional[int] = 10,
+        fps: Optional[Union[int, Dict[str, int]]] = 10,
         raw_codec: Optional[str] = None,
     ) -> "Trajectory":
         """
@@ -867,6 +869,7 @@ def from_dict_of_lists(
             video_codec (str, optional): Video codec to use for video/image features. Defaults to "auto".
             codec_options (Dict[str, Any], optional): Additional codec-specific options.
             visualization_feature: Optional feature name to prioritize as first stream for visualization.
+            fps: Optional fps for features. Can be an int (same fps for all features) or Dict[str, int] (per-feature fps).
             raw_codec (str, optional): Raw codec to use for non-image features. Defaults to None.
 
         Returns:
diff --git a/robodm/trajectory_base.py b/robodm/trajectory_base.py
@@ -97,7 +97,7 @@ def from_list_of_dicts(
         video_codec: str = "auto",
         codec_options: Optional[Dict[str, Any]] = None,
         visualization_feature: Optional[Text] = None,
-        fps: Optional[int] = 10,
+        fps: Optional[Union[int, Dict[str, int]]] = 10,
         raw_codec: Optional[str] = None,
     ) -> "TrajectoryInterface":
         """
@@ -124,7 +124,7 @@ def from_dict_of_lists(
         video_codec: str = "auto",
         codec_options: Optional[Dict[str, Any]] = None,
         visualization_feature: Optional[Text] = None,
-        fps: Optional[int] = 10,
+        fps: Optional[Union[int, Dict[str, int]]] = 10,
         raw_codec: Optional[str] = None,
     ) -> "TrajectoryInterface":
         """