InternRobotics
diff --git a/README.md b/README.md
Lines changed: 7 additions & 7 deletions
diff --git a/internnav/agent/dialog_agent.py b/internnav/agent/dialog_agent.py
Lines changed: 6 additions & 6 deletions
diff --git a/internnav/dataset/internvla_n1_lerobot_dataset.py b/internnav/dataset/internvla_n1_lerobot_dataset.py
Lines changed: 3 additions & 1 deletion
@@ -218,19 +218,19 @@ For example, raising issues, fixing bugs in the framework, and adapting or addin
 
 ## 🚀 Community Deployment & Best Practices
 
-We are excited to see InternNav being deployed and extended by the community across different robots and real-world scenarios.  
+We are excited to see InternNav being deployed and extended by the community across different robots and real-world scenarios.
 Below are selected community-driven deployment guides and solution write-ups, which may serve as practical references for advanced users.
 
-- **IROS Challenge Nav Track: Champion Solution (2025)**  
-  A complete system-level solution and design analysis for Vision-and-Language Navigation in Physical Environments.  
+- **IROS Challenge Nav Track: Champion Solution (2025)**
+  A complete system-level solution and design analysis for Vision-and-Language Navigation in Physical Environments.
   🔗 https://zhuanlan.zhihu.com/p/1969046543286907790
 
-- **Go2 Series Deployment Tutorial (ShanghaiTech University)**  
-  Step-by-step edge deployment guide for InternNav-based perception and navigation.  
+- **Go2 Series Deployment Tutorial (ShanghaiTech University)**
+  Step-by-step edge deployment guide for InternNav-based perception and navigation.
   🔗 https://github.com/cmjang/InternNav-deploy
 
-- **G1 Series Deployment Tutorial (Wuhan University)**  
-  Detailed educational materials on vision-language navigation deployment.  
+- **G1 Series Deployment Tutorial (Wuhan University)**
+  Detailed educational materials on vision-language navigation deployment.
   🔗 [*Chapter 5: Vision-Language Navigation (Part II)*](https://mp.weixin.qq.com/s/p3cJzbRvecMajiTh9mXoAw)
 
 ## 🔗 Citation
 
@@ -16,13 +16,13 @@
 from internnav.configs.agent import AgentCfg
 
 try:
+    from depth_camera_filtering import filter_depth
+    from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower
     from transformers import (
         AutoProcessor,
         AutoTokenizer,
         Qwen2_5_VLForConditionalGeneration,
     )
-    from depth_camera_filtering import filter_depth
-    from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower
 except Exception as e:
     print(f"Warning: ({e}), Habitat Evaluation is not loaded in this runtime. Ignore this if not using Habitat.")
 
@@ -47,12 +47,12 @@ def split_and_clean(text):
 @Agent.register('dialog')
 class DialogAgent(Agent):
     """Vision-language navigation agent that can either move or ask an oracle via dialog. The agent builds a multimodal
-     chat prompt from current/historical RGB observations (and optional look-down frames), runs a Qwen2.5-VL model to 
-     produce either an action sequence, a pixel waypoint, or a dialog query, then converts the model output into 
+     chat prompt from current/historical RGB observations (and optional look-down frames), runs a Qwen2.5-VL model to
+     produce either an action sequence, a pixel waypoint, or a dialog query, then converts the model output into
      simulator actions and (optionally) a world-space navigation goal.
 
     Args:
-        agent_config (AgentCfg): AgentCfg containing model_settings (e.g., task name, sensor config, model path, mode, 
+        agent_config (AgentCfg): AgentCfg containing model_settings (e.g., task name, sensor config, model path, mode,
             resizing, dialog flags, and generation parameters) and runtime info such as local_rank.
     """
 
@@ -440,7 +440,7 @@ def pixel_to_gps(self, pixel, depth, intrinsic, tf_camera_to_episodic):
             pixel (Tuple[int, int] | List[int] | np.ndarray): pixel coordinate in (v, u) indexing as used here.
             depth (np.ndarray): depth image of shape (H, W) in meters, where depth[v, u] returns the metric depth.
             intrinsic (np.ndarray): camera intrinsic matrix.
-            tf_camera_to_episodic (np.ndarray): homogeneous transform of shape (4, 4) mapping camera-frame points to 
+            tf_camera_to_episodic (np.ndarray): homogeneous transform of shape (4, 4) mapping camera-frame points to
                 the episodic frame.
 
         Returns:
 
@@ -17,8 +17,8 @@
 from torchcodec.decoders import VideoDecoder
 from transformers.image_utils import to_numpy_array
 
-from .vlln_lerobot_dataset import VLLNDataset
 from .rope2d import get_rope_index_2, get_rope_index_25
+from .vlln_lerobot_dataset import VLLNDataset
 
 # Define placeholders for dataset paths
 CAMBRIAN_737K = {
@@ -1330,6 +1330,7 @@ def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
 
         return batch
 
+
 class CombinedDataset(Dataset):
     """
     Combine multiple datasets into a single dataset interface.
@@ -1338,6 +1339,7 @@ class CombinedDataset(Dataset):
     It concatenates samples from all provided datasets and optionally shuffles
     the global index mapping (without changing the underlying datasets).
     """
+
     def __init__(self, datasets, shuffle=False):
         super(CombinedDataset, self).__init__()
         self.datasets = datasets