GradientSpaces
diff --git a/‎README.md‎
Lines changed: 10 additions & 4 deletions b/‎README.md‎
Lines changed: 10 additions & 4 deletions
diff --git a/‎config/RPF_base_main_10k.yaml‎
Lines changed: 35 additions & 0 deletions b/‎config/RPF_base_main_10k.yaml‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎config/model/rectified_point_flow.yaml‎
Lines changed: 2 additions & 2 deletions b/‎config/model/rectified_point_flow.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎rectified_point_flow/data/dataset.py‎
Lines changed: 13 additions & 5 deletions b/‎rectified_point_flow/data/dataset.py‎
Lines changed: 13 additions & 5 deletions
diff --git a/‎rectified_point_flow/eval/evaluator.py‎
Lines changed: 8 additions & 8 deletions b/‎rectified_point_flow/eval/evaluator.py‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎rectified_point_flow/flow_model/embedding.py‎
Lines changed: 23 additions & 14 deletions b/‎rectified_point_flow/flow_model/embedding.py‎
Lines changed: 23 additions & 14 deletions
@@ -15,9 +15,15 @@
 
 
 ## 🔔 News
-- [July 15, 2025] Improve training stability and fix bugs.
-- [July 9, 2025] Released training codes.
-- [July 1, 2025] Released model checkpoints and inference codes. 
+- [July 25, 2025] **Version 1.0**: We strongly recommend updating to this version, which includes:
+  - Improved model speed (9-12% faster) and training stability.
+  - Fixed bugs in configs, RK2 sampler, and validation.
+  - Simplified point cloud packing and shaping.
+  - Checkpoints are compatible with the previous version.
+
+- [July 9, 2025] **Version 0.1**: Released the training code.
+
+- [July 1, 2025] Initial release of the model checkpoints and inference code.
 
 ## Overview
 
@@ -74,7 +80,7 @@ This saves images of the input (unposed) parts and multiple generations for poss
 
 - **Renderer**: We use [Mitsuba](https://mitsuba.readthedocs.io/en/latest/) for high quality ray-traced rendering, as shown above. For a faster rendering, please switch to [PyTorch3D PointsRasterizer](https://pytorch3d.readthedocs.io/en/latest/modules/renderer/points/rasterizer.html#pytorch3d.renderer.points.rasterizer.PointsRasterizer) by adding `visualizer.renderer=pytorch3d`. To disable rendering, use `visualizer.renderer=none`. More rendering options are available in [config/visualizer](config/visualizer/flow.yaml).
 
-- **Sampler**: We support Euler, RK2 (default), and RK4 samplers for inference, set `model.inference_sampler={euler, rk2, rk4}` accordingly.
+- **Sampler**: We support Euler (default), RK2, and RK4 samplers for inference; set `model.inference_sampler={euler, rk2, rk4}` accordingly.
 
 **Overlap Prediction:** To visualize the overlap probabilities predicted by the encoder, please run:
 
 
@@ -0,0 +1,35 @@
+# Training Rectified Point Flow
+
+defaults:
+  - model: rectified_point_flow
+  - data: ikea_partnet_everyday_twobytwo_modelnet_tudl
+  - trainer: main
+  - loggers: wandb
+  - _self_
+
+# Random seed for reproducibility
+seed: 42
+
+# Data root
+data_root: "../dataset"
+data:
+  num_points_to_sample: 10000
+
+# Experiment name and log directory
+experiment_name: RPF_base
+log_dir: ./output/${experiment_name}
+ckpt_path: ${log_dir}/last.ckpt
+hydra:
+  run:
+    dir: ${log_dir}
+
+# Model settings
+model:
+  encoder_ckpt: null
+  flow_model_ckpt: null
+  
+  flow_model:
+    # For 10k points, we replace QK norm with softcapping to speed things up.
+    attn_dtype: "bfloat16"
+    softcap: 50.0
+    qk_norm: False
@@ -19,6 +19,6 @@ lr_scheduler:
   gamma: 0.5
 
 timestep_sampling: "u_shaped"
-inference_sampler: "rk2"
-inference_sampling_steps: 20
+inference_sampler: "euler"
+inference_sampling_steps: 50
 n_generations: 1
@@ -98,8 +98,9 @@ def __getitem__(self, index):
             - rotations (P, 3, 3) float32: Rotation matrices.
             - translations (P, 3) float32: Translation vectors.
             - points_per_part (P) int64: Number of points per part.
-            - scale (P) float32: Scale of the point clouds.
-            - anchor_part (P) bool: Whether the part is an anchor part.
+            - scales (1, ) float32: Scale of the point clouds.
+            - anchor_parts (P) bool: Boolean array indicating anchor parts.
+            - anchor_indices (N, ) bool: Boolean array indicating anchor points.
             - init_rotation (3, 3) float32: Initial rotation matrix of the pointclouds_gt, used for recovering the original data.
 
         Note:
@@ -323,7 +324,6 @@ def _proc_part(i):
         pts_per_part = pad_data(counts, self.max_parts)
         rots = pad_data(np.stack(rots), self.max_parts)
         trans = pad_data(np.stack(trans), self.max_parts)
-        scale = pad_data(np.array([scale] * n_parts), self.max_parts)
 
         # Use the largest part as the anchor part
         anchor = np.zeros(self.max_parts, bool)
@@ -347,6 +347,13 @@ def _proc_part(i):
                 rots[extra_idx] = np.eye(3)
                 trans[extra_idx] = np.zeros(3)
 
+        # Broadcast anchor part to points
+        anchor_indices = np.zeros(self.num_points_to_sample, bool)
+        for i in range(n_parts):
+            if anchor[i]:
+                st, ed = offsets[i], offsets[i + 1]
+                anchor_indices[st:ed] = True
+
         results = {}
         for key in ["index", "name", "overlap_threshold"]:
             results[key] = data[key]
@@ -360,8 +367,9 @@ def _proc_part(i):
         results["rotations"] = rots.astype(np.float32)
         results["translations"] = trans.astype(np.float32)
         results["points_per_part"] = pts_per_part.astype(np.int64)
-        results["scale"] = scale.astype(np.float32)
-        results["anchor_part"] = anchor.astype(bool)
+        results["scales"] = np.array(scale, dtype=np.float32)
+        results["anchor_parts"] = anchor.astype(bool)
+        results["anchor_indices"] = anchor_indices.astype(bool)
         results["init_rotation"] = init_rot.astype(np.float32)
 
         return results
 
@@ -25,14 +25,14 @@ def _compute_metrics(
         pts = data["pointclouds"]                       # (B, N, 3)
         pts_gt = data["pointclouds_gt"]                 # (B, N, 3)
         points_per_part = data["points_per_part"]       # (B, P)
-        anchor_part = data["anchor_part"]               # (B, P)
-        scale = data["scale"][:, 0]                     # (B,)
+        anchor_parts = data["anchor_parts"]             # (B, P)
+        scales = data["scales"]                         # (B,)
 
-        # Rescale to original scale
+        # Rescale to original scales
         B, _, _ = pts_gt.shape
         pointclouds_pred = pointclouds_pred.view(B, -1, 3)
-        pts_gt_rescaled = pts_gt * scale.view(B, 1, 1)
-        pts_pred_rescaled = pointclouds_pred * scale.view(B, 1, 1)
+        pts_gt_rescaled = pts_gt * scales.view(B, 1, 1)
+        pts_pred_rescaled = pointclouds_pred * scales.view(B, 1, 1)
 
         object_cd = compute_object_cd(pts_gt_rescaled, pts_pred_rescaled)
         part_acc, matched_parts = compute_part_acc(pts_gt_rescaled, pts_pred_rescaled, points_per_part)
@@ -43,7 +43,7 @@ def _compute_metrics(
 
         if rotations_pred is not None and translations_pred is not None:
             rot_errors, trans_errors = compute_transform_errors(
-                pts, pts_gt, rotations_pred, translations_pred, points_per_part, anchor_part, matched_parts, scale,
+                pts, pts_gt, rotations_pred, translations_pred, points_per_part, anchor_parts, matched_parts, scales,
             )
             rot_recalls = self._recall_at_thresholds(rot_errors, [5, 10])
             trans_recalls = self._recall_at_thresholds(trans_errors, [0.01, 0.05])
@@ -84,7 +84,7 @@ def _save_single_result(
             "dataset": dataset_name,
             "num_parts": int(data["num_parts"][idx]),
             "generation_idx": generation_idx,
-            "scale": float(data["scale"][idx, 0]),
+            "scales": float(data["scales"][idx]),
         }
         entry.update({k: float(v[idx]) for k, v in metrics.items()})
 
@@ -107,7 +107,7 @@ def run(
         Args:
             data: Input data dictionary, containing:
                 pointclouds_gt (B, N, 3): Ground truth point clouds.
-                scale (B,): Scale factors.
+                scales (B,): Scale factors.
                 points_per_part (B, P): Points per part.
                 name (B,): Object names.
                 dataset_name (B,): Dataset names.
 
@@ -117,33 +117,42 @@ def __init__(self, in_dim: int, embed_dim: int, multires: int = 10):
 
     def forward(
         self,
-        x: torch.Tensor,       # (n_points, 3)
-        latent: dict,          # PointTransformer's `Point` instance
-        scale: torch.Tensor,   # (n_valid_parts, )
+        x: torch.Tensor,
+        latent: dict,
+        scales: torch.Tensor,
     ) -> torch.Tensor:
         """Generate PointCloudEmbedding from the input.
         
         Args:
-            x: Input coordinates tensor of shape (n_points, 3).
-            latent: Dictionary containing point cloud features and metadata.
-            scale: Scale factors of shape (n_valid_parts, 1).
+            x (B, N, 3): Noise point coordinates at timestep t.
+            latent: PointTransformer's Point instance of conditional point cloud:
+                - "coord" (n_points, 3): Point coordinates
+                - "normal" (n_points, 3): Point normals
+                - "feat" (n_points, in_dim): Point features  
+            scales (B, ): Scale factor for the point cloud.
             
         Returns:
-            Shape embeddings of shape (n_points, embed_dim).
+            Shape embeddings of shape (B, N, dim).
         """
+        B, N, _ = x.shape
+
         # Coordinate of noise PCs
-        x_pos_emb = self.noise_embedding.embed(x)                   # (n_points, emb_dim)
+        x_pos_emb = self.noise_embedding.embed(x)                    # (B, N, dim)
 
         # Coordinate of condition PCs
-        c_pos_emb = self.coord_embedding.embed(latent["coord"])     # (n_points, emb_dim)
-        
+        coord = latent["coord"].view(B, N, 3)
+        c_pos_emb = self.coord_embedding.embed(coord)                # (B, N, dim)
+
         # Normal of condition PCs
-        normal_emb = self.normal_embedding.embed(latent["normal"])  # (n_points, emb_dim)
+        normal = latent["normal"].view(B, N, 3)
+        normal_emb = self.normal_embedding.embed(normal)             # (B, N, dim)
 
         # Scale of condition PCs
-        scale_emb = self.scale_embedding.embed(scale.unsqueeze(-1)) # (n_valid_parts, emb_dim)
-        scale_emb = scale_emb[latent["batch"]]                      # (n_points, emb_dim)
+        scale_emb = self.scale_embedding.embed(scales.unsqueeze(-1)) # (B, dim)
+        scale_emb = scale_emb.unsqueeze(1).expand(B, N, -1)          # (B, N, dim)
 
         # Concatenate with point features
-        x = torch.cat([latent["feat"], c_pos_emb, x_pos_emb, normal_emb, scale_emb], dim=-1)
+        feat = latent["feat"].view(B, N, -1)                         # (B, N, in_dim)
+        x = torch.cat([feat, c_pos_emb, x_pos_emb, normal_emb, scale_emb], dim=-1)
+        
         return self.emb_proj(x)