1+ # Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
15+ """Helper utils for processing meshroom data into the nerfstudio format."""
16+
17+ import json
18+ import math
19+ import numpy as np
20+ from pathlib import Path
21+ from typing import Dict , List , Optional
22+ from copy import deepcopy as dc
23+
24+ from nerfstudio .process_data .process_data_utils import CAMERA_MODELS
25+ from nerfstudio .utils .rich_utils import CONSOLE
26+
# Homogeneous rotation that adjusts the world coordinate system:
# maps (x, y, z) -> (x, z, -y), i.e. swaps Y/Z and negates the new Z.
ROT_MAT = np.array(
    [
        [1, 0, 0, 0],
        [0, 0, 1, 0],
        [0, -1, 0, 0],
        [0, 0, 0, 1],
    ]
)
32+
def reflect(axis, size=4):
    """Return a size x size reflection matrix that negates the given axis."""
    diagonal = np.ones(size)
    diagonal[axis] = -1.0
    return np.diag(diagonal)
39+
def Mat2Nerf(mat):
    """Convert a 4x4 matrix to the NeRF coordinate convention.

    Equivalent to ``mat @ reflect(2) @ reflect(1)``: the two reflections
    compose into a single diagonal that negates columns 1 and 2.
    """
    flip_yz = np.diag([1.0, -1.0, -1.0, 1.0])
    return np.array(mat) @ flip_yz
45+
def closest_point_2_lines(oa, da, ob, db):
    """Return the midpoint of closest approach of two rays of form o + t*d.

    Also returns the squared sine of the angle between the rays, used as a
    weight (near zero for parallel rays). Parameters ``t`` are clamped to be
    non-positive so each ray only contributes points behind its origin.
    """
    da = da / np.linalg.norm(da)
    db = db / np.linalg.norm(db)
    normal = np.cross(da, db)
    weight = np.linalg.norm(normal) ** 2
    offset = ob - oa
    # Small epsilon keeps the division finite for (near-)parallel rays.
    ta = min(np.linalg.det([offset, db, normal]) / (weight + 1e-10), 0)
    tb = min(np.linalg.det([offset, da, normal]) / (weight + 1e-10), 0)
    midpoint = (oa + ta * da + ob + tb * db) * 0.5
    return midpoint, weight
60+
def central_point(out):
    """Find a central point all cameras are looking at.

    Accumulates, over every ordered pair of frames, the weighted closest
    point between their viewing rays (origin = column 3, direction =
    column 2 of the transform matrix), skipping near-parallel pairs.
    """
    CONSOLE.print("Computing center of attention...")
    total_weight = 0.0
    total_point = np.array([0.0, 0.0, 0.0])
    frames = out["frames"]
    # Hoist the matrix conversion out of the O(n^2) pair loop.
    poses = [np.array(frame["transform_matrix"])[0:3, :] for frame in frames]
    for mf in poses:
        for mg in poses:
            p, w = closest_point_2_lines(mf[:, 3], mf[:, 2], mg[:, 3], mg[:, 2])
            if w > 0.01:
                total_point += p * w
                total_weight += w

    if not frames:
        CONSOLE.print("[bold red]No frames found when computing center of attention[/bold red]")
        return total_point

    if (total_weight == 0) and (not total_point.any()):
        CONSOLE.print("[bold red]Center of attention is zero[/bold red]")
        return total_point

    total_point /= total_weight
    CONSOLE.print(f"The center of attention is: {total_point}")

    return total_point
87+
def build_sensor(intrinsic):
    """Build nerfstudio camera intrinsics from a Meshroom intrinsic entry.

    Args:
        intrinsic: One element of the Meshroom "intrinsics" list; values are
            stored as strings (width/height in pixels, focal/sensor sizes in mm).

    Returns:
        Dict with image size, focal lengths in pixels, full fields of view,
        principal point, and (for type "radial3") distortion coefficients.
    """
    out = {}
    out["w"] = float(intrinsic["width"])
    out["h"] = float(intrinsic["height"])

    # Focal length in mm
    focal = float(intrinsic["focalLength"])

    # Sensor dimensions in mm
    sensor_width = float(intrinsic["sensorWidth"])
    sensor_height = float(intrinsic["sensorHeight"])

    # Focal length in pixels
    out["fl_x"] = (out["w"] * focal) / sensor_width

    # Check W/H ratio against the sensor ratio; a mismatch indicates
    # inconsistent Meshroom metadata, so fall back to fl_x.
    if np.isclose((out["w"] / out["h"]), (sensor_width / sensor_height)):
        out["fl_y"] = (out["h"] * focal) / sensor_height
    else:
        CONSOLE.print("[yellow]WARNING: W/H ratio does not match sensor ratio, this is likely a bug from Meshroom. Will use fl_x to set fl_y.[/yellow]")
        out["fl_y"] = out["fl_x"]

    # Full field of view: fov = 2 * atan(size / (2 * focal_px)).
    # Bug fix: previously computed atan(size / focal_px * 2) * 2 because the
    # factor of 2 was outside the parentheses, inflating both angles.
    camera_angle_x = math.atan(out["w"] / (out["fl_x"] * 2)) * 2
    camera_angle_y = math.atan(out["h"] / (out["fl_y"] * 2)) * 2

    out["camera_angle_x"] = camera_angle_x
    out["camera_angle_y"] = camera_angle_y

    # Meshroom stores the principal point as an offset from the image center.
    out["cx"] = float(intrinsic["principalPoint"][0]) + (out["w"] / 2.0)
    out["cy"] = float(intrinsic["principalPoint"][1]) + (out["h"] / 2.0)

    # Radial distortion coefficients become k1, k2, k3.
    if intrinsic["type"] == "radial3":
        for i, coef in enumerate(intrinsic["distortionParams"]):
            out[f"k{i + 1}"] = float(coef)

    return out
125+
def meshroom_to_json(
    image_filename_map: Dict[str, Path],
    json_filename: Path,
    output_dir: Path,
    ply_filename: Optional[Path] = None,
    verbose: bool = False,
) -> List[str]:
    """Convert Meshroom data into a nerfstudio dataset.

    Args:
        image_filename_map: Mapping of original image filenames to their saved locations.
        json_filename: Path to the Meshroom json file.
        output_dir: Path to the output directory.
        ply_filename: Path to the exported ply file.
        verbose: Whether to print verbose output.

    Returns:
        Summary of the conversion.
    """
    summary_log = []

    # Bug fix: read with an explicit encoding, consistent with the utf-8
    # write of transforms.json below (the default encoding is platform-dependent).
    with open(json_filename, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Create output structure
    out = {}
    out["aabb_scale"] = 16  # Default value (instant-ngp style scene bound)

    # Extract camera-to-world transforms from Meshroom poses, converting each
    # to the NeRF convention (Mat2Nerf) and applying the global axis swap.
    transforms = {}
    for pose in data.get("poses", []):
        transform = pose["pose"]["transform"]
        rot = np.asarray(transform["rotation"])
        rot = rot.reshape(3, 3).astype(float)

        ctr = np.asarray(transform["center"])
        ctr = ctr.astype(float)

        # Assemble the 4x4 homogeneous matrix: rotation block + camera center.
        M = np.eye(4)
        M[:3, :3] = rot
        M[:3, 3] = ctr

        M = Mat2Nerf(M.astype(float))
        transforms[pose["poseId"]] = np.dot(ROT_MAT, M)

    # Extract intrinsics from Meshroom data, keyed by intrinsicId
    intrinsics = {}
    for intrinsic in data.get("intrinsics", []):
        intrinsics[intrinsic["intrinsicId"]] = build_sensor(intrinsic)

    # Set camera model based on the type of the first intrinsic
    if data.get("intrinsics") and "type" in data["intrinsics"][0]:
        intrinsic_type = data["intrinsics"][0]["type"]
        if intrinsic_type in ["radial1", "radial3"]:
            out["camera_model"] = CAMERA_MODELS["perspective"].value
        elif intrinsic_type in ["fisheye", "fisheye4"]:
            out["camera_model"] = CAMERA_MODELS["fisheye"].value
        else:
            # Default to perspective
            out["camera_model"] = CAMERA_MODELS["perspective"].value
    else:
        out["camera_model"] = CAMERA_MODELS["perspective"].value

    # Build frames
    frames = []
    skipped_images = 0

    for view in data.get("views", []):
        # Get the image name from the path.
        # NOTE(review): matching is by file stem, which assumes stems are
        # unique across views and match the keys of image_filename_map —
        # confirm against the caller.
        path = Path(view["path"])
        name = path.stem

        # Check if the image exists in our mapping
        if name not in image_filename_map:
            if verbose:
                CONSOLE.print(f"[yellow]Missing image for {name}, skipping[/yellow]")
            skipped_images += 1
            continue

        # Get poseId and intrinsicId
        poseId = view["poseId"]
        intrinsicId = view["intrinsicId"]

        # Check if we have the necessary data
        if poseId not in transforms:
            if verbose:
                CONSOLE.print(f"[yellow]PoseId {poseId} not found in transforms, skipping image: {name}[/yellow]")
            skipped_images += 1
            continue

        if intrinsicId not in intrinsics:
            if verbose:
                CONSOLE.print(f"[yellow]IntrinsicId {intrinsicId} not found, skipping image: {name}[/yellow]")
            skipped_images += 1
            continue

        # Create the frame: deep-copy the shared intrinsics so later per-frame
        # edits (transform re-centering below) cannot mutate them.
        camera = {}
        camera.update(dc(intrinsics[intrinsicId]))
        camera["transform_matrix"] = transforms[poseId]
        camera["file_path"] = image_filename_map[name].as_posix()

        frames.append(camera)

    out["frames"] = frames

    # Calculate the point all cameras are (approximately) looking at
    center = central_point(out)

    # Re-center camera positions about the center of attention and convert
    # the matrices to nested lists for JSON serialization.
    for f in out["frames"]:
        f["transform_matrix"][0:3, 3] -= center
        f["transform_matrix"] = f["transform_matrix"].tolist()

    # Include point cloud if provided
    if ply_filename is not None:
        import open3d as o3d

        # Applied transform: keep rows (z, x, y) of the identity so exported
        # points are permuted into the same world convention as the poses.
        applied_transform = np.eye(4)[:3, :]
        applied_transform = applied_transform[np.array([2, 0, 1]), :]
        out["applied_transform"] = applied_transform.tolist()

        # Load the point cloud, apply the transform, and save it with the dataset.
        pc = o3d.io.read_point_cloud(str(ply_filename))
        points3D = np.asarray(pc.points)
        points3D = np.einsum("ij,bj->bi", applied_transform[:3, :3], points3D) + applied_transform[:3, 3]
        pc.points = o3d.utility.Vector3dVector(points3D)
        o3d.io.write_point_cloud(str(output_dir / "sparse_pc.ply"), pc)
        out["ply_file_path"] = "sparse_pc.ply"
        summary_log.append(f"Imported {ply_filename} as starting points")

    # Write output
    with open(output_dir / "transforms.json", "w", encoding="utf-8") as f:
        json.dump(out, f, indent=4)

    # Add summary info
    if skipped_images == 1:
        summary_log.append(f"{skipped_images} image skipped due to missing camera pose or intrinsic data.")
    elif skipped_images > 1:
        summary_log.append(f"{skipped_images} images were skipped due to missing camera poses or intrinsic data.")

    summary_log.append(f"Final dataset contains {len(out['frames'])} frames.")

    return summary_log