Skip to content

Commit 0601a15

Browse files
Merge branch 'master' into yutian/update
2 parents b8e7907 + 8159afe commit 0601a15

6 files changed

Lines changed: 48 additions & 3 deletions

File tree

_bibliography/references.bib

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,20 @@ @article{chen2025cometokens
66
journal = {arXiv preprint arXiv:2511.14751},
77
abstract = {We propose Confidence-Guided Token Merging (Co-Me), an acceleration mechanism for visual geometric transformers without retraining or finetuning the base model. Co-Me distilled a light-weight confidence predictor to rank tokens by uncertainty and selectively merge low-confidence ones, effectively reducing computation while maintaining spatial coverage. Compared to similarity-based merging or pruning, the confidence signal in Co-Me reliably indicates regions emphasized by the transformer, enabling substantial acceleration without degrading performance. Co-Me applies seamlessly to various multi-view and streaming visual geometric transformers, achieving speedups that scale with sequence length. When applied to VGGT and MapAnything, Co-Me achieves up to 11.3x and 7.2x speedup, making visual geometric transformers practical for real-time 3D perception and reconstruction.}
88
}
9+
@misc{yu2025unified,
10+
title = {Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera},
11+
shorttitle = {Unified Spherical Frontend},
12+
author = {Mukai Yu and Mosam Dabhi and Liuyue Xie and Sebastian Scherer and L{\'a}szl{\'o} A. Jeni},
13+
year = {2025},
14+
publisher = {arXiv},
15+
doi = {10.48550/arXiv.2511.18174},
16+
url = {https://arxiv.org/pdf/2511.18174},
17+
eprint = {2511.18174},
18+
primaryclass = {cs.CV},
19+
abstract = {Modern perception increasingly relies on fisheye, panoramic, and other wide field-of-view (FoV) cameras, yet most pipelines still apply planar CNNs designed for pinhole imagery on 2D grids, where image-space neighborhoods misrepresent physical adjacency and models are sensitive to global rotations. Frequency-domain spherical CNNs partially address this mismatch but require costly spherical harmonic transforms that constrain resolution and efficiency. We introduce the Unified Spherical Frontend (USF), a lens-agnostic framework that transforms images from any calibrated camera into a unit-sphere representation via ray-direction correspondences, and performs spherical resampling, convolution, and pooling directly in the spatial domain. USF is modular: projection, location sampling, interpolation, and resolution control are fully decoupled. Its distance-only spherical kernels offer configurable rotation-equivariance (mirroring translation-equivariance in planar CNNs) while avoiding harmonic transforms entirely. We compare standard planar backbones with their spherical counterparts across classification, detection, and segmentation tasks on synthetic (Spherical MNIST) and real-world datasets (PANDORA, Stanford 2D-3D-S), and stress-test robustness to extreme lens distortions, varying FoV, and arbitrary rotations. USF processes high-resolution spherical imagery efficiently and maintains less than 1\% performance drop under random test-time rotations, even without rotational augmentation, and even enables zero-shot generalization from one lens type to unseen wide-FoV lenses with minimal performance degradation.},
20+
archiveprefix = {arXiv},
21+
keywords = {Computer Science - Computer Vision and Pattern Recognition}
22+
}
923
@inproceedings{alama2025rayfronts,
1024
title = {RayFronts: Open-Set Semantic Ray Frontiers for Online Scene Understanding and Exploration},
1125
author = {Omar Alama and Avigyan Bhattacharya and Haoyang He and Seungchan Kim and Yuheng Qiu and Wenshan Wang and Cherie Ho and Nikhil Keetha and Sebastian Scherer},

_team/gihwan_kim.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
title: Gihwan Kim
3+
subtitle: Visitor
4+
job_title: Visitor
5+
category: intern
6+
layout: team_member_personal_page
7+
image: /img/team/gihwan_kim.jpeg
8+
link-new-tab: true
9+
---
10+
11+
Gihwan is a visitor at the Robotics Institute, Carnegie Mellon University. His research experience at the AirLab has centered on model compression and acceleration, with a particular emphasis on enhancing the efficiency of robotics modules. Previously, he worked on semantic segmentation and object detection, focusing on the acceleration and compression of vision tasks in satellite applications. He earned his B.S. degree in Computer Software and Engineering from Chungnam National University.
12+
13+
## Website ##
14+
[https://sites.google.com/view/gihwankim/home](https://sites.google.com/view/gihwankim/home)
15+
16+
## Email ##
17+
gihwank@andrew.cmu.edu

_team/krrish_jain.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
title: Krrish Jain
3+
subtitle: Master's Student
4+
job_title: Master's Student
5+
category: master_student
6+
layout: team_member_personal_page
7+
image: /img/team/krrish.jpg
8+
link-new-tab: true
9+
---
10+
11+
Krrish is a Master’s student at the Robotics Institute, Carnegie Mellon University, advised by Sebastian Scherer. His research centers on multi-robot coordination, motion planning, and autonomy for aerial platforms. He is currently part of AirLab’s multi-drone planning and coordination projects.
12+
13+
**Email**: [krrishj@andrew.cmu.edu](mailto:krrishj@andrew.cmu.edu)
14+
15+
**LinkedIn**: [LinkedIn](https://www.linkedin.com/in/krrishj/)
16+
17+
**Website**: [krrishjain.com](https://krrishjain.com/)

_team/viktor.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ His research interests are relative localization of robots, computer vision, aer
1414
One of his primary results during his PhD studies was the development of the UVDAR (UltraViolet Direction and Ranging) system for mutual relative localization of UAVs in outdoor conditions.
1515
Viktor likes to work with robots in the field, and previously participated in the MBZIRC 2020 competition, in a team that scored the overall victory.
1616

17-
%% ## Website ##
18-
%% [https://vwalter.github.io/](https://vwalter.github.io/)
19-
2017
### [LinkedIn](https://www.linkedin.com/in/viktor-walter-37287a1a5) ###
2118
### [Google Scholar](https://scholar.google.cz/citations?user=5QI-m0gAAAAJ&hl=cs&oi=ao) ###
2219
### [Web of Science](https://www.webofscience.com/wos/author/record/3554829) ###

img/team/gihwan_kim.jpeg

229 KB
Loading

img/team/krrish.jpg

33.7 KB
Loading

0 commit comments

Comments
 (0)