Skip to content

Commit 0601a15

Browse files
Merge branch 'master' into yutian/update
2 parents b8e7907 + 8159afe commit 0601a15

6 files changed

Lines changed: 48 additions & 3 deletions

File tree

_bibliography/references.bib

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,20 @@ @article{chen2025cometokens
66
journal = {arXiv preprint arXiv:2511.14751},
77
abstract = {We propose Confidence-Guided Token Merging (Co-Me), an acceleration mechanism for visual geometric transformers without retraining or finetuning the base model. Co-Me distilled a light-weight confidence predictor to rank tokens by uncertainty and selectively merge low-confidence ones, effectively reducing computation while maintaining spatial coverage. Compared to similarity-based merging or pruning, the confidence signal in Co-Me reliably indicates regions emphasized by the transformer, enabling substantial acceleration without degrading performance. Co-Me applies seamlessly to various multi-view and streaming visual geometric transformers, achieving speedups that scale with sequence length. When applied to VGGT and MapAnything, Co-Me achieves up to 11.3x and 7.2x speedup, making visual geometric transformers practical for real-time 3D perception and reconstruction.}
88
}
9+
@misc{yu2025unified,
10+
title = {Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera},
11+
shorttitle = {Unified Spherical Frontend},
12+
author = {Mukai Yu and Mosam Dabhi and Liuyue Xie and Sebastian Scherer and L{\'a}szl{\'o} A. Jeni},
13+
year = {2025},
14+
publisher = {arXiv},
15+
doi = {10.48550/arXiv.2511.18174},
16+
url = {https://arxiv.org/pdf/2511.18174},
17+
eprint = {2511.18174},
18+
primaryclass = {cs.CV},
19+
abstract = {Modern perception increasingly relies on fisheye, panoramic, and other wide field-of-view (FoV) cameras, yet most pipelines still apply planar CNNs designed for pinhole imagery on 2D grids, where image-space neighborhoods misrepresent physical adjacency and models are sensitive to global rotations. Frequency-domain spherical CNNs partially address this mismatch but require costly spherical harmonic transforms that constrain resolution and efficiency. We introduce the Unified Spherical Frontend (USF), a lens-agnostic framework that transforms images from any calibrated camera into a unit-sphere representation via ray-direction correspondences, and performs spherical resampling, convolution, and pooling directly in the spatial domain. USF is modular: projection, location sampling, interpolation, and resolution control are fully decoupled. Its distance-only spherical kernels offer configurable rotation-equivariance (mirroring translation-equivariance in planar CNNs) while avoiding harmonic transforms entirely. We compare standard planar backbones with their spherical counterparts across classification, detection, and segmentation tasks on synthetic (Spherical MNIST) and real-world datasets (PANDORA, Stanford 2D-3D-S), and stress-test robustness to extreme lens distortions, varying FoV, and arbitrary rotations. USF processes high-resolution spherical imagery efficiently and maintains less than 1\% performance drop under random test-time rotations, even without rotational augmentation, and even enables zero-shot generalization from one lens type to unseen wide-FoV lenses with minimal performance degradation.},
20+
archiveprefix = {arXiv},
21+
keywords = {Computer Science - Computer Vision and Pattern Recognition}
22+
}
923
@inproceedings{alama2025rayfronts,
1024
title = {RayFronts: Open-Set Semantic Ray Frontiers for Online Scene Understanding and Exploration},
1125
author = {Omar Alama and Avigyan Bhattacharya and Haoyang He and Seungchan Kim and Yuheng Qiu and Wenshan Wang and Cherie Ho and Nikhil Keetha and Sebastian Scherer},

_team/gihwan_kim.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
title: Gihwan Kim
3+
subtitle: Visitor
4+
job_title: Visitor
5+
category: intern
6+
layout: team_member_personal_page
7+
image: /img/team/gihwan_kim.jpeg
8+
link-new-tab: true
9+
---
10+
11+
Gihwan is a visitor at the Robotics Institute, Carnegie Mellon University. His research experience at the AirLab has centered on model compression and acceleration, with a particular emphasis on enhancing the efficiency of robotics modules. Previously, he worked on semantic segmentation and object detection, focusing on the acceleration and compression of vision tasks in satellite applications. He earned his B.S. degree in Computer Software and Engineering from Chungnam National University.
12+
13+
## Website ##
14+
[https://sites.google.com/view/gihwankim/home](https://sites.google.com/view/gihwankim/home)
15+
16+
## Email ##
17+
gihwank@andrew.cmu.edu

_team/krrish_jain.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
title: Krrish Jain
3+
subtitle: Master's Student
4+
job_title: Master's Student
5+
category: master_student
6+
layout: team_member_personal_page
7+
image: /img/team/krrish.jpg
8+
link-new-tab: true
9+
---
10+
11+
Krrish is a Master’s student at the Robotics Institute, Carnegie Mellon University, advised by Sebastian Scherer. His research centers on multi-robot coordination, motion planning, and autonomy for aerial platforms. He is currently part of AirLab’s multi-drone planning and coordination projects.
12+
13+
**Email**: [krrishj@andrew.cmu.edu](mailto:krrishj@andrew.cmu.edu)
14+
15+
**LinkedIn**: [LinkedIn](https://www.linkedin.com/in/krrishj/)
16+
17+
**Website**: [krrishjain.com](https://krrishjain.com/)

_team/viktor.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ His research interests are relative localization of robots, computer vision, aer
1414
One of his primary results during his PhD studies was the development of the UVDAR (UltraViolet Direction and Ranging) system for mutual relative localization of UAVs in outdoor conditions.
1515
Viktor likes to work with robots in the field, and previously participated in the MBZIRC 2020 competition, in a team that scored the overall victory.
1616

17-
%% ## Website ##
18-
%% [https://vwalter.github.io/](https://vwalter.github.io/)
19-
2017
### [LinkedIn](https://www.linkedin.com/in/viktor-walter-37287a1a5) ###
2118
### [Google Scholar](https://scholar.google.cz/citations?user=5QI-m0gAAAAJ&hl=cs&oi=ao) ###
2219
### [Web of Science](https://www.webofscience.com/wos/author/record/3554829) ###

img/team/gihwan_kim.jpeg

229 KB
Loading

img/team/krrish.jpg

33.7 KB
Loading

0 commit comments

Comments
 (0)