Skip to content

Commit c44cf56

Browse files
committed
more papers
1 parent e199560 commit c44cf56

11 files changed

Lines changed: 184 additions & 1 deletion

.DS_Store

6 KB
Binary file not shown.

src/components/publications.json

Lines changed: 184 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,176 @@
1010
"Linda G. Shapiro",
1111
"Ranjay Krishna"
1212
],
13-
"year": "2024",
13+
"year": "2025",
14+
"venue": "CVPR 2025",
1415
"links": {
1516
"pdf": "https://arxiv.org/abs/2412.03548"
1617
},
1718
"thumbnail": "/perctokens.png"
1819
},
20+
{
21+
"title": "The Unmet Promise of Synthetic Training Images: Using Retrieved Real Images Performs Better",
22+
"authors": [
23+
"Scott Geng",
24+
"Cheng-Yu Hsieh",
25+
"Vivek Ramanujan",
26+
"Matthew Wallingford",
27+
"Chun-Liang Li",
28+
"Pang Wei Koh",
29+
"Ranjay Krishna"
30+
],
31+
"year": "2024",
32+
"venue": "NeurIPS 2024",
33+
"links": {
34+
"pdf": "https://arxiv.org/abs/2406.05184"
35+
},
36+
"thumbnail": "/unmetpromise.png"
37+
},
38+
{
39+
"title": "AHA: A Vision-Language-Model for Detecting and Reasoning Over Failures in Robotic Manipulation",
40+
"authors": [
41+
"Jiafei Duan",
42+
"Wilbert Pumacay",
43+
"Nishanth Kumar",
44+
"Yi Ru Wang",
45+
"Shulin Tian",
46+
"Wentao Yuan",
47+
"Ranjay Krishna",
48+
"Dieter Fox",
49+
"Ajay Mandlekar",
50+
"Yijie Guo"
51+
],
52+
"year": "2025",
53+
"venue": "ICLR 2025",
54+
"links": {
55+
"pdf": "https://arxiv.org/abs/2410.00371"
56+
},
57+
"thumbnail": "/aha.png"
58+
},
59+
{
60+
"title": "Task Me Anything",
61+
"authors": [
62+
"Jieyu Zhang",
63+
"Weikai Huang",
64+
"Zixian Ma",
65+
"Oscar Michel",
66+
"Dong He",
67+
"Tanmay Gupta",
68+
"Wei-Chiu Ma",
69+
"Ali Farhadi",
70+
"Aniruddha Kembhavi",
71+
"Ranjay Krishna"
72+
],
73+
"year": "2024",
74+
"venue": "NeurIPS 2024",
75+
"links": {
76+
"pdf": "https://arxiv.org/abs/2406.11775"
77+
},
78+
"thumbnail": "/taskmeanything.png"
79+
},
80+
{
81+
"title": "Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model",
82+
"authors": [
83+
"Benlin Liu",
84+
"Yiqin Wang",
85+
"Yuhao Dong",
86+
"Yongming Rao",
87+
"Yansong Tang",
88+
"Wei-Chiu Ma",
89+
"Ranjay Krishna"
90+
],
91+
"year": "2025",
92+
"venue": "CVPR 2025",
93+
"links": {
94+
"pdf": "https://arxiv.org/abs/2408.00754"
95+
},
96+
"thumbnail": "/coarsecorrespondences.png"
97+
},
98+
{
99+
"title": "Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Multimodal Models",
100+
"authors": [
101+
"Ai2 + UW"
102+
],
103+
"year": "2025",
104+
"venue": "CVPR 2025",
105+
"links": {
106+
"pdf": "https://arxiv.org/abs/2409.17146"
107+
},
108+
"thumbnail": "/molmopixmo.png"
109+
},
110+
{
111+
"title": "Synthetic Visual Genome",
112+
"authors": [
113+
"Jae Sung Park",
114+
"Zixian Ma",
115+
"Linjie Li",
116+
"Chenhao Zheng",
117+
"Cheng-Yu Hsieh",
118+
"Ximing Lu",
119+
"Khyathi Chandu",
120+
"Quan Kong",
121+
"Norimasa Kobori",
122+
"Ali Farhadi",
123+
"Yejin Choi",
124+
"Ranjay Krishna"
125+
],
126+
"year": "2025",
127+
"venue": "CVPR 2025"
128+
},
129+
{
130+
"title": "Eval3D: Interpretable and Fine-grained Evaluation for 3D Generation",
131+
"authors": [
132+
"Shivam Duggal",
133+
"Yushi Hu",
134+
"Oscar Michel",
135+
"Aniruddha Kembhavi",
136+
"William T. Freeman",
137+
"Noah A. Smith",
138+
"Ranjay Krishna",
139+
"Antonio Torralba",
140+
"Ali Farhadi",
141+
"Wei-Chiu Ma"
142+
],
143+
"year": "2025",
144+
"venue": "CVPR 2025",
145+
"links": {
146+
"pdf": "https://arxiv.org/abs/2504.18509"
147+
},
148+
"thumbnail": "/eval3d.png"
149+
},
150+
{
151+
"title": "SAM2Act: Integrating Visual Foundation Model with A Memory Architecture for Robotic Manipulation",
152+
"authors": [
153+
"Haoquan Fang",
154+
"Markus Grotz",
155+
"Wilbert Pumacay",
156+
"Yi Ru Wang",
157+
"Dieter Fox",
158+
"Ranjay Krishna",
159+
"Jiafei Duan"
160+
],
161+
"year": "2025",
162+
"venue": "ICML 2025",
163+
"links": {
164+
"pdf": "https://arxiv.org/abs/2501.18564"
165+
},
166+
"thumbnail": "/sam2act.png"
167+
},
168+
{
169+
"title": "Unsettling the Hegemony of Intention: Agonistic Image Generation",
170+
"authors": [
171+
"Andrew Shaw",
172+
"Andre Ye",
173+
"Ranjay Krishna",
174+
"Amy X. Zhang"
175+
],
176+
"year": "2025",
177+
"venue": "FAccT 2025",
178+
"links": {
179+
"pdf": "https://arxiv.org/abs/2502.15242"
180+
},
181+
"thumbnail": "/agonistic.png"
182+
},
19183
{
20184
"title": "REALEDIT: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations",
21185
"authors": [
@@ -433,6 +597,25 @@
433597
},
434598
"thumbnail": "/gencap.png"
435599
},
600+
{
601+
"title": "Multilingual Diversity Improves Vision-Language Representations",
602+
"authors": [
603+
"Thao Nguyen",
604+
"Matthew Wallingford",
605+
"Sebastin Santy",
606+
"Wei-Chiu Ma",
607+
"Sewoong Oh",
608+
"Ludwig Schmidt",
609+
"Pang Wei Koh",
610+
"Ranjay Krishna"
611+
],
612+
"year": "2024",
613+
"venue": "NeurIPS 2024",
614+
"links": {
615+
"pdf": "https://arxiv.org/abs/2405.16915"
616+
},
617+
"thumbnail": "/multilingualdiversity.png"
618+
},
436619
{
437620
"title": "AdANNS: A Framework for Adaptive Semantic Search",
438621
"authors": [

static/agonistic.png

410 KB
Loading

static/aha.png

442 KB
Loading

static/coarsecorrespondences.png

328 KB
Loading

static/eval3d.png

327 KB
Loading

static/molmopixmo.png

607 KB
Loading

static/multilingualdiversity.png

1.18 MB
Loading

static/sam2act.png

161 KB
Loading

static/taskmeanything.png

146 KB
Loading

0 commit comments

Comments
 (0)