|
10 | 10 | "Linda G. Shapiro", |
11 | 11 | "Ranjay Krishna" |
12 | 12 | ], |
13 | | - "year": "2024", |
| 13 | + "year": "2025", |
| 14 | + "venue": "CVPR 2025", |
14 | 15 | "links": { |
15 | 16 | "pdf": "https://arxiv.org/abs/2412.03548" |
16 | 17 | }, |
17 | 18 | "thumbnail": "/perctokens.png" |
18 | 19 | }, |
| 20 | + { |
| 21 | + "title": "The Unmet Promise of Synthetic Training Images: Using Retrieved Real Images Performs Better", |
| 22 | + "authors": [ |
| 23 | + "Scott Geng", |
| 24 | + "Cheng-Yu Hsieh", |
| 25 | + "Vivek Ramanujan", |
| 26 | + "Matthew Wallingford", |
| 27 | + "Chun-Liang Li", |
| 28 | + "Pang Wei Koh", |
| 29 | + "Ranjay Krishna" |
| 30 | + ], |
| 31 | + "year": "2024", |
| 32 | + "venue": "NeurIPS 2024", |
| 33 | + "links": { |
| 34 | + "pdf": "https://arxiv.org/abs/2406.05184" |
| 35 | + }, |
| 36 | + "thumbnail": "/unmetpromise.png" |
| 37 | + }, |
| 38 | + { |
| 39 | + "title": "AHA: A Vision-Language-Model for Detecting and Reasoning Over Failures in Robotic Manipulation", |
| 40 | + "authors": [ |
| 41 | + "Jiafei Duan", |
| 42 | + "Wilbert Pumacay", |
| 43 | + "Nishanth Kumar", |
| 44 | + "Yi Ru Wang", |
| 45 | + "Shulin Tian", |
| 46 | + "Wentao Yuan", |
| 47 | + "Ranjay Krishna", |
| 48 | + "Dieter Fox", |
| 49 | + "Ajay Mandlekar", |
| 50 | + "Yijie Guo" |
| 51 | + ], |
| 52 | + "year": "2025", |
| 53 | + "venue": "ICLR 2025", |
| 54 | + "links": { |
| 55 | + "pdf": "https://arxiv.org/abs/2410.00371" |
| 56 | + }, |
| 57 | + "thumbnail": "/aha.png" |
| 58 | + }, |
| 59 | + { |
| 60 | + "title": "Task Me Anything", |
| 61 | + "authors": [ |
| 62 | + "Jieyu Zhang", |
| 63 | + "Weikai Huang", |
| 64 | + "Zixian Ma", |
| 65 | + "Oscar Michel", |
| 66 | + "Dong He", |
| 67 | + "Tanmay Gupta", |
| 68 | + "Wei-Chiu Ma", |
| 69 | + "Ali Farhadi", |
| 70 | + "Aniruddha Kembhavi", |
| 71 | + "Ranjay Krishna" |
| 72 | + ], |
| 73 | + "year": "2024", |
| 74 | + "venue": "NeurIPS 2024", |
| 75 | + "links": { |
| 76 | + "pdf": "https://arxiv.org/abs/2406.11775" |
| 77 | + }, |
| 78 | + "thumbnail": "/taskmeanything.png" |
| 79 | + }, |
| 80 | + { |
| 81 | + "title": "Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model", |
| 82 | + "authors": [ |
| 83 | + "Benlin Liu", |
| 84 | + "Yiqin Wang", |
| 85 | + "Yuhao Dong", |
| 86 | + "Yongming Rao", |
| 87 | + "Yansong Tang", |
| 88 | + "Wei-Chiu Ma", |
| 89 | + "Ranjay Krishna" |
| 90 | + ], |
| 91 | + "year": "2025", |
| 92 | + "venue": "CVPR 2025", |
| 93 | + "links": { |
| 94 | + "pdf": "https://arxiv.org/abs/2408.00754" |
| 95 | + }, |
| 96 | + "thumbnail": "/coarsecorrespondences.png" |
| 97 | + }, |
| 98 | + { |
| 99 | + "title": "Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Multimodal Models", |
| 100 | + "authors": [ |
| 101 | + "Ai2 + UW" |
| 102 | + ], |
| 103 | + "year": "2025", |
| 104 | + "venue": "CVPR 2025", |
| 105 | + "links": { |
| 106 | + "pdf": "https://arxiv.org/abs/2409.17146" |
| 107 | + }, |
| 108 | + "thumbnail": "/molmopixmo.png" |
| 109 | + }, |
| 110 | + { |
| 111 | + "title": "Synthetic Visual Genome", |
| 112 | + "authors": [ |
| 113 | + "Jae Sung Park", |
| 114 | + "Zixian Ma", |
| 115 | + "Linjie Li", |
| 116 | + "Chenhao Zheng", |
| 117 | + "Cheng-Yu Hsieh", |
| 118 | + "Ximing Lu", |
| 119 | + "Khyathi Chandu", |
| 120 | + "Quan Kong", |
| 121 | + "Norimasa Kobori", |
| 122 | + "Ali Farhadi", |
| 123 | + "Yejin Choi", |
| 124 | + "Ranjay Krishna" |
| 125 | + ], |
| 126 | + "year": "2025", |
| 127 | + "venue": "CVPR 2025" |
| 128 | + }, |
| 129 | + { |
| 130 | + "title": "Eval3D: Interpretable and Fine-grained Evaluation for 3D Generation", |
| 131 | + "authors": [ |
| 132 | + "Shivam Duggal", |
| 133 | + "Yushi Hu", |
| 134 | + "Oscar Michel", |
| 135 | + "Aniruddha Kembhavi", |
| 136 | + "William T. Freeman", |
| 137 | + "Noah A. Smith", |
| 138 | + "Ranjay Krishna", |
| 139 | + "Antonio Torralba", |
| 140 | + "Ali Farhadi", |
| 141 | + "Wei-Chiu Ma" |
| 142 | + ], |
| 143 | + "year": "2025", |
| 144 | + "venue": "CVPR 2025", |
| 145 | + "links": { |
| 146 | + "pdf": "https://arxiv.org/abs/2504.18509" |
| 147 | + }, |
| 148 | + "thumbnail": "/eval3d.png" |
| 149 | + }, |
| 150 | + { |
| 151 | + "title": "SAM2Act: Integrating Visual Foundation Model with A Memory Architecture for Robotic Manipulation", |
| 152 | + "authors": [ |
| 153 | + "Haoquan Fang", |
| 154 | + "Markus Grotz", |
| 155 | + "Wilbert Pumacay", |
| 156 | + "Yi Ru Wang", |
| 157 | + "Dieter Fox", |
| 158 | + "Ranjay Krishna", |
| 159 | + "Jiafei Duan" |
| 160 | + ], |
| 161 | + "year": "2025", |
| 162 | + "venue": "ICML 2025", |
| 163 | + "links": { |
| 164 | + "pdf": "https://arxiv.org/abs/2501.18564" |
| 165 | + }, |
| 166 | + "thumbnail": "/sam2act.png" |
| 167 | + }, |
| 168 | + { |
| 169 | + "title": "Unsettling the Hegemony of Intention: Agonistic Image Generation", |
| 170 | + "authors": [ |
| 171 | + "Andrew Shaw", |
| 172 | + "Andre Ye", |
| 173 | + "Ranjay Krishna", |
| 174 | + "Amy X. Zhang" |
| 175 | + ], |
| 176 | + "year": "2025", |
| 177 | + "venue": "FAccT 2025", |
| 178 | + "links": { |
| 179 | + "pdf": "https://arxiv.org/abs/2502.15242" |
| 180 | + }, |
| 181 | + "thumbnail": "/agonistic.png" |
| 182 | + }, |
19 | 183 | { |
20 | 184 | "title": "REALEDIT: Reddit Edits As a Large-scale Empirical Dataset for Image Transformations", |
21 | 185 | "authors": [ |
|
433 | 597 | }, |
434 | 598 | "thumbnail": "/gencap.png" |
435 | 599 | }, |
| 600 | + { |
| 601 | + "title": "Multilingual Diversity Improves Vision-Language Representations", |
| 602 | + "authors": [ |
| 603 | + "Thao Nguyen", |
| 604 | + "Matthew Wallingford", |
| 605 | + "Sebastin Santy", |
| 606 | + "Wei-Chiu Ma", |
| 607 | + "Sewoong Oh", |
| 608 | + "Ludwig Schmidt", |
| 609 | + "Pang Wei Koh", |
| 610 | + "Ranjay Krishna" |
| 611 | + ], |
| 612 | + "year": "2024", |
| 613 | + "venue": "NeurIPS 2024", |
| 614 | + "links": { |
| 615 | + "pdf": "https://arxiv.org/abs/2405.16915" |
| 616 | + }, |
| 617 | + "thumbnail": "/multilingualdiversity.png" |
| 618 | + }, |
436 | 619 | { |
437 | 620 | "title": "AdANNS: A Framework for Adaptive Semantic Search", |
438 | 621 | "authors": [ |
|
0 commit comments