@@ -75,6 +75,152 @@ if ImageVision.ortex_configured?() do
# NOTE(review): presumably the target short-side length (pixels) for DETR input resizing — confirm against the preprocessing code.
7575 @ detr_short_side 800
# Class index compared against each predicted class in post-processing; a prediction
# carrying this index fails the `class_idx != @detr_no_object_class` check.
7676 @ detr_no_object_class 250
7777
78+ # Canonical COCO panoptic id → label map (133 categories, IDs 1-200
79+ # with gaps), drawn from the official panoptic_coco_categories.json:
80+ # https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
81+ #
82+ # Keys are integer class ids. Used as a fallback for IDs where the
83+ # model's config.json carries a placeholder `"LABEL_n"` instead of the
84+ # real class name. Both the original `facebook/detr-resnet-50-panoptic`
85+ # config and the Xenova ONNX repackage drop the names for IDs 184-200
86+ # (the COCO panoptic "merged" stuff classes — sky-other-merged,
87+ # mountain-merged, etc.), even though the model predicts them confidently.
88+ @ coco_panoptic_id2label % {
89+ 1 => "person" ,
90+ 2 => "bicycle" ,
91+ 3 => "car" ,
92+ 4 => "motorcycle" ,
93+ 5 => "airplane" ,
94+ 6 => "bus" ,
95+ 7 => "train" ,
96+ 8 => "truck" ,
97+ 9 => "boat" ,
98+ 10 => "traffic light" ,
99+ 11 => "fire hydrant" ,
100+ 13 => "stop sign" ,
101+ 14 => "parking meter" ,
102+ 15 => "bench" ,
103+ 16 => "bird" ,
104+ 17 => "cat" ,
105+ 18 => "dog" ,
106+ 19 => "horse" ,
107+ 20 => "sheep" ,
108+ 21 => "cow" ,
109+ 22 => "elephant" ,
110+ 23 => "bear" ,
111+ 24 => "zebra" ,
112+ 25 => "giraffe" ,
113+ 27 => "backpack" ,
114+ 28 => "umbrella" ,
115+ 31 => "handbag" ,
116+ 32 => "tie" ,
117+ 33 => "suitcase" ,
118+ 34 => "frisbee" ,
119+ 35 => "skis" ,
120+ 36 => "snowboard" ,
121+ 37 => "sports ball" ,
122+ 38 => "kite" ,
123+ 39 => "baseball bat" ,
124+ 40 => "baseball glove" ,
125+ 41 => "skateboard" ,
126+ 42 => "surfboard" ,
127+ 43 => "tennis racket" ,
128+ 44 => "bottle" ,
129+ 46 => "wine glass" ,
130+ 47 => "cup" ,
131+ 48 => "fork" ,
132+ 49 => "knife" ,
133+ 50 => "spoon" ,
134+ 51 => "bowl" ,
135+ 52 => "banana" ,
136+ 53 => "apple" ,
137+ 54 => "sandwich" ,
138+ 55 => "orange" ,
139+ 56 => "broccoli" ,
140+ 57 => "carrot" ,
141+ 58 => "hot dog" ,
142+ 59 => "pizza" ,
143+ 60 => "donut" ,
144+ 61 => "cake" ,
145+ 62 => "chair" ,
146+ 63 => "couch" ,
147+ 64 => "potted plant" ,
148+ 65 => "bed" ,
149+ 67 => "dining table" ,
150+ 70 => "toilet" ,
151+ 72 => "tv" ,
152+ 73 => "laptop" ,
153+ 74 => "mouse" ,
154+ 75 => "remote" ,
155+ 76 => "keyboard" ,
156+ 77 => "cell phone" ,
157+ 78 => "microwave" ,
158+ 79 => "oven" ,
159+ 80 => "toaster" ,
160+ 81 => "sink" ,
161+ 82 => "refrigerator" ,
162+ 84 => "book" ,
163+ 85 => "clock" ,
164+ 86 => "vase" ,
165+ 87 => "scissors" ,
166+ 88 => "teddy bear" ,
167+ 89 => "hair drier" ,
168+ 90 => "toothbrush" ,
169+ 92 => "banner" ,
170+ 93 => "blanket" ,
171+ 95 => "bridge" ,
172+ 100 => "cardboard" ,
173+ 107 => "counter" ,
174+ 109 => "curtain" ,
175+ 112 => "door-stuff" ,
176+ 118 => "floor-wood" ,
177+ 119 => "flower" ,
178+ 122 => "fruit" ,
179+ 125 => "gravel" ,
180+ 128 => "house" ,
181+ 130 => "light" ,
182+ 133 => "mirror-stuff" ,
183+ 138 => "net" ,
184+ 141 => "pillow" ,
185+ 144 => "platform" ,
186+ 145 => "playingfield" ,
187+ 147 => "railroad" ,
188+ 148 => "river" ,
189+ 149 => "road" ,
190+ 151 => "roof" ,
191+ 154 => "sand" ,
192+ 155 => "sea" ,
193+ 156 => "shelf" ,
194+ 159 => "snow" ,
195+ 161 => "stairs" ,
196+ 166 => "tent" ,
197+ 168 => "towel" ,
198+ 171 => "wall-brick" ,
199+ 175 => "wall-stone" ,
200+ 176 => "wall-tile" ,
201+ 177 => "wall-wood" ,
202+ 178 => "water-other" ,
203+ 180 => "window-blind" ,
204+ 181 => "window-other" ,
205+ 184 => "tree-merged" ,
206+ 185 => "fence-merged" ,
207+ 186 => "ceiling-merged" ,
208+ 187 => "sky-other-merged" ,
209+ 188 => "cabinet-merged" ,
210+ 189 => "table-merged" ,
211+ 190 => "floor-other-merged" ,
212+ 191 => "pavement-merged" ,
213+ 192 => "mountain-merged" ,
214+ 193 => "grass-merged" ,
215+ 194 => "dirt-merged" ,
216+ 195 => "paper-merged" ,
217+ 196 => "food-other-merged" ,
218+ 197 => "building-other-merged" ,
219+ 198 => "rock-merged" ,
220+ 199 => "wall-other-merged" ,
221+ 200 => "rug-merged"
222+ }
223+
# NOTE(review): presumably the default for the `min_score` threshold that predictions
# must reach (`score >= min_score`) in post-processing — confirm where it is read.
78224 @ default_min_score 0.5
79225
80226 # ImageNet normalisation (shared by both models).
@@ -549,6 +695,17 @@ if ImageVision.ortex_configured?() do
549695 { { tensor , pixel_mask } , input_h , input_w }
550696 end
551697
# Resolves a class index to a human-readable label.
#
# Arguments:
#   * `id2label`  - the repo's label map, looked up by the stringified
#     class index.
#   * `class_idx` - the integer class index to resolve.
#
# Resolution order:
#   1. the repo's own label, when present and not a `LABEL_n` placeholder;
#   2. the canonical COCO panoptic label for `class_idx`;
#   3. the synthetic name `"class_<idx>"` when neither map knows the index.
defp lookup_panoptic_label(id2label, class_idx) do
  repo_label = Map.get(id2label, to_string(class_idx))

  # A missing entry and a `LABEL_n` placeholder are handled identically, so
  # both take the single fallback branch. `match?/2` simply returns false for
  # any value that is not a binary starting with "LABEL_".
  if is_nil(repo_label) or match?("LABEL_" <> _, repo_label) do
    Map.get(@coco_panoptic_id2label, class_idx, "class_#{class_idx}")
  else
    repo_label
  end
end
708+
552709 # Loads id2label from config.json; cached in :persistent_term.
553710 defp load_detr_labels ( repo ) do
554711 key = { __MODULE__ , :labels , repo }
@@ -597,7 +754,7 @@ if ImageVision.ortex_configured?() do
597754 |> Enum . with_index ( )
598755 |> Enum . flat_map ( fn { { class_idx , score } , query_idx } ->
599756 if class_idx != @ detr_no_object_class and score >= min_score do
600- label = Map . get ( id2label , to_string ( class_idx ) , "class_ #{ class_idx } " )
757+ label = lookup_panoptic_label ( id2label , class_idx )
601758 mask_tensor = pred_masks [ 0 ] [ query_idx ]
602759
603760 mask =
0 commit comments