Skip to content

Commit 2380905

Browse files
committed
Add fallback labels for segmentation
1 parent 94dbf34 commit 2380905

1 file changed

Lines changed: 158 additions & 1 deletion

File tree

lib/segmentation.ex

Lines changed: 158 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,152 @@ if ImageVision.ortex_configured?() do
7575
@detr_short_side 800
7676
@detr_no_object_class 250
7777

78+
# Canonical COCO panoptic id → label map (133 categories, IDs 1-200
79+
# with gaps), drawn from the official panoptic_coco_categories.json:
80+
# https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json
81+
#
82+
# Used as a fallback for IDs where the model's config.json carries
83+
# a placeholder `"LABEL_n"` instead of the real class name. Both the
84+
# original `facebook/detr-resnet-50-panoptic` config and the Xenova
85+
# ONNX repackage drop the names for IDs 184-200 (the COCO panoptic
86+
# "merged" stuff classes — sky-other-merged, mountain-merged, etc.),
87+
# even though the model predicts them confidently.
88+
@coco_panoptic_id2label %{
89+
1 => "person",
90+
2 => "bicycle",
91+
3 => "car",
92+
4 => "motorcycle",
93+
5 => "airplane",
94+
6 => "bus",
95+
7 => "train",
96+
8 => "truck",
97+
9 => "boat",
98+
10 => "traffic light",
99+
11 => "fire hydrant",
100+
13 => "stop sign",
101+
14 => "parking meter",
102+
15 => "bench",
103+
16 => "bird",
104+
17 => "cat",
105+
18 => "dog",
106+
19 => "horse",
107+
20 => "sheep",
108+
21 => "cow",
109+
22 => "elephant",
110+
23 => "bear",
111+
24 => "zebra",
112+
25 => "giraffe",
113+
27 => "backpack",
114+
28 => "umbrella",
115+
31 => "handbag",
116+
32 => "tie",
117+
33 => "suitcase",
118+
34 => "frisbee",
119+
35 => "skis",
120+
36 => "snowboard",
121+
37 => "sports ball",
122+
38 => "kite",
123+
39 => "baseball bat",
124+
40 => "baseball glove",
125+
41 => "skateboard",
126+
42 => "surfboard",
127+
43 => "tennis racket",
128+
44 => "bottle",
129+
46 => "wine glass",
130+
47 => "cup",
131+
48 => "fork",
132+
49 => "knife",
133+
50 => "spoon",
134+
51 => "bowl",
135+
52 => "banana",
136+
53 => "apple",
137+
54 => "sandwich",
138+
55 => "orange",
139+
56 => "broccoli",
140+
57 => "carrot",
141+
58 => "hot dog",
142+
59 => "pizza",
143+
60 => "donut",
144+
61 => "cake",
145+
62 => "chair",
146+
63 => "couch",
147+
64 => "potted plant",
148+
65 => "bed",
149+
67 => "dining table",
150+
70 => "toilet",
151+
72 => "tv",
152+
73 => "laptop",
153+
74 => "mouse",
154+
75 => "remote",
155+
76 => "keyboard",
156+
77 => "cell phone",
157+
78 => "microwave",
158+
79 => "oven",
159+
80 => "toaster",
160+
81 => "sink",
161+
82 => "refrigerator",
162+
84 => "book",
163+
85 => "clock",
164+
86 => "vase",
165+
87 => "scissors",
166+
88 => "teddy bear",
167+
89 => "hair drier",
168+
90 => "toothbrush",
169+
92 => "banner",
170+
93 => "blanket",
171+
95 => "bridge",
172+
100 => "cardboard",
173+
107 => "counter",
174+
109 => "curtain",
175+
112 => "door-stuff",
176+
118 => "floor-wood",
177+
119 => "flower",
178+
122 => "fruit",
179+
125 => "gravel",
180+
128 => "house",
181+
130 => "light",
182+
133 => "mirror-stuff",
183+
138 => "net",
184+
141 => "pillow",
185+
144 => "platform",
186+
145 => "playingfield",
187+
147 => "railroad",
188+
148 => "river",
189+
149 => "road",
190+
151 => "roof",
191+
154 => "sand",
192+
155 => "sea",
193+
156 => "shelf",
194+
159 => "snow",
195+
161 => "stairs",
196+
166 => "tent",
197+
168 => "towel",
198+
171 => "wall-brick",
199+
175 => "wall-stone",
200+
176 => "wall-tile",
201+
177 => "wall-wood",
202+
178 => "water-other",
203+
180 => "window-blind",
204+
181 => "window-other",
205+
184 => "tree-merged",
206+
185 => "fence-merged",
207+
186 => "ceiling-merged",
208+
187 => "sky-other-merged",
209+
188 => "cabinet-merged",
210+
189 => "table-merged",
211+
190 => "floor-other-merged",
212+
191 => "pavement-merged",
213+
192 => "mountain-merged",
214+
193 => "grass-merged",
215+
194 => "dirt-merged",
216+
195 => "paper-merged",
217+
196 => "food-other-merged",
218+
197 => "building-other-merged",
219+
198 => "rock-merged",
220+
199 => "wall-other-merged",
221+
200 => "rug-merged"
222+
}
223+
78224
@default_min_score 0.5
79225

80226
# ImageNet normalisation (shared by both models).
@@ -549,6 +695,17 @@ if ImageVision.ortex_configured?() do
549695
{{tensor, pixel_mask}, input_h, input_w}
550696
end
551697

698+
# Maps a predicted class index to a display label.
#
# The repo's `id2label` is consulted first, keyed by the stringified
# index. When it has no entry for the index, or the entry is a
# `"LABEL_n"` placeholder, the canonical COCO panoptic table is used
# instead; `"class_<idx>"` is returned when that table has no entry
# for the index either.
defp lookup_panoptic_label(id2label, class_idx) do
  repo_label = Map.get(id2label, to_string(class_idx))

  # A missing entry and a placeholder entry share the same fallback.
  if is_nil(repo_label) or match?("LABEL_" <> _, repo_label) do
    Map.get(@coco_panoptic_id2label, class_idx, "class_#{class_idx}")
  else
    repo_label
  end
end
708+
552709
# Loads id2label from config.json; cached in :persistent_term.
553710
defp load_detr_labels(repo) do
554711
key = {__MODULE__, :labels, repo}
@@ -597,7 +754,7 @@ if ImageVision.ortex_configured?() do
597754
|> Enum.with_index()
598755
|> Enum.flat_map(fn {{class_idx, score}, query_idx} ->
599756
if class_idx != @detr_no_object_class and score >= min_score do
600-
label = Map.get(id2label, to_string(class_idx), "class_#{class_idx}")
757+
label = lookup_panoptic_label(id2label, class_idx)
601758
mask_tensor = pred_masks[0][query_idx]
602759

603760
mask =

0 commit comments

Comments
 (0)