Skip to content

Commit 98675b8

Browse files
matt-bernstein authored and robot-ci-heartex committed
fix: FIT-1653: COCO Export Category IDs mismatch
GitOrigin-RevId: a0484ba33aeaf90344482fbbe2317eb7ee7f3294
1 parent e1114f5 commit 98675b8

3 files changed

Lines changed: 112 additions & 29 deletions

File tree

src/label_studio_sdk/converter/converter.py

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import io
2+
import itertools
23
import logging
34
import math
45
import os
@@ -1250,20 +1251,25 @@ def _get_labels(self):
12501251
labels |= set(info["labels"])
12511252
attrs = info["labels_attrs"]
12521253
for label in attrs:
1253-
if attrs[label].get("category"):
1254-
categories.append(
1255-
{"id": attrs[label].get("category"), "name": label}
1254+
raw_category_id = attrs[label].get("category")
1255+
if raw_category_id is None:
1256+
continue
1257+
try:
1258+
category_id = int(raw_category_id)
1259+
except (TypeError, ValueError):
1260+
logger.warning(
1261+
"Invalid category id %r for label %r in tag %r, skipping explicit category mapping",
1262+
raw_category_id,
1263+
label,
1264+
name,
12561265
)
1257-
category_name_to_id[label] = attrs[label].get("category")
1266+
continue
1267+
categories.append({"id": category_id, "name": label})
1268+
category_name_to_id[label] = category_id
12581269
labels_to_add = set(labels) - set(list(category_name_to_id.keys()))
1259-
labels_to_add = sorted(list(labels_to_add))
1260-
idx = 0
1261-
while idx in list(category_name_to_id.values()):
1262-
idx += 1
1263-
for label in labels_to_add:
1270+
used_category_ids = set(category_name_to_id.values())
1271+
idxs = (c for c in itertools.count() if c not in used_category_ids)
1272+
for label, idx in zip(sorted(labels_to_add), idxs):
12641273
categories.append({"id": idx, "name": label})
12651274
category_name_to_id[label] = idx
1266-
idx += 1
1267-
while idx in list(category_name_to_id.values()):
1268-
idx += 1
12691275
return categories, category_name_to_id

src/label_studio_sdk/converter/keypoints.py

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -21,29 +21,30 @@ def update_categories_for_keypoints(categories, category_name_to_id, label_confi
2121
keypoint_labels.extend(cfg.get("labels", []))
2222
keypoint_labels = list(dict.fromkeys(keypoint_labels))
2323

24+
if not keypoint_labels:
25+
return categories, category_name_to_id
26+
2427
non_kp = [cat.copy() for cat in categories if cat["name"] not in keypoint_labels]
2528

2629
new_categories = []
2730
new_mapping = {}
28-
next_id = 0
2931
for cat in non_kp:
30-
cat["id"] = next_id
32+
category_id = cat["id"]
3133
new_categories.append(cat)
32-
new_mapping[cat["name"]] = next_id
33-
next_id += 1
34-
35-
if keypoint_labels:
36-
merged_id = next_id
37-
merged_category = {
38-
"id": merged_id,
39-
"name": "default",
40-
"supercategory": "default",
41-
"keypoints": keypoint_labels,
42-
"skeleton": []
43-
}
44-
new_categories.append(merged_category)
45-
for kp_name in keypoint_labels:
46-
new_mapping[kp_name] = merged_id
34+
new_mapping[cat["name"]] = category_id
35+
36+
numeric_ids = [cat["id"] for cat in new_categories if isinstance(cat["id"], int)]
37+
merged_id = (max(numeric_ids) + 1) if numeric_ids else 0
38+
merged_category = {
39+
"id": merged_id,
40+
"name": "default",
41+
"supercategory": "default",
42+
"keypoints": keypoint_labels,
43+
"skeleton": []
44+
}
45+
new_categories.append(merged_category)
46+
for kp_name in keypoint_labels:
47+
new_mapping[kp_name] = merged_id
4748

4849
return new_categories, new_mapping
4950

tests/custom/converter/test_export_coco.py

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,3 +159,79 @@ def test_convert_to_coco_keypoints(temp_out_dir: Path):
159159
cat = kp_cats[0]
160160

161161
assert cat["name"] == "default"
162+
163+
164+
def test_convert_to_coco_preserves_explicit_category_ids(temp_out_dir: Path):
165+
"""COCO export should keep explicit `category` ids from the labeling config."""
166+
config = """
167+
<View>
168+
<Image name="image" value="$image"/>
169+
<RectangleLabels name="label" toName="image">
170+
<Label value="car" category="10"/>
171+
<Label value="person" category="2"/>
172+
<Label value="truck" category="7"/>
173+
</RectangleLabels>
174+
</View>
175+
""".strip()
176+
input_payload = [
177+
{
178+
"id": 1,
179+
"data": {"image": "not-downloaded.jpg"},
180+
"annotations": [
181+
{
182+
"id": 1,
183+
"result": [
184+
{
185+
"id": "r1",
186+
"type": "rectanglelabels",
187+
"value": {
188+
"x": 10,
189+
"y": 10,
190+
"width": 20,
191+
"height": 20,
192+
"rotation": 0,
193+
"rectanglelabels": ["car"],
194+
},
195+
"to_name": "image",
196+
"from_name": "label",
197+
"original_width": 1000,
198+
"original_height": 500,
199+
},
200+
{
201+
"id": "r2",
202+
"type": "rectanglelabels",
203+
"value": {
204+
"x": 30,
205+
"y": 30,
206+
"width": 10,
207+
"height": 10,
208+
"rotation": 0,
209+
"rectanglelabels": ["truck"],
210+
},
211+
"to_name": "image",
212+
"from_name": "label",
213+
"original_width": 1000,
214+
"original_height": 500,
215+
},
216+
],
217+
}
218+
],
219+
}
220+
]
221+
input_path = temp_out_dir / "input.json"
222+
input_path.write_text(json.dumps(input_payload))
223+
224+
converter = Converter(config=config, project_dir=PROJECT_DIR, download_resources=False)
225+
converter.convert_to_coco(str(input_path), str(temp_out_dir), output_image_dir=str(temp_out_dir / "images"), is_dir=False)
226+
227+
coco_path = next(temp_out_dir.glob("*.json"))
228+
coco = json.loads(coco_path.read_text())
229+
230+
category_id_by_name = {cat["name"]: cat["id"] for cat in coco["categories"]}
231+
assert category_id_by_name["car"] == 10
232+
assert category_id_by_name["person"] == 2
233+
assert category_id_by_name["truck"] == 7
234+
assert all(isinstance(cat["id"], int) for cat in coco["categories"])
235+
236+
annotation_category_ids = {ann["category_id"] for ann in coco["annotations"]}
237+
assert annotation_category_ids == {10, 7}

0 commit comments

Comments
 (0)