Skip to content

Commit 81b107b

Browse files
authored
Merge pull request #1931 from kili-technology/feature/lab-3719-aau-i-can-upload-geojson-labels-directly-without-conversion
feat(LAB-3719): support automatic and specific job mapping for GeoJSON imports
2 parents fdf59c5 + 1083529 commit 81b107b

3 files changed

Lines changed: 903 additions & 13 deletions

File tree

src/kili/presentation/client/label.py

Lines changed: 100 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@
4848
from kili.use_cases.label.process_shapefiles import get_json_response_from_shapefiles
4949
from kili.use_cases.label.types import LabelToCreateUseCaseInput
5050
from kili.use_cases.project.project import ProjectUseCases
51+
from kili.utils.labels.geojson import (
52+
enrich_geojson_with_kili_properties,
53+
enrich_geojson_with_specific_mapping,
54+
)
5155
from kili.utils.labels.parsing import ParsedLabel
5256
from kili.utils.logcontext import for_all_methods, log_call
5357

@@ -1402,49 +1406,132 @@ def append_labels_from_shapefiles(
14021406
asset_external_id_array=[asset_external_id],
14031407
)
14041408

1409+
# pylint: disable=too-many-branches
14051410
@typechecked
14061411
def append_labels_from_geojson_files(
14071412
self,
14081413
project_id: str,
14091414
asset_external_id: str,
14101415
geojson_file_paths: List[str],
1416+
job_names: Optional[List[str]] = None,
1417+
category_names: Optional[List[str]] = None,
14111418
):
14121419
"""Import and convert GeoJSON files into annotations for a specific asset in a Kili project.
14131420
14141421
This method processes GeoJSON feature collections, converts them to the appropriate
14151422
Kili annotation format, and appends them as labels to the specified asset.
1416-
The GeoJSON features must contain job names and category information in their properties.
1423+
1424+
Three modes of import are supported:
1425+
1426+
1. **With `kili` properties**: GeoJSON features contain 'kili' metadata in their properties
1427+
with job, type, and category information.
1428+
1429+
2. **With specific job/category names**: Provide `job_names` and `category_names` to map
1430+
all compatible geometries from each file to the specified job and category.
1431+
1432+
3. **Automatic mapping**: When no 'kili' properties or specific names are provided,
1433+
geometries are automatically mapped based on type and available jobs.
14171434
14181435
Args:
14191436
project_id: The ID of the Kili project to add the labels to.
14201437
asset_external_id: The external ID of the asset to label.
14211438
geojson_file_paths: List of file paths to the GeoJSON files to be processed.
1422-
Each file should contain a FeatureCollection with features that have
1423-
'kili' metadata in their properties, including 'job', 'type', and 'categories'.
1439+
Each file should contain a FeatureCollection with features.
1440+
job_names: Optional list of job names in the Kili project, one for each GeoJSON file.
1441+
When provided, all compatible geometries from the corresponding file will be
1442+
mapped to this job. Must have the same length as `geojson_file_paths`.
1443+
category_names: Optional list of category names, one for each GeoJSON file.
1444+
When provided, all geometries from the corresponding file will be assigned
1445+
to this category. Must have the same length as `geojson_file_paths`.
1446+
Each category must exist in the corresponding job's ontology.
14241447
14251448
Note:
1426-
The GeoJSON features must contain a 'kili' property with the following structure:
1427-
- 'job': The name of the job in the Kili project
1428-
- 'type': The annotation type ('marker', 'polyline', 'semantic', etc.)
1429-
- 'categories': List of category objects with 'name' field
1449+
**Geometry-to-job compatibility:**
1450+
- Point geometries → jobs with 'marker' tool
1451+
- LineString geometries → jobs with 'polyline' tool
1452+
- Polygon geometries → jobs with 'polygon' or 'semantic' tool
1453+
- MultiPolygon geometries → jobs with 'semantic' tool
1454+
1455+
**GeoJSON 'kili' properties structure (Mode 1):**
1456+
```json
1457+
{
1458+
"properties": {
1459+
"kili": {
1460+
"job": "job_name",
1461+
"type": "marker|polyline|polygon|semantic",
1462+
"categories": [{"name": "category_name"}]
1463+
}
1464+
}
1465+
}
1466+
```
14301467
1431-
Supported geometries: Point, LineString, Polygon, and MultiPolygon.
1432-
Polygon and MultiPolygon are always mapped to semantic segmentation jobs in Kili.
1468+
**Automatic mapping priority (Mode 3):**
1469+
- Point → first available 'marker' job
1470+
- LineString → first available 'polyline' job
1471+
- Polygon → first available 'polygon' job, fallback to 'semantic'
1472+
- MultiPolygon → first available 'semantic' job
14331473
14341474
Examples:
1475+
Mode 1 - With kili properties in GeoJSON:
14351476
>>> kili.append_labels_from_geojson_files(
14361477
project_id="project_id",
14371478
asset_external_id="asset_1",
14381479
geojson_file_paths=["annotations.geojson"]
14391480
)
1481+
1482+
Mode 2 - With specific job/category mapping:
1483+
>>> kili.append_labels_from_geojson_files(
1484+
project_id="project_id",
1485+
asset_external_id="asset_1",
1486+
geojson_file_paths=["points.geojson", "polygons.geojson"],
1487+
job_names=["MARKERS", "POLYGONS"],
1488+
category_names=["BUILDING", "ROAD"]
1489+
)
1490+
1491+
Mode 3 - Automatic mapping:
1492+
>>> kili.append_labels_from_geojson_files(
1493+
project_id="project_id",
1494+
asset_external_id="asset_1",
1495+
geojson_file_paths=["mixed_geometries.geojson"]
1496+
)
14401497
"""
1498+
if job_names is not None and category_names is not None:
1499+
if len(job_names) != len(geojson_file_paths):
1500+
raise ValueError("job_names must have the same length as geojson_file_paths")
1501+
if len(category_names) != len(geojson_file_paths):
1502+
raise ValueError("category_names must have the same length as geojson_file_paths")
1503+
if len(job_names) != len(category_names):
1504+
raise ValueError("job_names and category_names must have the same length")
1505+
elif job_names is not None or category_names is not None:
1506+
raise ValueError(
1507+
"Both job_names and category_names must be provided together, or both must be None"
1508+
)
1509+
1510+
json_interface = self.kili_api_gateway.get_project(
1511+
ProjectId(project_id), ("jsonInterface",)
1512+
)["jsonInterface"]
1513+
14411514
merged_json_response = {}
14421515

1443-
for file_path in geojson_file_paths:
1444-
with open(file_path, encoding="utf-8") as f:
1445-
feature_collection = json.load(f)
1516+
for file_index, file_path in enumerate(geojson_file_paths):
1517+
with open(file_path, encoding="utf-8") as file:
1518+
feature_collection = json.load(file)
1519+
1520+
if job_names is not None and category_names is not None:
1521+
enriched_feature_collection = enrich_geojson_with_specific_mapping(
1522+
feature_collection,
1523+
json_interface,
1524+
job_names[file_index],
1525+
category_names[file_index],
1526+
)
1527+
else:
1528+
enriched_feature_collection = enrich_geojson_with_kili_properties(
1529+
feature_collection, json_interface
1530+
)
14461531

1447-
json_response = geojson_feature_collection_to_kili_json_response(feature_collection)
1532+
json_response = geojson_feature_collection_to_kili_json_response(
1533+
enriched_feature_collection
1534+
)
14481535

14491536
for job_name, job_data in json_response.items():
14501537
if job_name not in merged_json_response:

src/kili/utils/labels/geojson.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
"""Utility functions to enrich GeoJSON features with Kili properties."""
2+
3+
from typing import Dict
4+
5+
6+
# pylint: disable=too-many-statements,too-many-branches
def enrich_geojson_with_kili_properties(feature_collection: Dict, json_interface: Dict) -> Dict:
    """Enrich GeoJSON features with kili properties when missing.

    Features that already carry a non-null ``properties.kili`` entry are kept
    as they are. Every other feature is mapped, based on its geometry type, to
    the first OBJECT_DETECTION job of ``json_interface`` offering a compatible
    tool, and to the first category declared by that job:

    - Point -> first job with the 'marker' tool
    - LineString -> first job with the 'polyline' tool
    - Polygon -> first job with the 'polygon' tool, else first 'semantic' job
    - MultiPolygon -> first job with the 'semantic' tool

    Features with a null or missing geometry, an unsupported geometry type, no
    compatible job, or a selected job without any category are left out of the
    result.

    The input is not modified: enriched features are shallow copies holding a
    new ``properties`` dict, so the caller's feature collection can be reused.

    Args:
        feature_collection: The GeoJSON feature collection to enrich
        json_interface: The project's JSON interface containing job definitions

    Returns:
        A shallow copy of the feature collection whose ``features`` list holds
        the untouched pre-annotated features and the enriched ones.
    """
    # Tools able to hold each geometry type, by decreasing priority.
    candidate_tools_by_geometry = {
        "Point": ("marker",),
        "LineString": ("polyline",),
        "Polygon": ("polygon", "semantic"),
        "MultiPolygon": ("semantic",),
    }

    # For each tool, remember the first OBJECT_DETECTION job (in jsonInterface
    # order) that exposes it.
    first_job_by_tool = {}
    for job_name, job_config in json_interface.get("jobs", {}).items():
        if job_config.get("mlTask") != "OBJECT_DETECTION":
            continue
        job_tools = job_config.get("tools", [])
        for tool in ("marker", "polyline", "polygon", "semantic"):
            if tool in job_tools:
                first_job_by_tool.setdefault(tool, (job_name, job_config))

    def build_kili_property(geometry_type):
        """Return the ``{"kili": ...}`` entry for a geometry type, or None."""
        if not isinstance(geometry_type, str):
            return None
        for tool in candidate_tools_by_geometry.get(geometry_type, ()):
            if tool not in first_job_by_tool:
                continue
            job_name, job_config = first_job_by_tool[tool]
            categories = job_config.get("content", {}).get("categories", {})
            category_name = next(iter(categories)) if categories else None
            if not category_name:
                # The selected job has no usable category: the feature is
                # skipped rather than falling back to a lower-priority tool.
                return None
            return {
                "kili": {
                    "children": {},
                    "categories": [{"name": category_name}],
                    "type": tool,
                    "job": job_name,
                }
            }
        return None

    enriched_features = []

    for feature in feature_collection.get("features", []):
        # GeoJSON allows `"properties": null`, so normalise before reading it.
        properties = feature.get("properties") or {}

        # Features already annotated for Kili are passed through unchanged.
        if properties.get("kili") is not None:
            enriched_features.append(feature)
            continue

        # Nothing can be inferred from a feature without geometry.
        geometry = feature.get("geometry")
        if geometry is None:
            continue

        kili_property = build_kili_property(geometry.get("type"))
        if kili_property is None:
            continue

        # Copy instead of updating in place so the caller's data stays intact.
        enriched_feature = dict(feature)
        enriched_feature["properties"] = {**properties, **kili_property}
        enriched_features.append(enriched_feature)

    enriched_collection = feature_collection.copy()
    enriched_collection["features"] = enriched_features
    return enriched_collection
105+
106+
107+
# pylint: disable=too-many-branches
def enrich_geojson_with_specific_mapping(
    feature_collection: Dict, json_interface: Dict, target_job_name: str, target_category_name: str
) -> Dict:
    """Enrich GeoJSON features with specific job and category mapping.

    Every feature whose geometry is compatible with the tools of the target
    job receives a ``properties.kili`` entry pointing to that job and category,
    replacing any ``kili`` entry it already had:

    - Point -> 'marker'
    - LineString -> 'polyline'
    - Polygon -> 'polygon' when the job has that tool, else 'semantic'
    - MultiPolygon -> 'semantic'

    Features with a null or missing geometry, or with a geometry the job's
    tools cannot hold, are left out of the result.

    The input is not modified: enriched features are shallow copies holding a
    new ``properties`` dict, so the caller's feature collection can be reused.

    Args:
        feature_collection: The GeoJSON feature collection to enrich
        json_interface: The project's JSON interface containing job definitions
        target_job_name: The specific job name to map geometries to
        target_category_name: The specific category name to assign to all geometries

    Returns:
        A shallow copy of the feature collection whose ``features`` list holds
        only the enriched features.

    Raises:
        ValueError: If the job is not found in ``json_interface``, is not an
            OBJECT_DETECTION job, or does not declare the requested category.
    """
    target_job_config = None
    if "jobs" in json_interface:
        target_job_config = json_interface["jobs"].get(target_job_name)

    if not target_job_config:
        raise ValueError(f"Job '{target_job_name}' not found in project")

    if target_job_config.get("mlTask") != "OBJECT_DETECTION":
        raise ValueError(f"Job '{target_job_name}' is not an OBJECT_DETECTION job")

    categories = target_job_config.get("content", {}).get("categories", {})
    if target_category_name not in categories:
        raise ValueError(f"Category '{target_category_name}' not found in job '{target_job_name}'")

    tools = target_job_config.get("tools", [])

    # The annotation type only depends on the geometry type and on the job's
    # tools, so resolve it once instead of once per feature.
    candidate_tools_by_geometry = {
        "Point": ("marker",),
        "LineString": ("polyline",),
        "Polygon": ("polygon", "semantic"),
        "MultiPolygon": ("semantic",),
    }
    annotation_type_by_geometry = {}
    for geometry_type, candidate_tools in candidate_tools_by_geometry.items():
        for tool in candidate_tools:
            if tool in tools:
                annotation_type_by_geometry[geometry_type] = tool
                break

    enriched_features = []

    for feature in feature_collection.get("features", []):
        # Nothing can be mapped from a feature without geometry.
        geometry = feature.get("geometry")
        if geometry is None:
            continue

        geometry_type = geometry.get("type")
        if not isinstance(geometry_type, str):
            continue
        annotation_type = annotation_type_by_geometry.get(geometry_type)
        if annotation_type is None:
            continue

        # GeoJSON allows `"properties": null`; copy instead of updating in
        # place so the caller's data stays intact.
        enriched_feature = dict(feature)
        enriched_feature["properties"] = {
            **(feature.get("properties") or {}),
            "kili": {
                "children": {},
                "categories": [{"name": target_category_name}],
                "type": annotation_type,
                "job": target_job_name,
            },
        }
        enriched_features.append(enriched_feature)

    enriched_collection = feature_collection.copy()
    enriched_collection["features"] = enriched_features
    return enriched_collection

0 commit comments

Comments
 (0)