Skip to content

Commit b0e5dd3

Browse files
committed
fix(LAB-4269): isolate LLM-annotation-specific code in dedicated methods
1 parent ef614db commit b0e5dd3

2 files changed

Lines changed: 100 additions & 54 deletions

File tree

src/kili/adapters/kili_api_gateway/asset/operations_mixin.py

Lines changed: 64 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,16 @@ def list_assets(
5151
project_info = get_project(
5252
self.graphql_client, filters.project_id, ("inputType", "jsonInterface")
5353
)
54+
# TODO(LAB-4269): TEMPORARY WORKAROUND - Remove when backend handles jsonResponseUrl for LLM
55+
# LLM projects go through a dedicated method that rebuilds jsonResponse client-side and sizes batches from the annotation count.
5456
if project_info["inputType"] in {
55-
"VIDEO",
5657
"LLM_RLHF",
5758
"LLM_INSTR_FOLLOWING",
5859
"LLM_STATIC",
59-
"GEOSPATIAL",
6060
}:
61+
yield from self.llm_list_assets_split(filters, fields, options, project_info)
62+
return
63+
if project_info["inputType"] in {"VIDEO", "GEOSPATIAL"}:
6164
yield from self.list_assets_split(filters, fields, options, project_info)
6265
return
6366

@@ -78,29 +81,66 @@ def list_assets(
7881

7982
yield from assets_gen
8083

81-
def list_assets_split( # pylint: disable=too-many-branches
84+
def list_assets_split(
8285
self,
8386
filters: AssetFilters,
8487
fields: ListOrTuple[str],
8588
options: QueryOptions,
8689
project_info,
8790
) -> Generator[dict, None, None]:
88-
"""List assets with given options."""
89-
# For LLM projects, we need to fetch annotations and rebuild jsonResponse
90-
# because LLM projects don't have jsonResponseUrl
91-
is_llm_project = project_info["inputType"] in {
92-
"LLM_RLHF",
93-
"LLM_INSTR_FOLLOWING",
94-
"LLM_STATIC",
95-
}
96-
91+
"""List assets with given options for VIDEO and GEOSPATIAL projects."""
9792
assets_batch_max_amount = 10 if project_info["inputType"] == "VIDEO" else 50
9893
batch_size_to_use = min(options.batch_size, assets_batch_max_amount)
9994

100-
# For LLM projects fetching annotations, adjust batch size based on annotation count
101-
if is_llm_project and (
102-
"labels.jsonResponse" in fields or "latestLabel.jsonResponse" in fields
103-
):
95+
options = QueryOptions(options.disable_tqdm, options.first, options.skip, batch_size_to_use)
96+
97+
requested_labels_json_response = "labels.jsonResponse" in fields
98+
requested_latest_label_json_response = "latestLabel.jsonResponse" in fields
99+
100+
required_fields = {"content", "jsonContent", "resolution.width", "resolution.height"}
101+
fields = list(fields)
102+
103+
if requested_labels_json_response:
104+
required_fields.add("labels.jsonResponseUrl")
105+
if requested_latest_label_json_response:
106+
required_fields.add("latestLabel.jsonResponseUrl")
107+
108+
for field in required_fields:
109+
if field not in fields:
110+
fields.append(field)
111+
112+
fragment = fragment_builder(fields)
113+
query = get_assets_query(fragment)
114+
where = asset_where_mapper(filters)
115+
assets_gen = PaginatedGraphQLQuery(self.graphql_client).execute_query_from_paginated_call(
116+
query, where, options, "Retrieving assets", GQL_COUNT_ASSETS
117+
)
118+
assets_gen = (
119+
load_asset_json_fields(asset, fields, self.http_client) for asset in assets_gen
120+
)
121+
122+
yield from assets_gen
123+
124+
def llm_list_assets_split(
125+
self,
126+
filters: AssetFilters,
127+
fields: ListOrTuple[str],
128+
options: QueryOptions,
129+
project_info,
130+
) -> Generator[dict, None, None]:
131+
"""List assets with given options for LLM projects.
132+
133+
This method handles the specific logic for LLM projects where jsonResponse
134+
needs to be rebuilt from annotations client-side.
135+
"""
136+
assets_batch_max_amount = 50
137+
batch_size_to_use = min(options.batch_size, assets_batch_max_amount)
138+
139+
requested_labels_json_response = "labels.jsonResponse" in fields
140+
requested_latest_label_json_response = "latestLabel.jsonResponse" in fields
141+
needs_json_response = requested_labels_json_response or requested_latest_label_json_response
142+
143+
if needs_json_response:
104144
nb_annotations = self.count_assets_annotations(filters)
105145
batch_size = (
106146
1
@@ -112,33 +152,17 @@ def list_assets_split( # pylint: disable=too-many-branches
112152

113153
options = QueryOptions(options.disable_tqdm, options.first, options.skip, batch_size)
114154

115-
requested_labels_json_response = "labels.jsonResponse" in fields
116-
requested_latest_label_json_response = "latestLabel.jsonResponse" in fields
117-
needs_json_response = requested_labels_json_response or requested_latest_label_json_response
118-
119155
required_fields = {"content", "jsonContent", "resolution.width", "resolution.height"}
120156
fields = list(fields)
121157

122158
static_fragments = {}
123-
if is_llm_project and needs_json_response:
124-
# For LLM projects: fetch annotations and rebuild jsonResponse client-side
125-
inner_annotation_fragment = get_annotation_fragment()
126-
annotation_fragment = f"""
127-
annotations {{
128-
{inner_annotation_fragment}
129-
}}
130-
"""
131-
static_fragments = {"labels": annotation_fragment, "latestLabel": annotation_fragment}
132-
133-
fields = list(fields)
134-
for field in required_fields:
135-
if field not in fields:
136-
fields.append(field)
137-
else:
138-
if requested_labels_json_response:
139-
required_fields.add("labels.jsonResponseUrl")
140-
if requested_latest_label_json_response:
141-
required_fields.add("latestLabel.jsonResponseUrl")
159+
inner_annotation_fragment = get_annotation_fragment()
160+
annotation_fragment = f"""
161+
annotations {{
162+
{inner_annotation_fragment}
163+
}}
164+
"""
165+
static_fragments = {"labels": annotation_fragment, "latestLabel": annotation_fragment}
142166

143167
for field in required_fields:
144168
if field not in fields:
@@ -154,7 +178,7 @@ def list_assets_split( # pylint: disable=too-many-branches
154178
load_asset_json_fields(asset, fields, self.http_client) for asset in assets_gen
155179
)
156180

157-
if is_llm_project and needs_json_response:
181+
if needs_json_response:
158182
# Rebuild jsonResponse from annotations for LLM projects
159183
converter = AnnotationsToJsonResponseConverter(
160184
json_interface=project_info["jsonInterface"],

src/kili/adapters/kili_api_gateway/label/operations_mixin.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,16 @@ def list_labels(
5656
project_info = get_project(
5757
self.graphql_client, filters.project_id, ("inputType", "jsonInterface")
5858
)
59+
# TODO(LAB-4269): TEMPORARY WORKAROUND - Remove when backend handles jsonResponseUrl for LLM
60+
# LLM projects have no jsonResponseUrl, so a dedicated method rebuilds jsonResponse client-side from annotations.
5961
if project_info["inputType"] in {
60-
"GEOSPATIAL",
61-
"VIDEO",
6262
"LLM_RLHF",
6363
"LLM_INSTR_FOLLOWING",
6464
"LLM_STATIC",
6565
}:
66+
yield from self.llm_list_labels_split(filters, fields, options, project_info)
67+
return
68+
if project_info["inputType"] in {"GEOSPATIAL", "VIDEO"}:
6669
yield from self.list_labels_split(filters, fields, options, project_info)
6770
return
6871

@@ -84,24 +87,45 @@ def list_labels_split(
8487
options: QueryOptions,
8588
project_info,
8689
) -> Generator[dict, None, None]:
87-
"""List labels."""
90+
"""List labels for VIDEO and GEOSPATIAL projects."""
8891
if project_info["inputType"] == "VIDEO":
8992
options = QueryOptions(
9093
options.disable_tqdm, options.first, options.skip, min(options.batch_size, 20)
9194
)
9295

93-
# For LLM projects, we need to fetch annotations and rebuild jsonResponse
94-
# because LLM projects don't have jsonResponseUrl
95-
is_llm_project = project_info["inputType"] in {
96-
"LLM_RLHF",
97-
"LLM_INSTR_FOLLOWING",
98-
"LLM_STATIC",
99-
}
96+
fields = list(fields)
97+
98+
if "jsonResponse" in fields and "jsonResponseUrl" not in fields:
99+
fields.append("jsonResponseUrl")
100+
fragment = fragment_builder(fields)
101+
query = get_labels_query(fragment)
102+
where = label_where_mapper(filters)
103+
labels_gen = PaginatedGraphQLQuery(self.graphql_client).execute_query_from_paginated_call(
104+
query, where, options, "Retrieving labels", GQL_COUNT_LABELS
105+
)
106+
labels_gen = (
107+
load_label_json_fields(label, fields, self.http_client) for label in labels_gen
108+
)
109+
110+
yield from labels_gen
111+
112+
def llm_list_labels_split(
113+
self,
114+
filters: LabelFilters,
115+
fields: ListOrTuple[str],
116+
options: QueryOptions,
117+
project_info,
118+
) -> Generator[dict, None, None]:
119+
"""List labels for LLM projects.
120+
121+
This method handles the specific logic for LLM projects where jsonResponse
122+
needs to be rebuilt from annotations client-side.
123+
"""
100124
needs_json_response = "jsonResponse" in fields
101125

102126
fields = list(fields)
103127

104-
if is_llm_project and needs_json_response:
128+
if needs_json_response:
105129
# For LLM projects: fetch annotations and rebuild jsonResponse client-side
106130
inner_annotation_fragment = get_annotation_fragment()
107131
full_fragment = f"""
@@ -111,8 +135,6 @@ def list_labels_split(
111135
}}
112136
"""
113137
else:
114-
if "jsonResponse" in fields and "jsonResponseUrl" not in fields:
115-
fields.append("jsonResponseUrl")
116138
full_fragment = fragment_builder(fields)
117139

118140
query = get_labels_query(full_fragment)
@@ -124,7 +146,7 @@ def list_labels_split(
124146
load_label_json_fields(label, fields, self.http_client) for label in labels_gen
125147
)
126148

127-
if is_llm_project and needs_json_response:
149+
if needs_json_response:
128150
# Rebuild jsonResponse from annotations for LLM projects
129151
converter = AnnotationsToJsonResponseConverter(
130152
json_interface=project_info["jsonInterface"],

0 commit comments

Comments
 (0)