@@ -51,13 +51,16 @@ def list_assets(
5151 project_info = get_project (
5252 self .graphql_client , filters .project_id , ("inputType" , "jsonInterface" )
5353 )
54+ # TODO(LAB-4269): TEMPORARY WORKAROUND - Remove when backend handles jsonResponseUrl for LLM
55+ # projects. Until then, LLM assets are listed separately, batched by number of annotations.
5456 if project_info ["inputType" ] in {
55- "VIDEO" ,
5657 "LLM_RLHF" ,
5758 "LLM_INSTR_FOLLOWING" ,
5859 "LLM_STATIC" ,
59- "GEOSPATIAL" ,
6060 }:
61+ yield from self .llm_list_assets_split (filters , fields , options , project_info )
62+ return
63+ if project_info ["inputType" ] in {"VIDEO" , "GEOSPATIAL" }:
6164 yield from self .list_assets_split (filters , fields , options , project_info )
6265 return
6366
@@ -78,29 +81,66 @@ def list_assets(
7881
7982 yield from assets_gen
8083
81- def list_assets_split ( # pylint: disable=too-many-branches
84+ def list_assets_split (
8285 self ,
8386 filters : AssetFilters ,
8487 fields : ListOrTuple [str ],
8588 options : QueryOptions ,
8689 project_info ,
8790 ) -> Generator [dict , None , None ]:
88- """List assets with given options."""
89- # For LLM projects, we need to fetch annotations and rebuild jsonResponse
90- # because LLM projects don't have jsonResponseUrl
91- is_llm_project = project_info ["inputType" ] in {
92- "LLM_RLHF" ,
93- "LLM_INSTR_FOLLOWING" ,
94- "LLM_STATIC" ,
95- }
96-
91+ """List assets with given options for VIDEO and GEOSPATIAL projects."""
9792 assets_batch_max_amount = 10 if project_info ["inputType" ] == "VIDEO" else 50
9893 batch_size_to_use = min (options .batch_size , assets_batch_max_amount )
9994
100- # For LLM projects fetching annotations, adjust batch size based on annotation count
101- if is_llm_project and (
102- "labels.jsonResponse" in fields or "latestLabel.jsonResponse" in fields
103- ):
95+ options = QueryOptions (options .disable_tqdm , options .first , options .skip , batch_size_to_use )
96+
97+ requested_labels_json_response = "labels.jsonResponse" in fields
98+ requested_latest_label_json_response = "latestLabel.jsonResponse" in fields
99+
100+ required_fields = {"content" , "jsonContent" , "resolution.width" , "resolution.height" }
101+ fields = list (fields )
102+
103+ if requested_labels_json_response :
104+ required_fields .add ("labels.jsonResponseUrl" )
105+ if requested_latest_label_json_response :
106+ required_fields .add ("latestLabel.jsonResponseUrl" )
107+
108+ for field in required_fields :
109+ if field not in fields :
110+ fields .append (field )
111+
112+ fragment = fragment_builder (fields )
113+ query = get_assets_query (fragment )
114+ where = asset_where_mapper (filters )
115+ assets_gen = PaginatedGraphQLQuery (self .graphql_client ).execute_query_from_paginated_call (
116+ query , where , options , "Retrieving assets" , GQL_COUNT_ASSETS
117+ )
118+ assets_gen = (
119+ load_asset_json_fields (asset , fields , self .http_client ) for asset in assets_gen
120+ )
121+
122+ yield from assets_gen
123+
124+ def llm_list_assets_split (
125+ self ,
126+ filters : AssetFilters ,
127+ fields : ListOrTuple [str ],
128+ options : QueryOptions ,
129+ project_info ,
130+ ) -> Generator [dict , None , None ]:
131+ """List assets with given options for LLM projects.
132+
133+ This method handles the specific logic for LLM projects where jsonResponse
134+ needs to be rebuilt from annotations client-side.
135+ """
136+ assets_batch_max_amount = 50
137+ batch_size_to_use = min (options .batch_size , assets_batch_max_amount )
138+
139+ requested_labels_json_response = "labels.jsonResponse" in fields
140+ requested_latest_label_json_response = "latestLabel.jsonResponse" in fields
141+ needs_json_response = requested_labels_json_response or requested_latest_label_json_response
142+
143+ if needs_json_response :
104144 nb_annotations = self .count_assets_annotations (filters )
105145 batch_size = (
106146 1
@@ -112,33 +152,17 @@ def list_assets_split( # pylint: disable=too-many-branches
112152
113153 options = QueryOptions (options .disable_tqdm , options .first , options .skip , batch_size )
114154
115- requested_labels_json_response = "labels.jsonResponse" in fields
116- requested_latest_label_json_response = "latestLabel.jsonResponse" in fields
117- needs_json_response = requested_labels_json_response or requested_latest_label_json_response
118-
119155 required_fields = {"content" , "jsonContent" , "resolution.width" , "resolution.height" }
120156 fields = list (fields )
121157
122158 static_fragments = {}
123- if is_llm_project and needs_json_response :
124- # For LLM projects: fetch annotations and rebuild jsonResponse client-side
125- inner_annotation_fragment = get_annotation_fragment ()
126- annotation_fragment = f"""
127- annotations {{
128- { inner_annotation_fragment }
129- }}
130- """
131- static_fragments = {"labels" : annotation_fragment , "latestLabel" : annotation_fragment }
132-
133- fields = list (fields )
134- for field in required_fields :
135- if field not in fields :
136- fields .append (field )
137- else :
138- if requested_labels_json_response :
139- required_fields .add ("labels.jsonResponseUrl" )
140- if requested_latest_label_json_response :
141- required_fields .add ("latestLabel.jsonResponseUrl" )
159+ inner_annotation_fragment = get_annotation_fragment ()
160+ annotation_fragment = f"""
161+ annotations {{
162+ { inner_annotation_fragment }
163+ }}
164+ """
165+ static_fragments = {"labels" : annotation_fragment , "latestLabel" : annotation_fragment }
142166
143167 for field in required_fields :
144168 if field not in fields :
@@ -154,7 +178,7 @@ def list_assets_split( # pylint: disable=too-many-branches
154178 load_asset_json_fields (asset , fields , self .http_client ) for asset in assets_gen
155179 )
156180
157- if is_llm_project and needs_json_response :
181+ if needs_json_response :
158182 # Rebuild jsonResponse from annotations for LLM projects
159183 converter = AnnotationsToJsonResponseConverter (
160184 json_interface = project_info ["jsonInterface" ],
0 commit comments