Skip to content

Commit 3e777ee

Browse files
FannyGaudin authored and
baptiste-olivier committed
feat(LAB-3244): on LLM dynamic projects export annotations at conversation level
1 parent c2bb207 commit 3e777ee

4 files changed

Lines changed: 131 additions & 22 deletions

File tree

src/kili/llm/services/export/dynamic.py

Lines changed: 30 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ def export(
5050
chat_items = label["chatItems"]
5151
annotations = label["annotations"]
5252
rounds = self._build_rounds(chat_items, annotations, json_interface)
53+
total_rounds = len(rounds)
5354
for step, round in enumerate(rounds):
5455
raw_data = _format_raw_data(
5556
round["context"]
@@ -59,25 +60,30 @@ def export(
5960
label["id"],
6061
obfuscated_models,
6162
)
63+
formatted_response = _format_json_response(
64+
json_interface["jobs"],
65+
round["annotations"],
66+
round["completion"],
67+
obfuscated_models,
68+
)
69+
label_data = {
70+
"author": label["author"]["email"],
71+
"created_at": label["createdAt"],
72+
"label_type": label["labelType"],
73+
"label": {},
74+
}
75+
if formatted_response["turn"]:
76+
label_data["label"]["turn"] = formatted_response["turn"]
77+
if step == total_rounds - 1 and formatted_response["conversation"]:
78+
label_data["label"]["conversation"] = formatted_response["conversation"]
79+
6280
result[f"{step}"] = {
6381
"external_id": asset["externalId"],
6482
"metadata": asset["jsonMetadata"],
6583
"models": _format_models_object(
6684
asset["assetProjectModels"], obfuscated_models
6785
),
68-
"labels": [
69-
{
70-
"author": label["author"]["email"],
71-
"created_at": label["createdAt"],
72-
"label_type": label["labelType"],
73-
"label": _format_json_response(
74-
json_interface["jobs"],
75-
round["annotations"],
76-
round["completion"],
77-
obfuscated_models,
78-
),
79-
}
80-
],
86+
"labels": [label_data],
8187
"raw_data": raw_data,
8288
"status": asset["status"],
8389
}
@@ -173,6 +179,9 @@ def _build_rounds(self, chat_items, annotations, json_interface):
173179
has_children = True
174180
parent_target = chat_item["id"]
175181

182+
current_round["annotations"] += [
183+
annotation for annotation in annotations if annotation["chatItemId"] is None
184+
]
176185
rounds.append(current_round)
177186
new_context = (
178187
current_round["context"]
@@ -191,6 +200,9 @@ def _build_rounds(self, chat_items, annotations, json_interface):
191200

192201
raise ValueError(f"Role {node['role']} not supported")
193202
if current_round["prompt"] is not None:
203+
current_round["annotations"] += [
204+
annotation for annotation in annotations if annotation["chatItemId"] is None
205+
]
194206
rounds.append(current_round)
195207
return rounds
196208

@@ -225,8 +237,8 @@ def _format_comparison_annotation(annotation, completions, job, obfuscated_model
225237

226238
def _format_json_response(
227239
jobs_config: Dict, annotations: List[Dict], completions: List[Dict], obfuscated_models: Dict
228-
) -> Dict[str, Union[str, List[str]]]:
229-
result = {}
240+
) -> Dict[str, Dict[str, Union[str, List[str]]]]:
241+
result = {"turn": {}, "conversation": {}}
230242
for annotation in annotations:
231243
formatted_response = None
232244
job = jobs_config[annotation["job"]]
@@ -243,8 +255,10 @@ def _format_json_response(
243255
logging.warning(
244256
f"Annotation with job {annotation['job']} with mlTask {job['mlTask']} not supported. Ignored in the export."
245257
)
258+
elif "level" in job and job["level"] == "conversation":
259+
result["conversation"][annotation["job"]] = formatted_response
246260
else:
247-
result[annotation["job"]] = formatted_response
261+
result["turn"][annotation["job"]] = formatted_response
248262

249263
return result
250264

src/kili/llm/services/export/static.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def _process_llm_v1(
6464

6565
def _format_json_response(
6666
jobs_config: Dict, json_response: Dict
67-
) -> Dict[str, Union[str, List[str]]]:
67+
) -> Dict[str, Dict[str, Union[str, List[str]]]]:
6868
result = {}
6969
for job_name, job_value in json_response.items():
7070
job_config = jobs_config[job_name]
@@ -83,6 +83,8 @@ def _format_json_response(
8383
result[job_name] = job_value["text"]
8484
else:
8585
logging.warning(f"Job {job_name} with mlTask {job_config['mlTask']} not supported")
86+
if len(result) != 0:
87+
return {"conversation": result}
8688
return result
8789

8890

tests/unit/llm/services/export/test_dynamic.py

Lines changed: 96 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import copy
2+
13
import pytest
24

35
from kili.llm.presentation.client.llm import LlmClientMethods
@@ -278,7 +280,9 @@
278280
"author": "test+admin@kili-technology.com",
279281
"created_at": "2024-08-06T12:30:42.122Z",
280282
"label_type": "DEFAULT",
281-
"label": {"COMPARISON_JOB": "A_3", "CLASSIFICATION_JOB": ["BOTH_ARE_GOOD"]},
283+
"label": {
284+
"turn": {"COMPARISON_JOB": "A_3", "CLASSIFICATION_JOB": ["BOTH_ARE_GOOD"]},
285+
},
282286
}
283287
],
284288
},
@@ -353,7 +357,9 @@
353357
"author": "test+admin@kili-technology.com",
354358
"created_at": "2024-08-06T12:30:42.122Z",
355359
"label_type": "DEFAULT",
356-
"label": {"COMPARISON_JOB": "B_1"},
360+
"label": {
361+
"turn": {"COMPARISON_JOB": "B_1"},
362+
},
357363
}
358364
],
359365
},
@@ -442,7 +448,9 @@
442448
"author": "test+admin@kili-technology.com",
443449
"created_at": "2024-08-06T12:30:42.122Z",
444450
"label_type": "DEFAULT",
445-
"label": {"COMPARISON_JOB": "A_2"},
451+
"label": {
452+
"turn": {"COMPARISON_JOB": "A_2"},
453+
},
446454
}
447455
],
448456
},
@@ -616,3 +624,88 @@ def test_export_dynamic_empty_json_interface(mocker):
616624
kili_llm.export(
617625
project_id="project_id",
618626
)
627+
628+
629+
def test_export_dynamic_with_conversation_level(mocker):
630+
updated_mock_json_interface = copy.deepcopy(mock_json_interface)
631+
632+
updated_mock_json_interface["jobs"].update(
633+
{
634+
"CLASSIFICATION_JOB_0": {
635+
"content": {
636+
"categories": {
637+
"GOOD": {"children": [], "name": "Good", "id": "category7"},
638+
"BAD": {"children": [], "name": "Bad", "id": "category8"},
639+
},
640+
"input": "radio",
641+
},
642+
"level": "conversation",
643+
"instruction": "Overall quality",
644+
"mlTask": "CLASSIFICATION",
645+
"required": 1,
646+
"isChild": False,
647+
"isNew": False,
648+
},
649+
"TRANSCRIPTION_JOB": {
650+
"content": {"input": "textField"},
651+
"level": "conversation",
652+
"instruction": "Write something about the overall quality",
653+
"mlTask": "TRANSCRIPTION",
654+
"required": 1,
655+
"isChild": False,
656+
"isNew": False,
657+
},
658+
}
659+
)
660+
661+
updated_mock_fetch_assets = copy.deepcopy(mock_fetch_assets)
662+
updated_mock_fetch_assets[0]["labels"][0]["annotations"].extend(
663+
[
664+
{
665+
"id": "20241025134207822-9",
666+
"job": "CLASSIFICATION_JOB_0",
667+
"path": [],
668+
"labelId": "clzief6q2003e7tc91jm46uii",
669+
"chatItemId": None,
670+
"annotationValue": {
671+
"categories": ["GOOD"],
672+
},
673+
"__typename": "ClassificationAnnotation",
674+
},
675+
{
676+
"id": "20241025134209366-10",
677+
"job": "TRANSCRIPTION_JOB",
678+
"path": [],
679+
"labelId": "clzief6q2003e7tc91jm46uii",
680+
"chatItemId": None,
681+
"annotationValue": {
682+
"text": "something",
683+
},
684+
"__typename": "TranscriptionAnnotation",
685+
},
686+
]
687+
)
688+
689+
updated_expected_export = copy.deepcopy(expected_export)
690+
updated_expected_export[0]["2"]["labels"][0]["label"]["conversation"] = {
691+
"CLASSIFICATION_JOB_0": ["GOOD"],
692+
"TRANSCRIPTION_JOB": "something",
693+
}
694+
get_project_return_val = {
695+
"jsonInterface": updated_mock_json_interface,
696+
"inputType": "LLM_INSTR_FOLLOWING",
697+
"title": "Test project",
698+
"id": "project_id",
699+
"dataConnections": None,
700+
}
701+
kili_api_gateway = mocker.MagicMock()
702+
kili_api_gateway.count_assets.return_value = 3
703+
kili_api_gateway.get_project.return_value = get_project_return_val
704+
kili_api_gateway.list_assets.return_value = updated_mock_fetch_assets
705+
706+
kili_llm = LlmClientMethods(kili_api_gateway)
707+
708+
result = kili_llm.export(
709+
project_id="project_id",
710+
)
711+
assert result == updated_expected_export

tests/unit/llm/services/export/test_static.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
"author": "test+admin@kili-technology.com",
6161
"created_at": "2024-08-05T13:03:00.051Z",
6262
"label_type": "DEFAULT",
63-
"label": {"CLASSIFICATION_JOB": ["A_BETTER_THAN_B"]},
63+
"label": {"conversation": {"CLASSIFICATION_JOB": ["A_BETTER_THAN_B"]}},
6464
}
6565
],
6666
},
@@ -117,7 +117,7 @@
117117
"author": "test+admin@kili-technology.com",
118118
"created_at": "2024-08-05T13:03:03.061Z",
119119
"label_type": "DEFAULT",
120-
"label": {"CLASSIFICATION_JOB": ["B_BETTER_THAN_A"]},
120+
"label": {"conversation": {"CLASSIFICATION_JOB": ["B_BETTER_THAN_A"]}},
121121
}
122122
],
123123
},

0 commit comments

Comments
 (0)