Skip to content

Commit 1e84723

Browse files
FannyGaudin authored and baptiste-olivier committed
feat(LAB-3105): add tests on llm_v1 format
1 parent de80a02 commit 1e84723

11 files changed

Lines changed: 539 additions & 160 deletions

File tree

src/kili/entrypoints/cli/project/export.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def export_labels(
123123
asset_filter_kwargs=None,
124124
normalized_coordinates=normalized_coordinates,
125125
label_type_in=None,
126+
include_sent_back_labels=None,
126127
)
127128
except NoCompatibleJobError as excp:
128129
print(str(excp))

src/kili/llm/services/export/dynamic.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,6 @@
2929
"modelName",
3030
]
3131

32-
ASSET_NEEDED_FIELDS = [
33-
"assetProjectModels.id",
34-
"assetProjectModels.projectModelId",
35-
"assetProjectModels.configuration",
36-
"content",
37-
"externalId",
38-
"jsonMetadata",
39-
*(f"labels.{field}" for field in LABELS_NEEDED_FIELDS),
40-
"status",
41-
]
42-
4332

4433
class LLMDynamicExporter:
4534
"""Handle exports of LLM_RLHF projects."""

src/kili/presentation/client/label.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1138,7 +1138,7 @@ def export_labels(
11381138
asset_filter_kwargs: Optional[Dict[str, object]] = None,
11391139
normalized_coordinates: Optional[bool] = None,
11401140
label_type_in: Optional[List[str]] = None,
1141-
include_sent_back_labels: Optional[bool] = True,
1141+
include_sent_back_labels: Optional[bool] = None,
11421142
) -> Optional[List[Dict[str, Union[List[str], str]]]]:
11431143
# pylint: disable=line-too-long
11441144
"""Export the project labels with the requested format into the requested output path.

src/kili/services/export/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,17 @@ def export_labels( # pylint: disable=too-many-arguments, too-many-locals
3939
asset_filter_kwargs: Optional[Dict[str, object]],
4040
normalized_coordinates: Optional[bool],
4141
label_type_in: Optional[List[str]],
42-
include_sent_back_labels: Optional[bool] = True,
42+
include_sent_back_labels: Optional[bool],
4343
) -> Optional[List[Dict[str, Union[List[str], str]]]]:
4444
"""Export the selected assets into the required format, and save it into a file archive."""
4545
kili.kili_api_gateway.get_project(project_id, ["id"])
4646

47+
include_sent_back_labels = (
48+
include_sent_back_labels
49+
if include_sent_back_labels is not None
50+
else (label_format != "llm_v1")
51+
)
52+
4753
export_params = ExportParams(
4854
assets_ids=asset_ids,
4955
project_id=project_id,
@@ -57,7 +63,7 @@ def export_labels( # pylint: disable=too-many-arguments, too-many-locals
5763
asset_filter_kwargs=asset_filter_kwargs,
5864
normalized_coordinates=normalized_coordinates,
5965
label_type_in=label_type_in,
60-
include_sent_back_labels=include_sent_back_labels if label_format != "llm_v1" else False,
66+
include_sent_back_labels=include_sent_back_labels,
6167
)
6268

6369
logger = get_logger(log_level)

src/kili/services/export/format/base.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,9 @@ def preprocess_assets(self, assets: List[Dict]) -> List[Dict]:
289289
lambda label: label["isSentBackToQueue"] is False, labels_of_asset
290290
)
291291
)
292-
asset["labels"] = labels_of_asset
293-
assets_in_format.append(asset)
292+
if len(labels_of_asset) > 0:
293+
asset["labels"] = labels_of_asset
294+
assets_in_format.append(asset)
294295
if "latestLabel" in asset:
295296
label = asset["latestLabel"]
296297
if label is not None:

src/kili/services/export/format/llm/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def process_and_save(
5353
def process(self, assets: List[Dict]) -> List[Dict[str, Union[List[str], str]]]:
5454
"""LLM specific process."""
5555
warnings.warn(
56-
"Exporting llm labels with `kili.export` is deprecated."
56+
"Exporting llm labels with `kili.export_labels` is deprecated."
5757
" Please use `kili.llm.export` instead.",
5858
DeprecationWarning,
5959
stacklevel=2,
@@ -97,6 +97,8 @@ def _process_llm_dynamic_v1(self, assets: List[Dict]) -> List[Dict[str, Union[Li
9797

9898
def _process_llm_v1(self, assets: List[Dict]) -> List[Dict[str, Union[List[str], str]]]:
9999
result = []
100+
if len(assets) == 0:
101+
return result
100102
for asset in assets:
101103
result.append(
102104
{
@@ -263,7 +265,10 @@ def _format_raw_data(
263265
"id": _safe_pop(chat_items_ids),
264266
"chat_id": chat_id,
265267
"model": models[index_completion]
266-
if (index == len(prompts) - 1 or all_model_keys)
268+
if (
269+
(index == len(prompts) - 1 or all_model_keys)
270+
and len(models) > index_completion
271+
)
267272
else None,
268273
}
269274
)

tests/unit/llm/services/export/test_static.py

Lines changed: 3 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -2,148 +2,9 @@
22
import tempfile
33

44
from kili.llm.presentation.client.llm import LlmClientMethods
5-
6-
mock_json_interface = {
7-
"jobs": {
8-
"CLASSIFICATION_JOB": {
9-
"content": {
10-
"categories": {
11-
"A_BETTER_THAN_B": {
12-
"children": [],
13-
"name": "A better than B",
14-
"id": "category1",
15-
},
16-
"B_BETTER_THAN_A": {
17-
"children": [],
18-
"name": "B better than A",
19-
"id": "category2",
20-
},
21-
"TIE": {"children": [], "name": "Tie", "id": "category3"},
22-
},
23-
"input": "radio",
24-
},
25-
"instruction": "Compare",
26-
"mlTask": "CLASSIFICATION",
27-
"required": 0,
28-
"isChild": False,
29-
"isNew": False,
30-
},
31-
"TRANSCRIPTION_JOB": {
32-
"content": {"input": "markdown"},
33-
"instruction": "",
34-
"mlTask": "TRANSCRIPTION",
35-
"required": 0,
36-
"isChild": False,
37-
"isNew": False,
38-
},
39-
}
40-
}
41-
42-
mock_fetch_assets = [
43-
{
44-
"labels": [
45-
{
46-
"author": {
47-
"id": "user-1",
48-
"email": "test+admin@kili-technology.com",
49-
"firstname": "Test",
50-
"lastname": "Admin",
51-
},
52-
"jsonResponse": {
53-
"CLASSIFICATION_JOB": {"categories": [{"name": "A_BETTER_THAN_B"}]}
54-
},
55-
"createdAt": "2024-08-05T13:03:00.051Z",
56-
"isLatestLabelForUser": True,
57-
"isSentBackToQueue": False,
58-
"labelType": "DEFAULT",
59-
"modelName": None,
60-
}
61-
],
62-
"content": "https://storage.googleapis.com/label-public-staging/demo-projects/LLM/01.json",
63-
"externalId": "asset#0",
64-
"jsonMetadata": {},
65-
"status": "LABELED",
66-
},
67-
{
68-
"labels": [
69-
{
70-
"author": {
71-
"id": "user-1",
72-
"email": "test+admin@kili-technology.com",
73-
"firstname": "Test",
74-
"lastname": "Admin",
75-
},
76-
"jsonResponse": {
77-
"CLASSIFICATION_JOB": {"categories": [{"name": "B_BETTER_THAN_A"}]}
78-
},
79-
"createdAt": "2024-08-05T13:03:03.061Z",
80-
"isLatestLabelForUser": True,
81-
"isSentBackToQueue": False,
82-
"labelType": "DEFAULT",
83-
"modelName": None,
84-
}
85-
],
86-
"content": "https://storage.googleapis.com/label-public-staging/demo-projects/LLM/02.json",
87-
"externalId": "asset#1",
88-
"jsonMetadata": {},
89-
"status": "LABELED",
90-
},
91-
{
92-
"labels": [
93-
{
94-
"author": {
95-
"id": "user-1",
96-
"email": "test+admin@kili-technology.com",
97-
"firstname": "Test",
98-
"lastname": "Admin",
99-
},
100-
"jsonResponse": {
101-
"CLASSIFICATION_JOB": {"categories": [{"name": "TIE"}]},
102-
"TRANSCRIPTION_JOB": {"text": "There is only some formatting changes\n"},
103-
},
104-
"createdAt": "2024-08-05T13:03:16.028Z",
105-
"isLatestLabelForUser": True,
106-
"isSentBackToQueue": True,
107-
"labelType": "DEFAULT",
108-
"modelName": None,
109-
}
110-
],
111-
"content": "https://storage.googleapis.com/label-public-staging/demo-projects/LLM/03.json",
112-
"externalId": "asset#2",
113-
"jsonMetadata": {},
114-
"status": "LABELED",
115-
},
116-
]
117-
118-
mock_raw_asset_content = """{
119-
"prompts": [
120-
{
121-
"prompt": "BLABLABLA",
122-
"completions": [
123-
{
124-
"content": "response A1"
125-
},
126-
{
127-
"content": "response B1"
128-
}
129-
]
130-
},
131-
{
132-
"prompt": "BLIBLIBLI",
133-
"completions": [
134-
{
135-
"content": "response A2"
136-
},
137-
{
138-
"content": "response B2"
139-
}
140-
]
141-
}
142-
],
143-
"type": "markdown",
144-
"version": "0.1"
145-
}
146-
"""
5+
from tests.unit.services.export.fakes.llm_json_interface import mock_json_interface
6+
from tests.unit.services.export.fakes.llm_project_assets import mock_fetch_assets
7+
from tests.unit.services.export.fakes.llm_raw_asset_content import mock_raw_asset_content
1478

1489
expected_export = [
14910
{
tests/unit/services/export/fakes/llm_json_interface.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
mock_json_interface = {
2+
"jobs": {
3+
"CLASSIFICATION_JOB": {
4+
"content": {
5+
"categories": {
6+
"A_BETTER_THAN_B": {
7+
"children": [],
8+
"name": "A better than B",
9+
"id": "category1",
10+
},
11+
"B_BETTER_THAN_A": {
12+
"children": [],
13+
"name": "B better than A",
14+
"id": "category2",
15+
},
16+
"TIE": {"children": [], "name": "Tie", "id": "category3"},
17+
},
18+
"input": "radio",
19+
},
20+
"instruction": "Compare",
21+
"mlTask": "CLASSIFICATION",
22+
"required": 0,
23+
"isChild": False,
24+
"isNew": False,
25+
},
26+
"TRANSCRIPTION_JOB": {
27+
"content": {"input": "markdown"},
28+
"instruction": "",
29+
"mlTask": "TRANSCRIPTION",
30+
"required": 0,
31+
"isChild": False,
32+
"isNew": False,
33+
},
34+
}
35+
}
tests/unit/services/export/fakes/llm_project_assets.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
mock_fetch_assets = [
2+
{
3+
"labels": [
4+
{
5+
"author": {
6+
"id": "user-1",
7+
"email": "test+admin@kili-technology.com",
8+
"firstname": "Test",
9+
"lastname": "Admin",
10+
},
11+
"jsonResponse": {
12+
"CLASSIFICATION_JOB": {"categories": [{"name": "A_BETTER_THAN_B"}]}
13+
},
14+
"createdAt": "2024-08-05T13:03:00.051Z",
15+
"isLatestLabelForUser": True,
16+
"isSentBackToQueue": False,
17+
"labelType": "DEFAULT",
18+
"modelName": None,
19+
}
20+
],
21+
"content": "https://storage.googleapis.com/label-public-staging/demo-projects/LLM/01.json",
22+
"externalId": "asset#0",
23+
"jsonMetadata": {},
24+
"status": "LABELED",
25+
},
26+
{
27+
"labels": [
28+
{
29+
"author": {
30+
"id": "user-1",
31+
"email": "test+admin@kili-technology.com",
32+
"firstname": "Test",
33+
"lastname": "Admin",
34+
},
35+
"jsonResponse": {
36+
"CLASSIFICATION_JOB": {"categories": [{"name": "B_BETTER_THAN_A"}]}
37+
},
38+
"createdAt": "2024-08-05T13:03:03.061Z",
39+
"isLatestLabelForUser": True,
40+
"isSentBackToQueue": False,
41+
"labelType": "DEFAULT",
42+
"modelName": None,
43+
}
44+
],
45+
"content": "https://storage.googleapis.com/label-public-staging/demo-projects/LLM/02.json",
46+
"externalId": "asset#1",
47+
"jsonMetadata": {},
48+
"status": "LABELED",
49+
},
50+
{
51+
"labels": [
52+
{
53+
"author": {
54+
"id": "user-1",
55+
"email": "test+admin@kili-technology.com",
56+
"firstname": "Test",
57+
"lastname": "Admin",
58+
},
59+
"jsonResponse": {
60+
"CLASSIFICATION_JOB": {"categories": [{"name": "TIE"}]},
61+
"TRANSCRIPTION_JOB": {"text": "There is only some formatting changes\n"},
62+
},
63+
"createdAt": "2024-08-05T13:03:16.028Z",
64+
"isLatestLabelForUser": True,
65+
"isSentBackToQueue": True,
66+
"labelType": "DEFAULT",
67+
"modelName": None,
68+
}
69+
],
70+
"content": "https://storage.googleapis.com/label-public-staging/demo-projects/LLM/03.json",
71+
"externalId": "asset#2",
72+
"jsonMetadata": {},
73+
"status": "LABELED",
74+
},
75+
]
tests/unit/services/export/fakes/llm_raw_asset_content.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
mock_raw_asset_content = """{
2+
"prompts": [
3+
{
4+
"prompt": "BLABLABLA",
5+
"completions": [
6+
{
7+
"content": "response A1"
8+
},
9+
{
10+
"content": "response B1"
11+
}
12+
]
13+
},
14+
{
15+
"prompt": "BLIBLIBLI",
16+
"completions": [
17+
{
18+
"content": "response A2"
19+
},
20+
{
21+
"content": "response B2"
22+
}
23+
]
24+
}
25+
],
26+
"type": "markdown",
27+
"version": "0.1"
28+
}
29+
"""

0 commit comments

Comments
 (0)