Skip to content

Commit 1fc158a

Browse files
committed
complete small fixes to allow all cards to pass through _source_to_dataset
Signed-off-by: dafnapension <dafnashein@yahoo.com>
1 parent 6be87ad commit 1fc158a

14 files changed

Lines changed: 126 additions & 1 deletion

prepare/cards/bfcl.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,10 @@
55
from unitxt.operators import (
66
Copy,
77
ExecuteExpression,
8+
FilterByExpression,
89
Set,
910
)
10-
from unitxt.stream_operators import JoinStreams
11+
from unitxt.stream_operators import DeleteSplits, JoinStreams
1112
from unitxt.test_utils.card import test_card
1213

1314
base_path = "https://raw.githubusercontent.com/ShishirPatil/gorilla/70b6a4a2144597b1f99d1f4d3185d35d7ee532a4/berkeley-function-call-leaderboard/data/"
@@ -31,6 +32,7 @@
3132
on="id",
3233
new_stream_name="test",
3334
),
35+
DeleteSplits(splits=["questions", "answers"]),
3436
Copy(field="question/0/0/content", to_field="query"),
3537
Copy(field="function", to_field="tools"),
3638
"operators.fix_json_schema",
@@ -100,9 +102,13 @@
100102
on="id",
101103
new_stream_name="test",
102104
),
105+
DeleteSplits(splits=["questions", "answers"]),
103106
Copy(field="question/*/0", to_field="dialog"),
104107
Copy(field="function", to_field="tools"),
105108
"operators.fix_json_schema",
109+
FilterByExpression(
110+
expression="all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
111+
),
106112
ExecuteExpression(
107113
expression='[{"name": k, "arguments": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != ""] for vval in v.values()])]',
108114
to_field="reference_calls",

prepare/operators/fix_json_schema.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
operator = RecursiveReplace(
55
key="type",
66
map_values={
7+
"": "object",
78
"dict": "object",
89
"float": "number",
910
"tuple": "array",

src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,13 @@
2020
"on": "id",
2121
"new_stream_name": "test"
2222
},
23+
{
24+
"__type__": "delete_splits",
25+
"splits": [
26+
"questions",
27+
"answers"
28+
]
29+
},
2330
{
2431
"__type__": "copy",
2532
"field": "question/*/0",
@@ -31,6 +38,10 @@
3138
"to_field": "tools"
3239
},
3340
"operators.fix_json_schema",
41+
{
42+
"__type__": "filter_by_expression",
43+
"expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
44+
},
3445
{
3546
"__type__": "execute_expression",
3647
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",

src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,13 @@
2020
"on": "id",
2121
"new_stream_name": "test"
2222
},
23+
{
24+
"__type__": "delete_splits",
25+
"splits": [
26+
"questions",
27+
"answers"
28+
]
29+
},
2330
{
2431
"__type__": "copy",
2532
"field": "question/*/0",
@@ -31,6 +38,10 @@
3138
"to_field": "tools"
3239
},
3340
"operators.fix_json_schema",
41+
{
42+
"__type__": "filter_by_expression",
43+
"expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
44+
},
3445
{
3546
"__type__": "execute_expression",
3647
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",

src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,13 @@
2020
"on": "id",
2121
"new_stream_name": "test"
2222
},
23+
{
24+
"__type__": "delete_splits",
25+
"splits": [
26+
"questions",
27+
"answers"
28+
]
29+
},
2330
{
2431
"__type__": "copy",
2532
"field": "question/*/0",
@@ -31,6 +38,10 @@
3138
"to_field": "tools"
3239
},
3340
"operators.fix_json_schema",
41+
{
42+
"__type__": "filter_by_expression",
43+
"expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
44+
},
3445
{
3546
"__type__": "execute_expression",
3647
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",

src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,13 @@
2020
"on": "id",
2121
"new_stream_name": "test"
2222
},
23+
{
24+
"__type__": "delete_splits",
25+
"splits": [
26+
"questions",
27+
"answers"
28+
]
29+
},
2330
{
2431
"__type__": "copy",
2532
"field": "question/*/0",
@@ -31,6 +38,10 @@
3138
"to_field": "tools"
3239
},
3340
"operators.fix_json_schema",
41+
{
42+
"__type__": "filter_by_expression",
43+
"expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
44+
},
3445
{
3546
"__type__": "execute_expression",
3647
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",

src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,13 @@
2020
"on": "id",
2121
"new_stream_name": "test"
2222
},
23+
{
24+
"__type__": "delete_splits",
25+
"splits": [
26+
"questions",
27+
"answers"
28+
]
29+
},
2330
{
2431
"__type__": "copy",
2532
"field": "question/*/0",
@@ -31,6 +38,10 @@
3138
"to_field": "tools"
3239
},
3340
"operators.fix_json_schema",
41+
{
42+
"__type__": "filter_by_expression",
43+
"expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
44+
},
3445
{
3546
"__type__": "execute_expression",
3647
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",

src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,13 @@
2020
"on": "id",
2121
"new_stream_name": "test"
2222
},
23+
{
24+
"__type__": "delete_splits",
25+
"splits": [
26+
"questions",
27+
"answers"
28+
]
29+
},
2330
{
2431
"__type__": "copy",
2532
"field": "question/*/0",
@@ -31,6 +38,10 @@
3138
"to_field": "tools"
3239
},
3340
"operators.fix_json_schema",
41+
{
42+
"__type__": "filter_by_expression",
43+
"expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
44+
},
3445
{
3546
"__type__": "execute_expression",
3647
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",

src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,13 @@
2020
"on": "id",
2121
"new_stream_name": "test"
2222
},
23+
{
24+
"__type__": "delete_splits",
25+
"splits": [
26+
"questions",
27+
"answers"
28+
]
29+
},
2330
{
2431
"__type__": "copy",
2532
"field": "question/*/0",
@@ -31,6 +38,10 @@
3138
"to_field": "tools"
3239
},
3340
"operators.fix_json_schema",
41+
{
42+
"__type__": "filter_by_expression",
43+
"expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
44+
},
3445
{
3546
"__type__": "execute_expression",
3647
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",

src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,13 @@
2020
"on": "id",
2121
"new_stream_name": "test"
2222
},
23+
{
24+
"__type__": "delete_splits",
25+
"splits": [
26+
"questions",
27+
"answers"
28+
]
29+
},
2330
{
2431
"__type__": "copy",
2532
"field": "question/*/0",
@@ -31,6 +38,10 @@
3138
"to_field": "tools"
3239
},
3340
"operators.fix_json_schema",
41+
{
42+
"__type__": "filter_by_expression",
43+
"expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
44+
},
3445
{
3546
"__type__": "execute_expression",
3647
"expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",

0 commit comments

Comments
 (0)