Skip to content

Commit a52ca79

Browse files
committed
rename test cases and code with new data handler values
Signed-off-by: Dushyant Behl <dushyantbehl@in.ibm.com>
1 parent c77f6fd commit a52ca79

20 files changed

Lines changed: 189 additions & 167 deletions

tests/artifacts/predefined_data_configs/apply_custom_jinja_template.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,6 @@ datasets:
1010
remove_columns: all
1111
batched: false
1212
fn_kwargs:
13-
dataset_text_field: "dataset_text_field"
13+
formatted_text_column_name: "formatted_text"
1414
template: "dataset_template"
1515
add_eos_token: true

tests/artifacts/predefined_data_configs/apply_custom_template.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,6 @@ datasets:
1010
remove_columns: all
1111
batched: false
1212
fn_kwargs:
13-
dataset_text_field: "dataset_text_field"
13+
formatted_text_column_name: "formatted_text"
1414
template: "dataset_template"
1515
add_eos_token: true

tests/artifacts/predefined_data_configs/apply_custom_template_streaming.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,5 +11,5 @@ datasets:
1111
remove_columns: all
1212
batched: false
1313
fn_kwargs:
14-
dataset_text_field: "dataset_text_field"
14+
formatted_text_column_name: "formatted_text"
1515
template: "dataset_template"

tests/artifacts/predefined_data_configs/duplicate_columns.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,5 +9,5 @@ datasets:
99
arguments:
1010
batched: false
1111
fn_kwargs:
12-
old_column: "input_ids"
13-
new_column: "labels"
12+
old_column_name: "input_ids"
13+
new_column_name: "labels"

tests/artifacts/predefined_data_configs/multi_turn_data_with_chat_template.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ datasets:
1818
arguments:
1919
remove_columns: all
2020
fn_kwargs:
21-
dataset_text_field: "formatted_chat_data"
21+
formatted_text_column_name: "formatted_chat_data"
2222
- name: dataset_2
2323
data_paths:
2424
- "FILE_PATH"
@@ -27,7 +27,7 @@ datasets:
2727
arguments:
2828
remove_columns: all
2929
fn_kwargs:
30-
dataset_text_field: "formatted_chat_data"
30+
formatted_text_column_name: "formatted_chat_data"
3131
- name: dataset_3
3232
data_paths:
3333
- "FILE_PATH"
@@ -36,4 +36,4 @@ datasets:
3636
arguments:
3737
remove_columns: all
3838
fn_kwargs:
39-
dataset_text_field: "formatted_chat_data"
39+
formatted_text_column_name: "formatted_chat_data"

tests/artifacts/predefined_data_configs/multi_turn_data_with_chat_template_granite_3_1B.yaml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -59,8 +59,8 @@ datasets:
5959
arguments:
6060
remove_columns: all
6161
fn_kwargs:
62-
dataset_text_field: "formatted_chat_data"
63-
conversation_column: "messages"
62+
formatted_text_column_name: "formatted_chat_data"
63+
conversation_column_name: "messages"
6464
- name: dataset_2
6565
data_paths:
6666
- "FILE_PATH"
@@ -69,8 +69,8 @@ datasets:
6969
arguments:
7070
remove_columns: all
7171
fn_kwargs:
72-
dataset_text_field: "formatted_chat_data"
73-
conversation_column: "messages"
72+
formatted_text_column_name: "formatted_chat_data"
73+
conversation_column_name: "messages"
7474
- name: dataset_3
7575
data_paths:
7676
- "FILE_PATH"
@@ -79,5 +79,5 @@ datasets:
7979
arguments:
8080
remove_columns: all
8181
fn_kwargs:
82-
dataset_text_field: "formatted_chat_data"
83-
conversation_column: "messages"
82+
formatted_text_column_name: "formatted_chat_data"
83+
conversation_column_name: "messages"

tests/artifacts/predefined_data_configs/multiple_datasets_with_sampling.yaml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,8 @@ datasets:
1313
remove_columns: all
1414
batched: false
1515
fn_kwargs:
16-
input_field_name: input
17-
output_field_name: output
16+
input_column_name: input
17+
output_column_name: output
1818
- name: dataset_2
1919
sampling: 0.4
2020
data_paths:
@@ -25,8 +25,8 @@ datasets:
2525
remove_columns: all
2626
batched: false
2727
fn_kwargs:
28-
input_field_name: input
29-
output_field_name: output
28+
input_column_name: input
29+
output_column_name: output
3030
- name: dataset_3
3131
sampling: 0.3
3232
data_paths:
@@ -37,5 +37,5 @@ datasets:
3737
remove_columns: all
3838
batched: false
3939
fn_kwargs:
40-
input_field_name: input
41-
output_field_name: output
40+
input_column_name: input
41+
output_column_name: output

tests/artifacts/predefined_data_configs/rename_select_columns.yaml

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,18 +7,20 @@ datasets:
77
data_handlers:
88
- name: rename_columns
99
arguments:
10-
column_mapping:
11-
"input" : "instruction"
12-
"output" : "response"
10+
fn_kwargs:
11+
column_mapping:
12+
"input" : "instruction"
13+
"output" : "response"
1314
- name: select_columns
1415
arguments:
15-
column_names:
16-
- "instruction"
17-
- "response"
16+
fn_kwargs:
17+
column_names:
18+
- "instruction"
19+
- "response"
1820
- name: tokenize_and_apply_input_masking
1921
arguments:
2022
remove_columns: all
2123
batched: false
2224
fn_kwargs:
23-
input_field_name: instruction
24-
output_field_name: response
25+
input_column_name: instruction
26+
output_column_name: response

tests/artifacts/predefined_data_configs/skip_large_columns_data_handler_template.yaml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,15 +10,15 @@ datasets:
1010
remove_columns: all
1111
batched: true
1212
fn_kwargs:
13-
dataset_text_field: "output"
13+
text_column_name: "output"
1414
- name: duplicate_columns
1515
arguments:
1616
batched: true
1717
fn_kwargs:
18-
old_column: "input_ids"
19-
new_column: "labels"
20-
- name: skip_large_columns
18+
old_column_name: "input_ids"
19+
new_column_name: "labels"
20+
- name: skip_samples_with_large_columns
2121
arguments:
2222
fn_kwargs:
2323
column_name: "input_ids"
24-
max_length: 50
24+
max_allowed_length: 50

tests/artifacts/predefined_data_configs/tokenize_and_apply_input_masking.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,6 @@ datasets:
1010
remove_columns: all
1111
batched: false
1212
fn_kwargs:
13-
input_field_name: input
14-
output_field_name: output
13+
input_column_name: input
14+
output_column_name: output
1515
add_eos_token: true

0 commit comments

Comments
 (0)