---
# This file holds a sample full-finetuning scenario and
# demonstrates various pretokenization scenarios
# the data_processing stanza is optional
# - if it is missing, then the default is to use alpaca
# with instruct formatting and no tokenization
# - this is an older style method which does not rely on
# chat templates, this will also do instruct formatting
# - but if tokenize = True, this works only if
# sft_trainer accepts pretokenized dataset
# data_processing:
# dataset_name: yahma/alpaca-cleaned
# formatting: "instruct"
# tokenize: True
# input_field: input
# - this is the new style, with the chat templates for formatting
# - this is the best approach to keep things flexible and
# allows configuring many different datasets
# - there is an option of setting tokenize to True or False
# NOTE: on tokenization
# if tokenize = True then it's a pretokenization flow; then below set
# - response_template: null
# - dataset_text_field: null
# otherwise if tokenize = False, then do not set the above to null
# How the raw dataset is prepared before training (see the notes above).
data_processing:
  dataset_name: microsoft/orca-math-word-problems-200k
  # Jinja chat template: renders each example as a USER/ASSISTANT turn
  # from the dataset's 'question' and 'answer' fields.
  # NOTE(review): the blank line before "ASSISTANT:" is required so the
  # rendered text contains "\n\nASSISTANT:", which is exactly the
  # response_template every scenario below anchors on — confirm against
  # the pre-scrape original.
  chat_template: |
    {%- for message in messages %}
    USER:
    {{ message['question'] }}

    ASSISTANT:
    {{ message['answer'] }}
    {%- endfor %}
  # Only the first 8000 training examples; quoted so the slice syntax
  # stays a string.
  dataset_split: "train[:8000]"
  # tokenize = false: apply formatting only and leave tokenization to
  # sft_trainer (see the tokenization note above).
  tokenize: false
# scenarios
scenarios:
  # Baseline: plain full fine-tuning with no acceleration framework configs.
  - name: full-finetuning
    arguments:
      # NOTE(review): "2e-5" without a decimal point is loaded as a *string*
      # by YAML 1.1 parsers (e.g. PyYAML) — assumes the consumer casts it.
      learning_rate: 2e-5
      torch_dtype: float16
      gradient_accumulation_steps: 2
      max_steps: null
      packing: false  # canonical lowercase boolean (yamllint `truthy`)
      model_name_or_path:
        - 'mistralai/Mistral-7B-v0.1'
      dataset_text_field: 'output'
      # Must match the text produced by the chat template above.
      response_template: "\n\nASSISTANT:"
- name: padding-free
framework_config:
- aadp-padding-free
- aadp-padding-free-multipack
arguments:
learning_rate: 2e-5
torch_dtype: float16
gradient_accumulation_steps: 2
max_steps: null
packing: False
model_name_or_path:
- 'mistralai/Mistral-7B-v0.1'
dataset_text_field: 'output'
response_template: "\n\nASSISTANT:"
- name: accelerated-peft-bnb
framework_config:
- accelerated-peft-bnb
- accelerated-peft-bnb-padding-free
- accelerated-peft-bnb-foak
- accelerated-peft-bnb-foak-padding-free
arguments:
fp16: True
learning_rate: 2e-4
torch_dtype: float16
peft_method: lora
r: 16
lora_alpha: 16
lora_dropout: 0.1
target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
max_steps: null
gradient_accumulation_steps: 2
packing: False
model_name_or_path:
- 'mistralai/Mistral-7B-v0.1'
dataset_text_field: 'output'
response_template: "\n\nASSISTANT:"
- name: accelerated-peft-gptq
framework_config:
- accelerated-peft-autogptq
- accelerated-peft-autogptq-padding-free
- accelerated-peft-autogptq-foak
- accelerated-peft-autogptq-foak-padding-free
arguments:
learning_rate: 2e-4
fp16: True
torch_dtype: float16
peft_method: lora
r: 16
lora_alpha: 16
lora_dropout: 0.1
target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
max_steps: null
gradient_accumulation_steps: 2
packing: False
model_name_or_path:
- 'TheBloke/Mistral-7B-v0.1-GPTQ'
dataset_text_field: 'output'
response_template: "\n\nASSISTANT:"