Skip to content

Commit 9c7858e

Browse files
authored
upgrade trl (#601)
Signed-off-by: Dushyant Behl <dushyantbehl@in.ibm.com>
1 parent 5951c9a commit 9c7858e

2 files changed

Lines changed: 4 additions & 11 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ dependencies = [
34 | 34 |   "sentencepiece>=0.1.99,<0.3",
35 | 35 |   "tokenizers>=0.13.3,<1.0",
36 | 36 |   "tqdm>=4.66.2,<5.0",
37 |    | - "trl>=0.13,<0.18",
   | 37 | + "trl>=0.13,<0.20",
38 | 38 |   "peft>=0.15.0,<=0.15.2",
39 | 39 |   "protobuf>=5.28.0,<6.0.0",
40 | 40 |   "datasets>=3.5.0,<4.0",

tuning/data/data_preprocessing_utils.py

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -92,16 +92,9 @@ def get_data_collator(
 92 |  92 |   )
 93 |  93 |
 94 |  94 |   if is_traindata_tokenized:
 95 |     | - # Note that this automatically pads labels with -100
 96 |     | - # TODO check if this is sufficient for preprocessed
 97 |     | - # TODO with the release of trl v0.17.0, DataCollatorForSeq2Seq
 98 |     | - # was removed from tokenized data processing, should eventually
 99 |     | - # be added back in with support directly in fms-hf-tuning, not
100 |     | - # dependent on trl.
101 |     | - # return DataCollatorForSeq2Seq(
102 |     | - # tokenizer=tokenizer, padding=True, max_length=max_seq_length
103 |     | - # )
104 |     | - return None
    |  95 | + return DataCollatorForSeq2Seq(
    |  96 | + tokenizer=tokenizer, padding=True, max_length=max_seq_length
    |  97 | + )
105 |  98 |
106 |  99 |   # TODO: near term - how response template ids are parsed out needs to be cleaned.
107 | 100 |   # The [2:] here applies if response template has \n prefix, it is needed to strip \n,

0 commit comments

Comments
 (0)