You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: ajet/default_config/ajet_config_schema.py
+6 -14 Lines changed: 6 additions & 14 deletions
Original file line number
Diff line number
Diff line change
@@ -3,7 +3,7 @@
3
3
4
4
5
5
from dataclasses import dataclass, field
6
-
from typing import Any, Dict, List
6
+
from typing import Any, Dict, List, Optional
7
7
8
8
9
9
@dataclass
@@ -26,6 +26,7 @@ class AjetTrainerCommon:
26
26
use_kl_in_reward: bool=False
27
27
kl_penalty_type: str="kl"
28
28
ppo_epochs: int=1
29
+
ulysses_sequence_parallel_size: int=1
29
30
val_print_to_markdown_file_path: str|None=None
30
31
train_print_to_markdown_file_path: str|None=None
31
32
total_training_steps: int|None=None
@@ -34,20 +35,9 @@ class AjetTrainerCommon:
34
35
total_epochs: int=50
35
36
val_pass_n: int=4
36
37
val_before_train: bool=False
37
-
# When enabled, every sample produced by the same episode (same
38
-
# non_tensor_batch["episode_uuids"]) gets its loss weight multiplied by
39
-
# 1/N (N = number of samples in that episode) so each episode contributes
40
-
# equally to the policy-gradient update regardless of how many samples it
41
-
# generated. Disabled by default (current behaviour: every sample weighted
42
-
# equally).
38
+
# When enabled, every sample produced by the same episode (same non_tensor_batch["episode_uuids"]) gets its loss weight multiplied by 1/N (N = number of samples in that episode)
0 commit comments