Commit e64b446

Merge branch 'main' into release-please--branches--main--changes--next
2 parents e374d1d + 9b32b95

4 files changed: 13 additions & 0 deletions

src/together/lib/cli/api/fine_tuning/create.py

Lines changed: 5 additions & 0 deletions
@@ -91,6 +91,10 @@ async def create(
         int | Literal["max"],
         Parameter(converter=int_or_max_converter, alias="-b", help="Train batch size"),
     ] = "max",
+    gradient_accumulation_steps: Annotated[
+        Optional[int],
+        Parameter(help="Number of gradient accumulation steps (increases effective batch size without more memory)"),
+    ] = None,
     learning_rate: Annotated[float, Parameter(alias="--lr", help="Learning rate")] = DEFAULT_LEARNING_RATE,
     lr_scheduler_type: Annotated[
         Literal["linear", "cosine"], Parameter(help="Learning rate scheduler type")

@@ -174,6 +178,7 @@ async def create(
         max_seq_length=max_seq_length,
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
+        gradient_accumulation_steps=gradient_accumulation_steps,
         learning_rate=learning_rate,
         lr_scheduler_type=lr_scheduler_type,
         min_lr_ratio=min_lr_ratio,
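
The help text says accumulation raises the effective batch size without more memory. The loop below is illustrative only, not Together's trainer: it shows the standard pattern where gradients from several micro-batches are summed before a single optimizer step, so the update behaves like one large batch while only one micro-batch is resident at a time. (Assuming the CLI derives kebab-case flags from parameter names, this would surface as --gradient-accumulation-steps; the flag spelling is an inference, not confirmed by the diff.)

# Illustrative sketch, not Together's trainer: gradient accumulation in a
# minimal PyTorch loop. Four micro-batches of 8 behave like one batch of 32.
import torch

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
micro_batches = [torch.randn(8, 4) for _ in range(4)]  # 4 micro-batches of 8
accumulation_steps = len(micro_batches)                # effective batch: 32

optimizer.zero_grad()
for step, x in enumerate(micro_batches):
    loss = model(x).pow(2).mean() / accumulation_steps  # scale so grads average
    loss.backward()                                     # grads accumulate in .grad
    if (step + 1) % accumulation_steps == 0:
        optimizer.step()        # one parameter update for all four micro-batches
        optimizer.zero_grad()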

src/together/lib/resources/fine_tuning.py

Lines changed: 2 additions & 0 deletions
@@ -42,6 +42,7 @@ def create_finetune_request(
     n_evals: int | None = 0,
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
+    gradient_accumulation_steps: int | None = None,
     learning_rate: float | None = 0.00001,
     lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
     min_lr_ratio: float | None = 0.0,

@@ -255,6 +256,7 @@ def create_finetune_request(
         n_evals=n_evals,
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
+        gradient_accumulation_steps=gradient_accumulation_steps,
         learning_rate=learning_rate or 0.00001,
         lr_scheduler=lr_scheduler,
         warmup_ratio=warmup_ratio,
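
create_finetune_request forwards the new argument into the request payload as-is: unlike learning_rate, where `learning_rate or 0.00001` falls back to the default for a falsy value, gradient_accumulation_steps=None is passed through untouched. The helper below is a hypothetical sketch of the arithmetic the CLI help text alludes to, not code from the library:

# Hypothetical helper, not part of the library: the arithmetic behind
# "increases effective batch size without more memory".
def effective_batch_size(batch_size: int, gradient_accumulation_steps: int | None) -> int:
    # None means no accumulation, i.e. one micro-batch per optimizer step.
    return batch_size * (gradient_accumulation_steps or 1)

assert effective_batch_size(8, 4) == 32   # 4 micro-batches of 8 per update
assert effective_batch_size(8, None) == 8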

src/together/lib/types/fine_tuning.py

Lines changed: 2 additions & 0 deletions
@@ -481,6 +481,8 @@ class FinetuneRequest(BaseModel):
     max_seq_length: Union[int, None] = None
     # training batch size
     batch_size: Union[int, Literal["max"], None] = None
+    # gradient accumulation steps
+    gradient_accumulation_steps: Union[int, None] = None
     # up to 40 character suffix for output model name
     suffix: Union[str, None] = None
     # weights & biases api key
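
Since FinetuneRequest is a pydantic BaseModel and the new field defaults to None, payloads that omit it keep validating unchanged. A minimal sketch of how the optional field serializes, using a stand-in model rather than the library's real one, and assuming pydantic v2:

# Stand-in model, not the library's FinetuneRequest: shows that the new
# optional field is simply dropped from the payload when left as None.
from typing import Literal, Union
from pydantic import BaseModel

class FinetuneRequestSketch(BaseModel):
    batch_size: Union[int, Literal["max"], None] = None
    gradient_accumulation_steps: Union[int, None] = None

req = FinetuneRequestSketch(batch_size=8, gradient_accumulation_steps=4)
print(req.model_dump(exclude_none=True))
# {'batch_size': 8, 'gradient_accumulation_steps': 4}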

src/together/resources/fine_tuning.py

Lines changed: 4 additions & 0 deletions
@@ -88,6 +88,7 @@ def create(
     n_evals: int | None = 0,
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
+    gradient_accumulation_steps: int | None = None,
     learning_rate: float | None = 0.00001,
     lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
     min_lr_ratio: float = 0.0,

@@ -220,6 +221,7 @@ def create(
         n_evals=n_evals,
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
+        gradient_accumulation_steps=gradient_accumulation_steps,
         learning_rate=learning_rate,
         lr_scheduler_type=lr_scheduler_type,
         min_lr_ratio=min_lr_ratio,

@@ -693,6 +695,7 @@ async def create(
     n_evals: int | None = 0,
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
+    gradient_accumulation_steps: int | None = None,
     learning_rate: float | None = 0.00001,
     lr_scheduler_type: Literal["linear", "cosine"] = "cosine",
     min_lr_ratio: float = 0.0,

@@ -824,6 +827,7 @@ async def create(
         n_evals=n_evals,
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
+        gradient_accumulation_steps=gradient_accumulation_steps,
         learning_rate=learning_rate,
         lr_scheduler_type=lr_scheduler_type,
         min_lr_ratio=min_lr_ratio,
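
With both the sync and async create signatures extended, the knob is reachable from the SDK. A hedged usage sketch: it assumes the resource is exposed as client.fine_tuning and that model and training_file are the usual required arguments (neither appears in the hunks above); the model name and file ID are placeholders.

# Hedged usage sketch, not a confirmed example from the library's docs.
from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment
job = client.fine_tuning.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # placeholder
    training_file="file-abc123",                              # placeholder ID
    batch_size=8,
    gradient_accumulation_steps=4,  # new parameter: 8 x 4 = effective batch of 32
    learning_rate=1e-5,
)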
