Skip to content

Commit ec37629

Browse files
authored
Improve docstrings and type hints in scheduling_ddim_cogvideox.py (#12992)
docs: improve docstring scheduling_ddim_cogvideox.py
1 parent 4b843c8 commit ec37629

26 files changed

+162
-103
lines changed

src/diffusers/schedulers/scheduling_consistency_decoder.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
def betas_for_alpha_bar(
1515
num_diffusion_timesteps: int,
1616
max_beta: float = 0.999,
17-
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
17+
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
1818
) -> torch.Tensor:
1919
"""
2020
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -28,8 +28,8 @@ def betas_for_alpha_bar(
2828
The number of betas to produce.
2929
max_beta (`float`, defaults to `0.999`):
3030
The maximum beta to use; use values lower than 1 to avoid numerical instability.
31-
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
32-
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
31+
alpha_transform_type (`str`, defaults to `"cosine"`):
32+
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
3333
3434
Returns:
3535
`torch.Tensor`:

src/diffusers/schedulers/scheduling_ddim.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
5151
def betas_for_alpha_bar(
5252
num_diffusion_timesteps: int,
5353
max_beta: float = 0.999,
54-
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
54+
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
5555
) -> torch.Tensor:
5656
"""
5757
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
6565
The number of betas to produce.
6666
max_beta (`float`, defaults to `0.999`):
6767
The maximum beta to use; use values lower than 1 to avoid numerical instability.
68-
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
69-
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
68+
alpha_transform_type (`str`, defaults to `"cosine"`):
69+
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
7070
7171
Returns:
7272
`torch.Tensor`:

src/diffusers/schedulers/scheduling_ddim_cogvideox.py

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
5151
def betas_for_alpha_bar(
5252
num_diffusion_timesteps: int,
5353
max_beta: float = 0.999,
54-
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
54+
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
5555
) -> torch.Tensor:
5656
"""
5757
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
6565
The number of betas to produce.
6666
max_beta (`float`, defaults to `0.999`):
6767
The maximum beta to use; use values lower than 1 to avoid numerical instability.
68-
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
69-
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
68+
alpha_transform_type (`str`, defaults to `"cosine"`):
69+
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
7070
7171
Returns:
7272
`torch.Tensor`:
@@ -100,14 +100,13 @@ def alpha_bar_fn(t):
100100
return torch.tensor(betas, dtype=torch.float32)
101101

102102

103-
def rescale_zero_terminal_snr(alphas_cumprod):
103+
def rescale_zero_terminal_snr(alphas_cumprod: torch.Tensor) -> torch.Tensor:
104104
"""
105-
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
106-
105+
Rescales betas to have zero terminal SNR. Based on [Algorithm 1](https://huggingface.co/papers/2305.08891).
107106
108107
Args:
109-
betas (`torch.Tensor`):
110-
the betas that the scheduler is being initialized with.
108+
alphas_cumprod (`torch.Tensor`):
109+
The alphas cumulative products that the scheduler is being initialized with.
111110
112111
Returns:
113112
`torch.Tensor`: rescaled betas with zero terminal SNR
@@ -142,11 +141,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
142141
Args:
143142
num_train_timesteps (`int`, defaults to 1000):
144143
The number of diffusion steps to train the model.
145-
beta_start (`float`, defaults to 0.0001):
144+
beta_start (`float`, defaults to 0.00085):
146145
The starting `beta` value of inference.
147-
beta_end (`float`, defaults to 0.02):
146+
beta_end (`float`, defaults to 0.0120):
148147
The final `beta` value.
149-
beta_schedule (`str`, defaults to `"linear"`):
148+
beta_schedule (`str`, defaults to `"scaled_linear"`):
150149
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
151150
`linear`, `scaled_linear`, or `squaredcos_cap_v2`.
152151
trained_betas (`np.ndarray`, *optional*):
@@ -179,6 +178,8 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
179178
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
180179
dark samples instead of limiting it to samples with medium brightness. Loosely related to
181180
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
181+
snr_shift_scale (`float`, defaults to 3.0):
182+
Shift scale for SNR.
182183
"""
183184

184185
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
@@ -190,15 +191,15 @@ def __init__(
190191
num_train_timesteps: int = 1000,
191192
beta_start: float = 0.00085,
192193
beta_end: float = 0.0120,
193-
beta_schedule: str = "scaled_linear",
194+
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "scaled_linear",
194195
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
195196
clip_sample: bool = True,
196197
set_alpha_to_one: bool = True,
197198
steps_offset: int = 0,
198-
prediction_type: str = "epsilon",
199+
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
199200
clip_sample_range: float = 1.0,
200201
sample_max_value: float = 1.0,
201-
timestep_spacing: str = "leading",
202+
timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading",
202203
rescale_betas_zero_snr: bool = False,
203204
snr_shift_scale: float = 3.0,
204205
):
@@ -208,7 +209,15 @@ def __init__(
208209
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
209210
elif beta_schedule == "scaled_linear":
210211
# this schedule is very specific to the latent diffusion model.
211-
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float64) ** 2
212+
self.betas = (
213+
torch.linspace(
214+
beta_start**0.5,
215+
beta_end**0.5,
216+
num_train_timesteps,
217+
dtype=torch.float64,
218+
)
219+
** 2
220+
)
212221
elif beta_schedule == "squaredcos_cap_v2":
213222
# Glide cosine schedule
214223
self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -238,7 +247,7 @@ def __init__(
238247
self.num_inference_steps = None
239248
self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
240249

241-
def _get_variance(self, timestep, prev_timestep):
250+
def _get_variance(self, timestep: int, prev_timestep: int) -> torch.Tensor:
242251
alpha_prod_t = self.alphas_cumprod[timestep]
243252
alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
244253
beta_prod_t = 1 - alpha_prod_t
@@ -265,7 +274,11 @@ def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None
265274
"""
266275
return sample
267276

268-
def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
277+
def set_timesteps(
278+
self,
279+
num_inference_steps: int,
280+
device: Optional[Union[str, torch.device]] = None,
281+
) -> None:
269282
"""
270283
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
271284
@@ -317,7 +330,7 @@ def step(
317330
sample: torch.Tensor,
318331
eta: float = 0.0,
319332
use_clipped_model_output: bool = False,
320-
generator=None,
333+
generator: Optional[torch.Generator] = None,
321334
variance_noise: Optional[torch.Tensor] = None,
322335
return_dict: bool = True,
323336
) -> Union[DDIMSchedulerOutput, Tuple]:
@@ -328,7 +341,7 @@ def step(
328341
Args:
329342
model_output (`torch.Tensor`):
330343
The direct output from learned diffusion model.
331-
timestep (`float`):
344+
timestep (`int`):
332345
The current discrete timestep in the diffusion chain.
333346
sample (`torch.Tensor`):
334347
A current instance of a sample created by the diffusion process.
@@ -487,5 +500,5 @@ def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: tor
487500
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
488501
return velocity
489502

490-
def __len__(self):
503+
def __len__(self) -> int:
491504
return self.config.num_train_timesteps

src/diffusers/schedulers/scheduling_ddim_inverse.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class DDIMSchedulerOutput(BaseOutput):
4949
def betas_for_alpha_bar(
5050
num_diffusion_timesteps: int,
5151
max_beta: float = 0.999,
52-
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
52+
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
5353
) -> torch.Tensor:
5454
"""
5555
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -63,8 +63,8 @@ def betas_for_alpha_bar(
6363
The number of betas to produce.
6464
max_beta (`float`, defaults to `0.999`):
6565
The maximum beta to use; use values lower than 1 to avoid numerical instability.
66-
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
67-
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
66+
alpha_transform_type (`str`, defaults to `"cosine"`):
67+
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
6868
6969
Returns:
7070
`torch.Tensor`:

src/diffusers/schedulers/scheduling_ddim_parallel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class DDIMParallelSchedulerOutput(BaseOutput):
5151
def betas_for_alpha_bar(
5252
num_diffusion_timesteps: int,
5353
max_beta: float = 0.999,
54-
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
54+
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
5555
) -> torch.Tensor:
5656
"""
5757
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
6565
The number of betas to produce.
6666
max_beta (`float`, defaults to `0.999`):
6767
The maximum beta to use; use values lower than 1 to avoid numerical instability.
68-
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
69-
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
68+
alpha_transform_type (`str`, defaults to `"cosine"`):
69+
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
7070
7171
Returns:
7272
`torch.Tensor`:

src/diffusers/schedulers/scheduling_ddpm.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class DDPMSchedulerOutput(BaseOutput):
4848
def betas_for_alpha_bar(
4949
num_diffusion_timesteps: int,
5050
max_beta: float = 0.999,
51-
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
51+
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
5252
) -> torch.Tensor:
5353
"""
5454
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -62,8 +62,8 @@ def betas_for_alpha_bar(
6262
The number of betas to produce.
6363
max_beta (`float`, defaults to `0.999`):
6464
The maximum beta to use; use values lower than 1 to avoid numerical instability.
65-
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
66-
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
65+
alpha_transform_type (`str`, defaults to `"cosine"`):
66+
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
6767
6868
Returns:
6969
`torch.Tensor`:
@@ -192,7 +192,12 @@ def __init__(
192192
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
193193
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
194194
variance_type: Literal[
195-
"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
195+
"fixed_small",
196+
"fixed_small_log",
197+
"fixed_large",
198+
"fixed_large_log",
199+
"learned",
200+
"learned_range",
196201
] = "fixed_small",
197202
clip_sample: bool = True,
198203
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
@@ -210,7 +215,15 @@ def __init__(
210215
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
211216
elif beta_schedule == "scaled_linear":
212217
# this schedule is very specific to the latent diffusion model.
213-
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
218+
self.betas = (
219+
torch.linspace(
220+
beta_start**0.5,
221+
beta_end**0.5,
222+
num_train_timesteps,
223+
dtype=torch.float32,
224+
)
225+
** 2
226+
)
214227
elif beta_schedule == "squaredcos_cap_v2":
215228
# Glide cosine schedule
216229
self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -337,7 +350,14 @@ def _get_variance(
337350
t: int,
338351
predicted_variance: Optional[torch.Tensor] = None,
339352
variance_type: Optional[
340-
Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
353+
Literal[
354+
"fixed_small",
355+
"fixed_small_log",
356+
"fixed_large",
357+
"fixed_large_log",
358+
"learned",
359+
"learned_range",
360+
]
341361
] = None,
342362
) -> torch.Tensor:
343363
"""
@@ -472,7 +492,10 @@ def step(
472492

473493
prev_t = self.previous_timestep(t)
474494

475-
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]:
495+
if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [
496+
"learned",
497+
"learned_range",
498+
]:
476499
model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
477500
else:
478501
predicted_variance = None
@@ -521,7 +544,10 @@ def step(
521544
if t > 0:
522545
device = model_output.device
523546
variance_noise = randn_tensor(
524-
model_output.shape, generator=generator, device=device, dtype=model_output.dtype
547+
model_output.shape,
548+
generator=generator,
549+
device=device,
550+
dtype=model_output.dtype,
525551
)
526552
if self.variance_type == "fixed_small_log":
527553
variance = self._get_variance(t, predicted_variance=predicted_variance) * variance_noise

src/diffusers/schedulers/scheduling_ddpm_parallel.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class DDPMParallelSchedulerOutput(BaseOutput):
5050
def betas_for_alpha_bar(
5151
num_diffusion_timesteps: int,
5252
max_beta: float = 0.999,
53-
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
53+
alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine",
5454
) -> torch.Tensor:
5555
"""
5656
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -64,8 +64,8 @@ def betas_for_alpha_bar(
6464
The number of betas to produce.
6565
max_beta (`float`, defaults to `0.999`):
6666
The maximum beta to use; use values lower than 1 to avoid numerical instability.
67-
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
68-
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
67+
alpha_transform_type (`str`, defaults to `"cosine"`):
68+
The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
6969
7070
Returns:
7171
`torch.Tensor`:
@@ -202,7 +202,12 @@ def __init__(
202202
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
203203
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
204204
variance_type: Literal[
205-
"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
205+
"fixed_small",
206+
"fixed_small_log",
207+
"fixed_large",
208+
"fixed_large_log",
209+
"learned",
210+
"learned_range",
206211
] = "fixed_small",
207212
clip_sample: bool = True,
208213
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
@@ -220,7 +225,15 @@ def __init__(
220225
self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
221226
elif beta_schedule == "scaled_linear":
222227
# this schedule is very specific to the latent diffusion model.
223-
self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
228+
self.betas = (
229+
torch.linspace(
230+
beta_start**0.5,
231+
beta_end**0.5,
232+
num_train_timesteps,
233+
dtype=torch.float32,
234+
)
235+
** 2
236+
)
224237
elif beta_schedule == "squaredcos_cap_v2":
225238
# Glide cosine schedule
226239
self.betas = betas_for_alpha_bar(num_train_timesteps)
@@ -350,7 +363,14 @@ def _get_variance(
350363
t: int,
351364
predicted_variance: Optional[torch.Tensor] = None,
352365
variance_type: Optional[
353-
Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
366+
Literal[
367+
"fixed_small",
368+
"fixed_small_log",
369+
"fixed_large",
370+
"fixed_large_log",
371+
"learned",
372+
"learned_range",
373+
]
354374
] = None,
355375
) -> torch.Tensor:
356376
"""

0 commit comments

Comments
 (0)