@@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput):
5151def betas_for_alpha_bar (
5252 num_diffusion_timesteps : int ,
5353 max_beta : float = 0.999 ,
54- alpha_transform_type : Literal ["cosine" , "exp" ] = "cosine" ,
54+ alpha_transform_type : Literal ["cosine" , "exp" , "laplace" ] = "cosine" ,
5555) -> torch .Tensor :
5656 """
5757 Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
@@ -65,8 +65,8 @@ def betas_for_alpha_bar(
6565 The number of betas to produce.
6666 max_beta (`float`, defaults to `0.999`):
6767 The maximum beta to use; use values lower than 1 to avoid numerical instability.
68- alpha_transform_type (`"cosine"` or `"exp" `, defaults to `"cosine"`):
69- The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp `.
68+ alpha_transform_type (`str`, defaults to `"cosine"`):
69+ The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`.
7070
7171 Returns:
7272 `torch.Tensor`:
@@ -100,14 +100,13 @@ def alpha_bar_fn(t):
100100 return torch .tensor (betas , dtype = torch .float32 )
101101
102102
103- def rescale_zero_terminal_snr (alphas_cumprod ) :
103+ def rescale_zero_terminal_snr (alphas_cumprod : torch . Tensor ) -> torch . Tensor :
104104 """
105- Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
106-
105+ Rescales betas to have zero terminal SNR, based on [Algorithm 1](https://huggingface.co/papers/2305.08891).
107106
108107 Args:
109- betas (`torch.Tensor`):
110- the betas that the scheduler is being initialized with.
108+ alphas_cumprod (`torch.Tensor`):
109+ The alphas cumulative products that the scheduler is being initialized with.
111110
112111 Returns:
113112 `torch.Tensor`: rescaled betas with zero terminal SNR
@@ -142,11 +141,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
142141 Args:
143142 num_train_timesteps (`int`, defaults to 1000):
144143 The number of diffusion steps to train the model.
145- beta_start (`float`, defaults to 0.0001 ):
144+ beta_start (`float`, defaults to 0.00085 ):
146145 The starting `beta` value of inference.
147- beta_end (`float`, defaults to 0.02 ):
146+ beta_end (`float`, defaults to 0.0120 ):
148147 The final `beta` value.
149- beta_schedule (`str`, defaults to `"linear "`):
148+ beta_schedule (`str`, defaults to `"scaled_linear"`):
150149 The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
151150 `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
152151 trained_betas (`np.ndarray`, *optional*):
@@ -179,6 +178,8 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
179178 Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
180179 dark samples instead of limiting it to samples with medium brightness. Loosely related to
181180 [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
181+ snr_shift_scale (`float`, defaults to 3.0):
182+ Shift scale for SNR.
182183 """
183184
184185 _compatibles = [e .name for e in KarrasDiffusionSchedulers ]
@@ -190,15 +191,15 @@ def __init__(
190191 num_train_timesteps : int = 1000 ,
191192 beta_start : float = 0.00085 ,
192193 beta_end : float = 0.0120 ,
193- beta_schedule : str = "scaled_linear" ,
194+ beta_schedule : Literal [ "linear" , "scaled_linear" , "squaredcos_cap_v2" ] = "scaled_linear" ,
194195 trained_betas : Optional [Union [np .ndarray , List [float ]]] = None ,
195196 clip_sample : bool = True ,
196197 set_alpha_to_one : bool = True ,
197198 steps_offset : int = 0 ,
198- prediction_type : str = "epsilon" ,
199+ prediction_type : Literal [ "epsilon" , "sample" , "v_prediction" ] = "epsilon" ,
199200 clip_sample_range : float = 1.0 ,
200201 sample_max_value : float = 1.0 ,
201- timestep_spacing : str = "leading" ,
202+ timestep_spacing : Literal [ "linspace" , "leading" , "trailing" ] = "leading" ,
202203 rescale_betas_zero_snr : bool = False ,
203204 snr_shift_scale : float = 3.0 ,
204205 ):
@@ -208,7 +209,15 @@ def __init__(
208209 self .betas = torch .linspace (beta_start , beta_end , num_train_timesteps , dtype = torch .float32 )
209210 elif beta_schedule == "scaled_linear" :
210211 # this schedule is very specific to the latent diffusion model.
211- self .betas = torch .linspace (beta_start ** 0.5 , beta_end ** 0.5 , num_train_timesteps , dtype = torch .float64 ) ** 2
212+ self .betas = (
213+ torch .linspace (
214+ beta_start ** 0.5 ,
215+ beta_end ** 0.5 ,
216+ num_train_timesteps ,
217+ dtype = torch .float64 ,
218+ )
219+ ** 2
220+ )
212221 elif beta_schedule == "squaredcos_cap_v2" :
213222 # Glide cosine schedule
214223 self .betas = betas_for_alpha_bar (num_train_timesteps )
@@ -238,7 +247,7 @@ def __init__(
238247 self .num_inference_steps = None
239248 self .timesteps = torch .from_numpy (np .arange (0 , num_train_timesteps )[::- 1 ].copy ().astype (np .int64 ))
240249
241- def _get_variance (self , timestep , prev_timestep ) :
250+ def _get_variance (self , timestep : int , prev_timestep : int ) -> torch . Tensor :
242251 alpha_prod_t = self .alphas_cumprod [timestep ]
243252 alpha_prod_t_prev = self .alphas_cumprod [prev_timestep ] if prev_timestep >= 0 else self .final_alpha_cumprod
244253 beta_prod_t = 1 - alpha_prod_t
@@ -265,7 +274,11 @@ def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None
265274 """
266275 return sample
267276
268- def set_timesteps (self , num_inference_steps : int , device : Union [str , torch .device ] = None ):
277+ def set_timesteps (
278+ self ,
279+ num_inference_steps : int ,
280+ device : Optional [Union [str , torch .device ]] = None ,
281+ ) -> None :
269282 """
270283 Sets the discrete timesteps used for the diffusion chain (to be run before inference).
271284
@@ -317,7 +330,7 @@ def step(
317330 sample : torch .Tensor ,
318331 eta : float = 0.0 ,
319332 use_clipped_model_output : bool = False ,
320- generator = None ,
333+ generator : Optional [ torch . Generator ] = None ,
321334 variance_noise : Optional [torch .Tensor ] = None ,
322335 return_dict : bool = True ,
323336 ) -> Union [DDIMSchedulerOutput , Tuple ]:
@@ -328,7 +341,7 @@ def step(
328341 Args:
329342 model_output (`torch.Tensor`):
330343 The direct output from learned diffusion model.
331- timestep (`float `):
344+ timestep (`int `):
332345 The current discrete timestep in the diffusion chain.
333346 sample (`torch.Tensor`):
334347 A current instance of a sample created by the diffusion process.
@@ -487,5 +500,5 @@ def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: tor
487500 velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
488501 return velocity
489502
490- def __len__ (self ):
503+ def __len__ (self ) -> int :
491504 return self .config .num_train_timesteps
0 commit comments