Skip to content

Commit ef309a1

Browse files
Add KVAE 1.0 (#13033)
* add kvae2d * add kvae3d video * add docs for kvae2d and kvae3d video * style fixes * fix kvae3d docs * fix normalization * fix kvae video for code style * fix kvae video * kvae minor fixes * add gradient checkpointing for kvaes * get rid of inplace ops kvae video * add tests for KVAEs * kvae2d normalization style change * kvaes fix style * update dummy_pt_objects test for kvaes --------- Co-authored-by: YiYi Xu <yixu310@gmail.com>
1 parent b9761ce commit ef309a1

File tree

11 files changed

+2056
-0
lines changed

11 files changed

+2056
-0
lines changed

docs/source/en/_toctree.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,10 @@
446446
title: AutoencoderKLHunyuanVideo
447447
- local: api/models/autoencoder_kl_hunyuan_video15
448448
title: AutoencoderKLHunyuanVideo15
449+
- local: api/models/autoencoder_kl_kvae
450+
title: AutoencoderKLKVAE
451+
- local: api/models/autoencoder_kl_kvae_video
452+
title: AutoencoderKLKVAEVideo
449453
- local: api/models/autoencoderkl_audio_ltx_2
450454
title: AutoencoderKLLTX2Audio
451455
- local: api/models/autoencoderkl_ltx_2
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<!-- Copyright 2025 The Kandinsky Team and The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. -->
14+
15+
# AutoencoderKLKVAE
16+
17+
The 2D variational autoencoder (VAE) model with KL loss.
18+
19+
The model can be loaded with the following code snippet.
20+
21+
```python
22+
import torch
23+
from diffusers import AutoencoderKLKVAE
24+
25+
vae = AutoencoderKLKVAE.from_pretrained("kandinskylab/KVAE-2D-1.0", subfolder="diffusers", torch_dtype=torch.bfloat16)
26+
```
27+
28+
## AutoencoderKLKVAE
29+
30+
[[autodoc]] AutoencoderKLKVAE
31+
- decode
32+
- all
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<!-- Copyright 2025 The Kandinsky Team and The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. -->
14+
15+
# AutoencoderKLKVAEVideo
16+
17+
The 3D variational autoencoder (VAE) model with KL loss.
18+
19+
The model can be loaded with the following code snippet.
20+
21+
```python
22+
import torch
23+
from diffusers import AutoencoderKLKVAEVideo
24+
25+
vae = AutoencoderKLKVAEVideo.from_pretrained("kandinskylab/KVAE-3D-1.0", subfolder="diffusers", torch_dtype=torch.float16)
26+
```
27+
28+
## AutoencoderKLKVAEVideo
29+
30+
[[autodoc]] AutoencoderKLKVAEVideo
31+
- decode
32+
- all
33+

src/diffusers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@
193193
"AutoencoderKLHunyuanImageRefiner",
194194
"AutoencoderKLHunyuanVideo",
195195
"AutoencoderKLHunyuanVideo15",
196+
"AutoencoderKLKVAE",
197+
"AutoencoderKLKVAEVideo",
196198
"AutoencoderKLLTX2Audio",
197199
"AutoencoderKLLTX2Video",
198200
"AutoencoderKLLTXVideo",
@@ -975,6 +977,8 @@
975977
AutoencoderKLHunyuanImageRefiner,
976978
AutoencoderKLHunyuanVideo,
977979
AutoencoderKLHunyuanVideo15,
980+
AutoencoderKLKVAE,
981+
AutoencoderKLKVAEVideo,
978982
AutoencoderKLLTX2Audio,
979983
AutoencoderKLLTX2Video,
980984
AutoencoderKLLTXVideo,

src/diffusers/models/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
_import_structure["autoencoders.autoencoder_kl_hunyuanimage"] = ["AutoencoderKLHunyuanImage"]
4141
_import_structure["autoencoders.autoencoder_kl_hunyuanimage_refiner"] = ["AutoencoderKLHunyuanImageRefiner"]
4242
_import_structure["autoencoders.autoencoder_kl_hunyuanvideo15"] = ["AutoencoderKLHunyuanVideo15"]
43+
_import_structure["autoencoders.autoencoder_kl_kvae"] = ["AutoencoderKLKVAE"]
44+
_import_structure["autoencoders.autoencoder_kl_kvae_video"] = ["AutoencoderKLKVAEVideo"]
4345
_import_structure["autoencoders.autoencoder_kl_ltx"] = ["AutoencoderKLLTXVideo"]
4446
_import_structure["autoencoders.autoencoder_kl_ltx2"] = ["AutoencoderKLLTX2Video"]
4547
_import_structure["autoencoders.autoencoder_kl_ltx2_audio"] = ["AutoencoderKLLTX2Audio"]
@@ -161,6 +163,8 @@
161163
AutoencoderKLHunyuanImageRefiner,
162164
AutoencoderKLHunyuanVideo,
163165
AutoencoderKLHunyuanVideo15,
166+
AutoencoderKLKVAE,
167+
AutoencoderKLKVAEVideo,
164168
AutoencoderKLLTX2Audio,
165169
AutoencoderKLLTX2Video,
166170
AutoencoderKLLTXVideo,

src/diffusers/models/autoencoders/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from .autoencoder_kl_hunyuanimage import AutoencoderKLHunyuanImage
1010
from .autoencoder_kl_hunyuanimage_refiner import AutoencoderKLHunyuanImageRefiner
1111
from .autoencoder_kl_hunyuanvideo15 import AutoencoderKLHunyuanVideo15
12+
from .autoencoder_kl_kvae import AutoencoderKLKVAE
13+
from .autoencoder_kl_kvae_video import AutoencoderKLKVAEVideo
1214
from .autoencoder_kl_ltx import AutoencoderKLLTXVideo
1315
from .autoencoder_kl_ltx2 import AutoencoderKLLTX2Video
1416
from .autoencoder_kl_ltx2_audio import AutoencoderKLLTX2Audio

0 commit comments

Comments
 (0)