-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathapp.py
More file actions
144 lines (122 loc) · 4.42 KB
/
app.py
File metadata and controls
144 lines (122 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Improved version of, and based on: https://huggingface.co/spaces/multimodalart/stable-video-diffusion
import torch
import os
from glob import glob
from typing import Optional, Tuple
import random
import gradio as gr
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image
# Largest signed 64-bit integer; used as the upper bound for seeds
# (random seed generation and the seed slider both reference it).
max_64_bit_int = 2 ** 63 - 1
# Default directory for generated .mp4 files; created at import time if missing.
output_folder = "outputs"
os.makedirs(output_folder, exist_ok=True)
# Load the Stable Video Diffusion img2vid-xt pipeline from a local model
# directory, requesting the fp16 weight variant in half precision.
pipe = StableVideoDiffusionPipeline.from_pretrained(
"/app/models/stabilityai/stable-video-diffusion-img2vid-xt",
torch_dtype=torch.float16,
variant="fp16",
)
# Move the whole pipeline to the GPU.
pipe.to("cuda")
# Replace the UNet and the VAE with torch.compile-wrapped versions.
# NOTE(review): per the torch.compile docs, "reduce-overhead" targets lower
# per-call overhead and fullgraph=True errors out on graph breaks — confirm
# this is the intended trade-off for the deployed torch version.
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
# Optional switches, disabled by default; enable them according to your
# actual needs.
# NOTE(review): presumably these trade speed for lower VRAM use — confirm
# against the diffusers documentation before enabling.
#
# pipe.enable_model_cpu_offload()
# pipe.unet.enable_forward_chunking()
def sample(
    image: Image.Image,
    seed: Optional[int] = 42,
    randomize_seed: bool = True,
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    version: str = "svd_xt",
    cond_aug: float = 0.02,
    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
    device: str = "cuda",
    output_folder: str = output_folder,
) -> Tuple[str, int]:
    """Generate a 25-frame video from *image* and save it as an .mp4 file.

    Args:
        image: Conditioning image handed to the pipeline.
        seed: Seed for the random generator. Ignored when *randomize_seed*
            is true; when it is ``None`` a random seed is drawn instead.
        randomize_seed: Draw a fresh seed in ``[0, max_64_bit_int]``.
        motion_bucket_id: Forwarded to the pipeline unchanged.
        fps_id: Frame rate used when writing the video file.
        version: Accepted for interface compatibility; not used here.
        cond_aug: Accepted for interface compatibility; not used here (the
            pipeline is always called with ``noise_aug_strength=0.1``).
        decoding_t: Forwarded to the pipeline as ``decode_chunk_size``.
        device: Accepted for interface compatibility; not used here.
        output_folder: Directory the video is written to; created on demand.

    Returns:
        A ``(video_path, seed)`` tuple with the path of the written file and
        the seed that was actually used.
    """
    # A missing seed cannot be passed to torch.manual_seed, so treat it as a
    # request for a random one instead of failing.
    if randomize_seed or seed is None:
        seed = random.randint(0, max_64_bit_int)
    # UI widgets may deliver the seed as a float; normalise it so the value
    # reported back to the caller is a plain integer.
    seed = int(seed)

    # The caller may point at a folder other than the one created at import.
    os.makedirs(output_folder, exist_ok=True)

    # Start numbering from the count of existing videos, then skip any name
    # that is already taken: after files have been deleted the count alone
    # can collide with an existing file and silently overwrite it.
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
    while os.path.exists(video_path):
        base_count += 1
        video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    frames = pipe(
        image,
        decode_chunk_size=decoding_t,
        generator=torch.manual_seed(seed),
        motion_bucket_id=motion_bucket_id,
        noise_aug_strength=0.1,
        num_frames=25,
    ).frames[0]
    export_to_video(frames, video_path, fps=fps_id)
    return video_path, seed
def resize_image(
    image: Image.Image, output_size: Tuple[int, int] = (1024, 576)
) -> Image.Image:
    """Scale *image* to cover *output_size*, center-crop it, and return RGB.

    The image is resized (keeping its aspect ratio) so that it fully covers
    the target rectangle, then the overflow is trimmed equally from both
    sides of the longer dimension.

    Args:
        image: Source image.
        output_size: Target ``(width, height)`` in pixels.

    Returns:
        A new image of exactly ``output_size`` pixels in ``"RGB"`` mode.
    """
    target_width, target_height = output_size

    # Compare aspect ratios by cross-multiplication and scale with integer
    # arithmetic: this avoids float rounding that could leave the resized
    # image one pixel short of the target and make the crop pad with black.
    if image.width * target_height > image.height * target_width:
        # Source is relatively wider: match the height, crop left and right.
        new_height = target_height
        new_width = new_height * image.width // image.height
    else:
        # Source is relatively taller (or same ratio): match the width,
        # crop top and bottom.
        new_width = target_width
        new_height = new_width * image.height // image.width

    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # Integer crop box centred on the resized image; its size is exactly the
    # requested output size by construction.
    left = (new_width - target_width) // 2
    top = (new_height - target_height) // 2
    cropped_image = resized_image.crop(
        (left, top, left + target_width, top + target_height)
    )

    # Normalise every non-RGB mode (RGBA, L, P, ...), not just RGBA, so the
    # caller always receives a three-channel image.
    if cropped_image.mode != "RGB":
        cropped_image = cropped_image.convert("RGB")
    return cropped_image
def generate(image, seed, randomize_seed, motion_bucket_id, fps_id):
    """Gradio callback: fit the uploaded image to 1024x576 and render a video.

    Args:
        image: Uploaded image, or ``None`` when nothing was uploaded.
        seed: Seed chosen in the UI.
        randomize_seed: Whether to ignore *seed* and draw a random one.
        motion_bucket_id: Motion setting forwarded to ``sample``.
        fps_id: Frame rate forwarded to ``sample``.

    Returns:
        The ``(video_path, seed)`` tuple produced by ``sample``.

    Raises:
        gr.Error: If no image was provided.
    """
    # Submitting without an upload passes None; report that clearly in the
    # UI instead of failing later with an AttributeError on None.
    if image is None:
        raise gr.Error("Please upload an image before generating a video.")
    img = resize_image(image, output_size=(1024, 576))
    video, seed = sample(img, seed, randomize_seed, motion_bucket_id, fps_id)
    return video, seed
# Input widgets, listed in the positional order of generate()'s parameters:
# image, seed, randomize_seed, motion_bucket_id, fps_id.
_input_components = [
    gr.Image(label="Upload your image", type="pil"),
    gr.Slider(
        label="Seed",
        value=42,
        randomize=True,
        minimum=0,
        maximum=max_64_bit_int,
        step=1,
    ),
    gr.Checkbox(label="Randomize seed", value=True),
    gr.Slider(
        label="Motion bucket id",
        info="Controls how much motion to add/remove from the image",
        value=127,
        minimum=1,
        maximum=255,
    ),
    gr.Slider(
        label="Frames per second",
        info="The length of your video in seconds will be 25/fps",
        value=6,
        minimum=5,
        maximum=30,
    ),
]

# Output widgets, matching generate()'s (video, seed) return value.
_output_components = [
    gr.PlayableVideo(label="Generated video"),
    gr.Textbox(label="Seed", type="text"),
]

# Web UI that wires the widgets above to the generate() callback.
app = gr.Interface(
    fn=generate,
    inputs=_input_components,
    outputs=_output_components,
)
if __name__ == "__main__":
    # Enable request queuing with a queue size limit of two.
    app.queue(max_size=2)
    # Serve on all network interfaces, without a public share link.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "ssl_verify": False,
    }
    app.launch(**launch_options)