Spaces:

CharlieAmalet
/

SVD-XT-1.1

Runtime error

App Files Files Community

SVD-XT-1.1 / app.py

CharlieAmalet

For an unknown reason, the space becomes extremely slow -> rollback (#2)

7231c46 verified 3 months ago

raw

history blame contribute delete

No virus

6.43 kB

	import torch._dynamo
	torch._dynamo.config.suppress_errors = True

	import torch
	import gradio as gr
	import os
	import base64
	from glob import glob
	from pathlib import Path
	from typing import Optional

	from diffusers import StableVideoDiffusionPipeline
	from diffusers.utils import load_image, export_to_video
	from PIL import Image

	import uuid
	import random
	from huggingface_hub import login, hf_hub_download
	import spaces

	# Local directory used both for the raw checkpoint download and for the
	# pipeline saved by ``save_pretrained``.
	model_directory = './checkpoints'
	model_repo_id = "vdo/stable-video-diffusion-img2vid-xt-1-1"

	# Best-effort prefetch of the single-file checkpoint. A failure here is not
	# fatal because the pipeline loader below can still pull from the Hub, so the
	# error is only reported. ``Exception`` (not ``BaseException``) is caught so
	# that Ctrl-C / SystemExit still stop the process.
	try:
	    hf_hub_download(repo_id=model_repo_id, filename="svd_xt_1_1.safetensors", local_dir=model_directory, cache_dir=model_directory)
	except Exception as error:
	    print(error)


	def _load_pipeline_from_hub():
	    """Load the fp16 pipeline from the Hub and save a copy in ``model_directory``."""
	    hub_pipe = StableVideoDiffusionPipeline.from_pretrained(
	        model_repo_id,
	        torch_dtype=torch.float16,
	        variant="fp16",
	    )
	    hub_pipe.save_pretrained(model_directory, variant="fp16")
	    return hub_pipe


	def _load_pipeline():
	    """Return the Stable Video Diffusion pipeline, preferring the local copy.

	    The local copy is tried only when ``model_index.json`` is present in
	    ``model_directory``; the directory merely existing is not enough, since
	    the checkpoint prefetch above targets the same directory. If the local
	    load fails for any reason the error is printed and the pipeline is
	    loaded from the Hub (and re-saved locally) instead.
	    """
	    if os.path.isfile(os.path.join(model_directory, "model_index.json")):
	        try:
	            return StableVideoDiffusionPipeline.from_pretrained(
	                model_directory,
	                torch_dtype=torch.float16,
	                variant="fp16",
	            )
	        except Exception as error:
	            # An unusable local copy must not take the app down; report it
	            # and fall through to a fresh load from the Hub.
	            print(error)
	    return _load_pipeline_from_hub()


	pipe = _load_pipeline()

	# Upper bound offered by the seed slider in the UI.
	max_64_bit_int = 2**63 - 1

	@spaces.GPU(enable_queue=True, duration=240)
	def generate_video(
	    image: Image.Image,
	    seed: int,
	    motion_bucket_id: int = 127,
	    fps_id: int = 6,
	    version: str = "svd_xt",
	    cond_aug: float = 0.02,
	    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
	    device: str = "cuda",
	    output_folder: str = "outputs",
	):
	    """Generate a 25-frame video from a still image and return its file path.

	    Args:
	        image: Input picture; it is resized/cropped by ``resize_image`` and
	            converted to RGB before being handed to the pipeline.
	        seed: Seed passed to ``torch.manual_seed`` for the generation.
	        motion_bucket_id: Forwarded to the pipeline as ``motion_bucket_id``.
	        fps_id: Frame rate used when the frames are written to disk.
	        version: Accepted for interface compatibility; not used.
	        cond_aug: Accepted for interface compatibility; not used. The
	            pipeline is always called with ``noise_aug_strength=0.1``.
	        decoding_t: Forwarded to the pipeline as ``decode_chunk_size``.
	        device: Accepted for interface compatibility; the device is always
	            re-derived from ``torch.cuda.is_available()``.
	        output_folder: Directory (created if missing) that receives the
	            ``.mp4`` file.

	    Returns:
	        Path of the written ``.mp4`` file.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    # Clicking "Generate" without an upload delivers None; report that to
	    # the user instead of failing with an AttributeError further down.
	    if image is None:
	        raise gr.Error("Please upload an image first.")

	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    pipe.to(device)

	    image = resize_image(image)

	    # Convert every non-RGB mode (RGBA, palette, greyscale, ...) rather than
	    # only RGBA.
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    generator = torch.manual_seed(seed)

	    os.makedirs(output_folder, exist_ok=True)
	    # A random name cannot collide with a file from another request, unlike
	    # a name derived from counting the files already in the folder. This
	    # avoids one user being served another user's video.
	    video_path = os.path.join(output_folder, f"{uuid.uuid4().hex}.mp4")

	    frames = pipe(
	        image,
	        decode_chunk_size=decoding_t,
	        generator=generator,
	        motion_bucket_id=motion_bucket_id,
	        noise_aug_strength=0.1,
	        num_frames=25,
	    ).frames[0]
	    export_to_video(frames, video_path, fps=fps_id)

	    return video_path

	def resize_image(image, output_size=(1024, 576)):
	    """Scale ``image`` to cover ``output_size`` and crop the centred overflow.

	    The aspect ratio is kept while scaling: an image whose width/height
	    ratio is greater than the target's is scaled to the target height and
	    cropped on the left and right; any other image is scaled to the target
	    width and cropped on the top and bottom.

	    Args:
	        image: Object exposing ``width``, ``height``, ``resize`` and ``crop``
	            (a PIL image in this file).
	        output_size: ``(width, height)`` of the returned image.

	    Returns:
	        The result of ``crop`` on the resized image.
	    """
	    target_ratio = output_size[0] / output_size[1]
	    source_ratio = image.width / image.height

	    if source_ratio > target_ratio:
	        # Relatively wider than the target: fit the height, trim the sides.
	        scaled_size = (int(output_size[1] * source_ratio), output_size[1])
	        scaled = image.resize(scaled_size, Image.LANCZOS)
	        crop_box = (
	            (scaled_size[0] - output_size[0]) / 2,
	            0,
	            (scaled_size[0] + output_size[0]) / 2,
	            output_size[1],
	        )
	    else:
	        # Same ratio or relatively taller: fit the width, trim top and bottom.
	        scaled_size = (output_size[0], int(output_size[0] / source_ratio))
	        scaled = image.resize(scaled_size, Image.LANCZOS)
	        crop_box = (
	            0,
	            (scaled_size[1] - output_size[1]) / 2,
	            output_size[0],
	            (scaled_size[1] + output_size[1]) / 2,
	        )

	    return scaled.crop(crop_box)


	# Custom CSS handed to gr.Blocks below.
	css = """
	img, video {
	max-height: 400px;
	object-fit: contain;
	}
	video {
	margin: 0 auto
	}
	"""

	# One row with two columns: inputs and the trigger button on the left, the
	# generated video on the right.
	with gr.Blocks(css=css) as SVD_XT_1_1:
	    with gr.Row():
	        with gr.Column():
	            image = gr.Image(
	                label="Upload your image",
	                type="pil",
	            )
	            generate_btn = gr.Button("Generate")
	            seed = gr.Slider(
	                label="Seed",
	                value=42,
	                randomize=False,
	                minimum=0,
	                maximum=max_64_bit_int,
	                step=1,
	            )
	            motion_bucket_id = gr.Slider(
	                label="Motion bucket id",
	                info="Controls how much motion to add/remove from the image",
	                value=127,
	                minimum=1,
	                maximum=255,
	            )
	            fps_id = gr.Slider(
	                label="Frames per second",
	                info="The length of your video in seconds will be 25/fps",
	                value=6,
	                minimum=5,
	                maximum=30,
	            )

	        with gr.Column():
	            video_out = gr.Video(autoplay=True)

	    # The four inputs map positionally onto the first four parameters of
	    # generate_video; the handler is also exposed under the API name "run".
	    generate_btn.click(
	        fn=generate_video,
	        inputs=[image, seed, motion_bucket_id, fps_id],
	        outputs=video_out,
	        api_name="run",
	    )

	SVD_XT_1_1.launch()