"""Generate a short video from a text prompt with CogVideoX-5b.

The script downloads the model files, builds a ``CogVideoXPipeline``, runs one
generation for ``prompt`` and writes the result as an ``.mp4`` plus a
down-scaled ``.gif`` in the current working directory.
"""

import math
import os
import random
import time
from datetime import datetime
from typing import Union, List

import numpy as np
import PIL.Image
import torch
from huggingface_hub import hf_hub_download
from diffusers import CogVideoXPipeline, CogVideoXDDIMScheduler, CogVideoXDPMScheduler
from diffusers.image_processor import VaeImageProcessor
from diffusers.utils import export_to_video
import moviepy.editor as mp

# Hub repository the weights come from, and the local directory the pipeline
# is loaded from. The downloads below are written into MODEL_DIR so that the
# two stay in sync.
MODEL_REPO = "vdo/CogVideoX-5b"
MODEL_DIR = "/content/CogVideoX-5b"


def download_file(repo_id, filename, subfolder, local_dir=None):
    """Download one file from the Hugging Face Hub and return its local path.

    Args:
        repo_id: Hub repository id, e.g. ``"vdo/CogVideoX-5b"``.
        filename: Name of the file inside ``subfolder``.
        subfolder: Folder inside the repository (``""`` for the repo root).
        local_dir: Optional directory to place the file in (the repository
            layout is kept below it). When ``None`` the default Hub cache is
            used, exactly as before this parameter existed.

    Returns:
        The path of the downloaded file as reported by ``hf_hub_download``.
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
        local_dir=local_dir,
    )


def convert_to_gif(video_path):
    """Write an 8 fps, 240 px high GIF next to ``video_path``.

    Args:
        video_path: Path of the source video file.

    Returns:
        Path of the GIF: ``video_path`` with its extension replaced by ``.gif``.
    """
    # splitext only touches the real extension; str.replace would also rewrite
    # ".mp4" occurring elsewhere in the path and would return the *input* path
    # unchanged for any other extension.
    gif_path = os.path.splitext(video_path)[0] + ".gif"
    clip = mp.VideoFileClip(video_path)
    try:
        clip.set_fps(8).resize(height=240).write_gif(gif_path, fps=8)
    finally:
        # Release the underlying reader even if encoding fails.
        clip.close()
    return gif_path


def save_video(tensor: Union[List[np.ndarray], List[PIL.Image.Image]], fps: int = 8):
    """Export ``tensor`` (a list of frames) to a timestamp-named ``.mp4``.

    Args:
        tensor: Video frames, as numpy arrays or PIL images.
        fps: Frame rate passed to ``export_to_video``.

    Returns:
        The relative path of the written file (``YYYYmmdd_HHMMSS.mp4``).
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    video_path = f"{timestamp}.mp4"
    export_to_video(tensor, video_path, fps=fps)
    return video_path


def _fetch(filename, subfolder):
    """Download one model file from MODEL_REPO into MODEL_DIR."""
    return download_file(MODEL_REPO, filename, subfolder, local_dir=MODEL_DIR)


# Downloading necessary files
scheduler_config_path = _fetch("scheduler_config.json", "scheduler")

text_encoder_config_path = _fetch("config.json", "text_encoder")
text_encoder_model_1_path = _fetch("model-00001-of-00002.safetensors", "text_encoder")
text_encoder_model_2_path = _fetch("model-00002-of-00002.safetensors", "text_encoder")
text_encoder_index_path = _fetch("model.safetensors.index.json", "text_encoder")

tokenizer_added_tokens_path = _fetch("added_tokens.json", "tokenizer")
tokenizer_special_tokens_map_path = _fetch("special_tokens_map.json", "tokenizer")
tokenizer_model_path = _fetch("spiece.model", "tokenizer")
tokenizer_config_path = _fetch("tokenizer_config.json", "tokenizer")

transformer_config_path = _fetch("config.json", "transformer")
transformer_model_1_path = _fetch(
    "diffusion_pytorch_model-00001-of-00002.safetensors", "transformer"
)
transformer_model_2_path = _fetch(
    "diffusion_pytorch_model-00002-of-00002.safetensors", "transformer"
)
transformer_index_path = _fetch(
    "diffusion_pytorch_model.safetensors.index.json", "transformer"
)

vae_config_path = _fetch("config.json", "vae")
vae_model_path = _fetch("diffusion_pytorch_model.safetensors", "vae")

configuration_path = _fetch("configuration.json", "")
model_index_path = _fetch("model_index.json", "")

# NOTE(review): float16 is kept from the original script; confirm against the
# model card whether bfloat16 is the intended dtype for this checkpoint.
pipe = CogVideoXPipeline.from_pretrained(MODEL_DIR, torch_dtype=torch.float16)

# NOTE(review): both offload modes are enabled back to back, as in the
# original. Presumably only the last call is effective - confirm and drop one.
pipe.enable_model_cpu_offload()
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()

prompt = "A golden retriever, sporting sleek black sunglasses, with its lengthy fur flowing in the breeze, sprints playfully across a rooftop terrace, recently refreshed by a light rain. The scene unfolds from a distance, the dog's energetic bounds growing larger as it approaches the camera, its tail wagging with unrestrained joy, while droplets of water glisten on the concrete behind it. The overcast sky provides a dramatic backdrop, emphasizing the vibrant golden coat of the canine as it dashes towards the viewer."

# A seed of 0 means "pick one for me": draw a time-seeded random 64-bit value
# and print it so the run can be reproduced.
seed = 0
if seed == 0:
    random.seed(int(time.time()))
    seed = random.randint(0, 18446744073709551615)
print(seed)

with torch.inference_mode():
    video_pt = pipe(
        prompt=prompt,
        num_videos_per_prompt=1,
        num_inference_steps=50,
        num_frames=49,
        use_dynamic_cfg=True,
        output_type="pt",
        guidance_scale=7.0,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    ).frames

# Convert every video of the batch to a list of PIL frames. Iterating the
# tensor walks its first (batch) dimension.
# Assumes each item is (frames, channels, height, width) - TODO confirm.
batch_video_frames = [
    VaeImageProcessor.numpy_to_pil(VaeImageProcessor.pt_to_numpy(video_frames))
    for video_frames in video_pt
]

# Only the first video of the batch is exported; with 49 frames the frame
# rate below evaluates to ceil(48 / 6) == 8 fps.
video_path = save_video(
    batch_video_frames[0],
    fps=math.ceil((len(batch_video_frames[0]) - 1) / 6),
)
gif_path = convert_to_gif(video_path)