Dream-wan2-2-faster-Pro

Runtime error

App Files Files Community

dream2589632147 committed on Oct 26

Commit

44a4bd9

verified ·

1 Parent(s): acd25ee

Update app.py

Browse files

Files changed (1) hide show

app.py +212 -126

app.py CHANGED Viewed

@@ -1,21 +1,34 @@
-import spaces
 import torch
 from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
 from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
 from diffusers.utils.export_utils import export_to_video
-import gradio as gr
-import tempfile
-import numpy as np
-from PIL import Image
-import random
-import gc
-from torchao.quantization import quantize_
-from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Int8WeightOnlyConfig
 # ------------------------
-# إعدادات النموذج
 # ------------------------
-MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 MAX_DIM = 832
 MIN_DIM = 480
@@ -28,75 +41,17 @@ MAX_FRAMES_MODEL = 480
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
-# ------------------------
-# تحميل النموذج
-# ------------------------
-print("🔹 Loading model... Please wait, this may take a few minutes.")
-pipe = WanImageToVideoPipeline.from_pretrained(
-    MODEL_ID,
-    transformer=WanTransformer3DModel.from_pretrained(
-        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer',
-        torch_dtype=torch.float16,
-        device_map='cuda'
-    ),
-    transformer_2=WanTransformer3DModel.from_pretrained(
-        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
-        subfolder='transformer_2',
-        torch_dtype=torch.float16,
-        device_map='cuda'
-    ),
-    torch_dtype=torch.float16
-).to('cuda')
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v"
-)
-pipe.load_lora_weights(
-    "Kijai/WanVideo_comfy",
-    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
-    adapter_name="lightx2v_2",
-    load_into_transformer_2=True
-)
-pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
-pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
-pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
-# لا نقوم بفك تحميل الـ LoRA بعد الدمج
-# ------------------------
-# كوانتاز اختياري (تسريع وتحسين الذاكرة)
-# ------------------------
-if torch.cuda.is_available():
-    try:
-        quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
-        quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
-        quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
-        print("✅ Quantization applied successfully.")
-    except Exception as e:
-        print(f"⚠️ Quantization skipped due to: {e}")
-# ------------------------
-# الموجهات الافتراضية
-# ------------------------
 default_prompt_i2v = (
-    "ultra realistic cinematic footage, perfectly preserved facial identity and body structure "
-    "across all frames, stable anatomy and consistent body proportions, realistic muscle definition, "
-    "natural motion flow and breathing dynamics, seamless motion continuity, photorealistic clothing "
-    "preservation with accurate fabric movement and lighting response, consistent outfit color and texture, "
-    "high-fidelity skin texture, detailed lighting and shadows"
 )
 default_negative_prompt = (
-    "low quality, low resolution, poor lighting, underexposed, overexposed, noise, flickering, artifacts, "
-    "stutter, inconsistent motion, broken motion, distorted face, changing face, unnatural anatomy"
 )
 # ------------------------
-# أدوات الصورة والفيديو
 # ------------------------
 def resize_image(image: Image.Image) -> Image.Image:
     width, height = image.size
@@ -107,106 +62,237 @@ def resize_image(image: Image.Image) -> Image.Image:
     MAX_AR = MAX_DIM / MIN_DIM
     MIN_AR = MIN_DIM / MAX_DIM
     if aspect_ratio > MAX_AR:
         crop_width = int(round(height * MAX_AR))
         left = (width - crop_width) // 2
-        image = image.crop((left, 0, left + crop_width, height))
     elif aspect_ratio < MIN_AR:
         crop_height = int(round(width / MIN_AR))
         top = (height - crop_height) // 2
-        image = image.crop((0, top, width, top + crop_height))
-    if width > height:
-        target_w = MAX_DIM
-        target_h = int(round(target_w / aspect_ratio))
     else:
-        target_h = MAX_DIM
-        target_w = int(round(target_h * aspect_ratio))
     final_w = max(MIN_DIM, min(MAX_DIM, round(target_w / MULTIPLE_OF) * MULTIPLE_OF))
     final_h = max(MIN_DIM, min(MAX_DIM, round(target_h / MULTIPLE_OF) * MULTIPLE_OF))
-    return image.resize((final_w, final_h), Image.LANCZOS)
 def get_num_frames(duration_seconds: float):
     return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
 # ------------------------
-# عملية التوليد
 # ------------------------
-@spaces.GPU()
 def generate_video(
     input_image,
     prompt,
-    steps=4,
     negative_prompt=default_negative_prompt,
     duration_seconds=3.5,
     guidance_scale=1.0,
     guidance_scale_2=1.0,
     seed=42,
     randomize_seed=False,
-    progress=gr.Progress(track_tqdm=True)
 ):
-    if input_image is None:
-        raise gr.Error("⚠️ Please upload an input image first.")
-    num_frames = get_num_frames(duration_seconds)
-    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
-    resized_image = resize_image(input_image)
-    with progress.tqdm(total=100) as pbar:
-        pbar.set_description("🎬 Generating video...")
-        output_frames_list = pipe(
-            image=resized_image,
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            height=resized_image.height,
-            width=resized_image.width,
-            num_frames=num_frames,
-            guidance_scale=float(guidance_scale),
-            guidance_scale_2=float(guidance_scale_2),
-            num_inference_steps=int(steps),
-            generator=torch.Generator(device="cuda").manual_seed(current_seed),
-        ).frames[0]
-    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
-        video_path = tmpfile.name
-    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
-    torch.cuda.empty_cache()
-    gc.collect()
-    return video_path, current_seed
 # ------------------------
-# واجهة المستخدم
 # ------------------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet")) as demo:
     gr.HTML("""
     <div style="text-align:center; padding:20px;">
-        <h1 style="font-size: 2em;">Wan 2.2 Lightning Studio – AI Cinematic Video Generator</h1>
-        <p style="opacity:0.8;">⚡ Powered by dream2589632147</p>
     </div>
-    """)
     with gr.Row():
         with gr.Column(scale=1):
             input_image = gr.Image(label="🎞️ Input Image", type="pil")
             prompt = gr.Textbox(label="✨ Positive Prompt", value=default_prompt_i2v, lines=3)
             negative_prompt = gr.Textbox(label="🚫 Negative Prompt", value=default_negative_prompt, lines=3)
-            duration = gr.Slider(MIN_DURATION, MAX_DURATION, value=3.5, step=0.1, label="🎬 Duration (seconds)")
             with gr.Accordion("⚙️ Advanced Settings", open=False):
                 steps = gr.Slider(1, 30, value=6, step=1, label="Inference Steps")
                 guidance_scale = gr.Slider(0.0, 10.0, value=1.0, step=0.5, label="Guidance Scale 1")
                 guidance_scale_2 = gr.Slider(0.0, 10.0, value=1.0, step=0.5, label="Guidance Scale 2")
-                seed = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
-                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
             generate_btn = gr.Button("🚀 Generate Cinematic Video", variant="primary")
         with gr.Column(scale=1):
             video_output = gr.Video(label="🎬 Generated Video Preview", autoplay=True)
-            seed_output = gr.Textbox(label="🎲 Seed Used", interactive=False)
             download_btn = gr.File(label="⬇️ Download MP4")
     generate_btn.click(
         fn=generate_video,
         inputs=[input_image, prompt, steps, negative_prompt, duration,
@@ -214,7 +300,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet")) as demo:
         outputs=[video_output, seed_output]
     )
-    # زر تبديل الوضع الليلي/النهاري
     gr.HTML("""
     <script>
         const toggle = document.createElement('button');
@@ -232,7 +318,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet")) as demo:
     </script>
     """)
-    gr.Markdown("---\nMade with ❤️ using [Gradio](https://gradio.app) • Hosted on [Hugging Face Spaces](https://huggingface.co/spaces)")
 if __name__ == "__main__":
     demo.queue().launch()

+# app.py — Modified for dream2589632147/Dream-wan2-2-faster-Pro
+import os
+import tempfile
+import random
+import gc
+import traceback
+import numpy as np
+from PIL import Image
 import torch
+import gradio as gr
 from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
 from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
 from diffusers.utils.export_utils import export_to_video
+# Optional quantization (wrapped safely)
+try:
+    from torchao.quantization import quantize_
+    from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Int8WeightOnlyConfig
+    HAS_TORCHAO_QUANT = True
+except Exception:
+    HAS_TORCHAO_QUANT = False
 # ------------------------
+# Configuration
 # ------------------------
+MODEL_ID = "dream2589632147/Dream-wan2-2-faster-Pro"  # user's model
+# If your actual transformer checkpoint differs, update the following IDs accordingly:
+TRANSFORMER_BACKBONE = "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers"
 MAX_DIM = 832
 MIN_DIM = 480
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
+# default prompts (shortened for readability — replace with your full prompts)
 default_prompt_i2v = (
+    "ultra realistic cinematic footage, perfectly preserved facial identity and body structure across all frames,"
+    " stable anatomy and consistent body proportions, realistic skin, photorealistic lighting"
 )
 default_negative_prompt = (
+    "low quality, low resolution, poor lighting, noise, flicker, artifact, changing face, inconsistent anatomy"
 )
 # ------------------------
+# Utility functions
 # ------------------------
 def resize_image(image: Image.Image) -> Image.Image:
     width, height = image.size
     MAX_AR = MAX_DIM / MIN_DIM
     MIN_AR = MIN_DIM / MAX_DIM
+    image_to_resize = image
     if aspect_ratio > MAX_AR:
         crop_width = int(round(height * MAX_AR))
         left = (width - crop_width) // 2
+        image_to_resize = image.crop((left, 0, left + crop_width, height))
     elif aspect_ratio < MIN_AR:
         crop_height = int(round(width / MIN_AR))
         top = (height - crop_height) // 2
+        image_to_resize = image.crop((0, top, width, top + crop_height))
     else:
+        if width > height:
+            target_w = MAX_DIM
+            target_h = int(round(target_w / aspect_ratio))
+        else:
+            target_h = MAX_DIM
+            target_w = int(round(target_h * aspect_ratio))
+    # ensure multiple-of constraint
     final_w = max(MIN_DIM, min(MAX_DIM, round(target_w / MULTIPLE_OF) * MULTIPLE_OF))
     final_h = max(MIN_DIM, min(MAX_DIM, round(target_h / MULTIPLE_OF) * MULTIPLE_OF))
+    return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
 def get_num_frames(duration_seconds: float):
     return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
 # ------------------------
+# Load pipeline (wrapped in try/except to provide clear messages)
 # ------------------------
print("🔹 Loading pipeline. This can take a while...")
try:
    # Both transformers come from the same backbone repository and share every
    # load option except the subfolder. float16 is kept, per the original
    # author's note, for compatibility with most GPUs.
    _shared_load_options = {"torch_dtype": torch.float16, "device_map": "cuda"}
    transformer_kwargs = {"subfolder": "transformer", **_shared_load_options}
    transformer_2_kwargs = {"subfolder": "transformer_2", **_shared_load_options}

    transformer = WanTransformer3DModel.from_pretrained(TRANSFORMER_BACKBONE, **transformer_kwargs)
    transformer_2 = WanTransformer3DModel.from_pretrained(TRANSFORMER_BACKBONE, **transformer_2_kwargs)

    pipe = WanImageToVideoPipeline.from_pretrained(
        MODEL_ID,
        transformer=transformer,
        transformer_2=transformer_2,
        torch_dtype=torch.float16,
    ).to("cuda")
    print("✅ Pipeline loaded successfully.")

    # LoRA adapters are best-effort: a failure here is reported and the
    # pipeline keeps running without them.
    try:
        _lora_repo = "Kijai/WanVideo_comfy"
        _lora_file = "Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors"

        # The same weight file is loaded twice, once per transformer.
        pipe.load_lora_weights(_lora_repo, weight_name=_lora_file, adapter_name="lightx2v")
        pipe.load_lora_weights(
            _lora_repo,
            weight_name=_lora_file,
            adapter_name="lightx2v_2",
            load_into_transformer_2=True,
        )

        pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
        pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
        pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
        print("✅ LoRA adapters loaded and fused.")
    except Exception as e:
        print(f"⚠️ Could not load/fuse LoRA adapters: {e}")

    # Optional quantization: requires both torchao and CUDA.
    if not HAS_TORCHAO_QUANT:
        print("ℹ️ torchao.quantization not available; skipping quantization.")
    elif not torch.cuda.is_available():
        print("ℹ️ CUDA not available; skipping quantization.")
    else:
        try:
            # Components are quantized one after another, each config being
            # built right before use, so a failure leaves earlier components
            # quantized exactly as a straight-line sequence of calls would.
            for _component_name, _config_factory in (
                ("text_encoder", Int8WeightOnlyConfig),
                ("transformer", Float8DynamicActivationFloat8WeightConfig),
                ("transformer_2", Float8DynamicActivationFloat8WeightConfig),
            ):
                quantize_(getattr(pipe, _component_name), _config_factory())
            print("✅ Quantization applied.")
        except Exception as e:
            print(f"⚠️ Quantization skipped: {e}")
except Exception:
    print("❌ Failed to initialize pipeline. Full traceback:")
    traceback.print_exc()
    # The app keeps running; generate_video checks for a missing pipeline and
    # reports the problem to the UI instead of crashing.
    pipe = None
+# ------------------------
+# Video generation function
+# ------------------------
# The GPU decorator is resolved up front instead of inside the `@` expression,
# which keeps the decorator line valid on interpreters older than Python 3.9.
# When no `spaces` module is present in this module's globals, the decorator
# is a no-op.
# NOTE(review): no `import spaces` is visible in this file's import block, so
# the no-op branch is presumably the one taken — confirm that is intended.
_spaces_module = globals().get("spaces")
_gpu_task = _spaces_module.GPU() if hasattr(_spaces_module, "GPU") else (lambda fn: fn)


def _release_gpu_memory():
    """Best-effort release of cached CUDA memory and unreachable Python objects."""
    for cuda_step in (torch.cuda.synchronize, torch.cuda.empty_cache):
        try:
            cuda_step()
        except Exception as cleanup_err:
            # Cleanup must never replace the result (or the real error) of a run.
            print(f"⚠️ {cuda_step.__name__} failed during cleanup: {cleanup_err}")
    gc.collect()


def _discard_file(path):
    """Delete a partially written output file, ignoring a missing path."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError as remove_err:
        print(f"⚠️ Could not remove temporary file {path}: {remove_err}")


@_gpu_task
def generate_video(
    input_image,
    prompt,
    steps=6,
    negative_prompt=default_negative_prompt,
    duration_seconds=3.5,
    guidance_scale=1.0,
    guidance_scale_2=1.0,
    seed=42,
    randomize_seed=False,
    progress=gr.Progress()
):
    """Generate an MP4 from an input image and report the seed that was used.

    Args:
        input_image: Source image; it is converted to RGB and resized before
            being handed to the pipeline (the UI supplies a PIL image).
        prompt: Positive text prompt.
        steps: Number of inference steps (cast to int).
        negative_prompt: Negative text prompt.
        duration_seconds: Requested clip length, turned into a frame count by
            get_num_frames.
        guidance_scale: Guidance scale forwarded to the pipeline (cast to float).
        guidance_scale_2: Second guidance scale forwarded to the pipeline
            (cast to float).
        seed: Seed used when randomize_seed is false.
        randomize_seed: When true, a random seed in [0, MAX_SEED] replaces seed.
        progress: Gradio progress tracker used to report stage updates.

    Returns:
        A ``(video, status)`` pair. On success, ``video`` is the path of the
        written MP4 and ``status`` is the seed as a string. On failure,
        ``video`` is empty (``None``, or ``gr.update(value=None)`` when the
        pipeline was never initialized) and ``status`` is an error message.
        No exception is propagated to the caller.
    """
    video_path = None
    try:
        if pipe is None:
            return gr.update(value=None), "Error: pipeline not initialized on backend."
        if input_image is None:
            raise gr.Error("Please upload an input image.")

        num_frames = get_num_frames(duration_seconds)
        current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
        resized_image = resize_image(input_image.convert("RGB"))

        # A gr.Progress tracker is updated by calling it with a completion
        # fraction in [0, 1]; it is not a context manager and cannot be called
        # without a value.
        progress(0.0, desc="Starting generation...")
        progress(0.1, desc="Running diffusion pipeline (prepare)...")
        gen = torch.Generator(device="cuda").manual_seed(current_seed)

        progress(0.2, desc="Generating frames (this may take a while)...")
        result = pipe(
            image=resized_image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=resized_image.height,
            width=resized_image.width,
            num_frames=num_frames,
            guidance_scale=float(guidance_scale),
            guidance_scale_2=float(guidance_scale_2),
            num_inference_steps=int(steps),
            generator=gen,
        )

        # Expected shape is `result.frames[0]`; fall back to `result.frames`
        # and finally to the result object itself if that lookup is not possible.
        try:
            frames_list = result.frames[0]
        except (AttributeError, IndexError, KeyError, TypeError):
            frames_list = getattr(result, "frames", result)
        if frames_list is None:
            raise RuntimeError("Pipeline returned no frames.")

        progress(0.7, desc="Encoding frames to MP4...")
        # Only the unique file name is needed here; the file is closed before
        # the exporter writes to that path.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
            video_path = tmpfile.name
        export_to_video(frames_list, video_path, fps=FIXED_FPS)

        progress(0.95, desc="Finalizing and cleaning memory...")
        _release_gpu_memory()
        progress(1.0, desc="Done!")

        # Path for gr.Video plus the seed, as a string, for the status box.
        return video_path, str(current_seed)
    except gr.Error as ge:
        # Expected, user-facing input problem.
        return None, f"Input error: {ge}"
    except Exception as e:
        # Full traceback stays in the server log; the UI only gets the message.
        traceback_str = traceback.format_exc()
        print("Error during generation:\n", traceback_str)
        # A failed run must not leave a stray temp file or cached GPU memory behind.
        _discard_file(video_path)
        _release_gpu_memory()
        return None, f"Generation failed: {e}"
 # ------------------------
+# Gradio UI
 # ------------------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet")) as demo:
     gr.HTML("""
     <div style="text-align:center; padding:20px;">
+        <h1 style="font-size: 1.6em;">Dream Wan2.2 — Video Generator (wan2-2-faster-Pro)</h1>
+        <p style="opacity:0.8;">Model: {}</p>
     </div>
+    """.format(MODEL_ID))
     with gr.Row():
         with gr.Column(scale=1):
             input_image = gr.Image(label="🎞️ Input Image", type="pil")
             prompt = gr.Textbox(label="✨ Positive Prompt", value=default_prompt_i2v, lines=3)
             negative_prompt = gr.Textbox(label="🚫 Negative Prompt", value=default_negative_prompt, lines=3)
+            duration = gr.Slider(MIN_DURATION, MAX_DURATION, value=3.5, step=0.1, label="Duration (seconds)")
             with gr.Accordion("⚙️ Advanced Settings", open=False):
                 steps = gr.Slider(1, 30, value=6, step=1, label="Inference Steps")
                 guidance_scale = gr.Slider(0.0, 10.0, value=1.0, step=0.5, label="Guidance Scale 1")
                 guidance_scale_2 = gr.Slider(0.0, 10.0, value=1.0, step=0.5, label="Guidance Scale 2")
+                seed = gr.Number(value=42, label="Seed", precision=0)
+                randomize_seed = gr.Checkbox(label="Randomize Seed", value=False)
             generate_btn = gr.Button("🚀 Generate Cinematic Video", variant="primary")
         with gr.Column(scale=1):
             video_output = gr.Video(label="🎬 Generated Video Preview", autoplay=True)
+            seed_output = gr.Textbox(label="🎲 Seed Used / Status", interactive=False)
             download_btn = gr.File(label="⬇️ Download MP4")
+    # Wire up the button: outputs -> (video preview, seed/status)
     generate_btn.click(
         fn=generate_video,
         inputs=[input_image, prompt, steps, negative_prompt, duration,
         outputs=[video_output, seed_output]
     )
+    # Toggle theme script (kept from your original)
     gr.HTML("""
     <script>
         const toggle = document.createElement('button');
     </script>
     """)
+    gr.Markdown("---\nMade with ❤️ using Gradio • Hosted on Spaces")
 if __name__ == "__main__":
     demo.queue().launch()