Spaces:

NitinBot001
/

Text_Image-to-Video

Runtime error

App Files Files Community

NitinBot001 commited on Jun 19

Commit

7ee2d3d

verified ·

1 Parent(s): 9fdff0d

Create app.py

Browse files

Files changed (1) hide show

app.py +352 -0

app.py ADDED Viewed

	@@ -0,0 +1,352 @@

+import gradio as gr
+import torch
+from diffusers import LTXVideoTransformer3DModel, LTXVideoPipeline
+from transformers import T5EncoderModel, T5Tokenizer
+import spaces
+import numpy as np
+import tempfile
+import os
+import time
+import logging
+from PIL import Image
+import cv2
+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.responses import FileResponse
+import uvicorn
+import threading
+import json
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Global variables for model
+pipe = None
+device = "cuda" if torch.cuda.is_available() else "cpu"
+def load_model():
+    """Load the LTX-Video model with optimizations"""
+    global pipe
+    try:
+        logger.info("Loading LTX-Video model...")
+        # Load the pipeline
+        pipe = LTXVideoPipeline.from_pretrained(
+            "Lightricks/LTX-Video-0.9.7-dev",
+            torch_dtype=torch.bfloat16,
+            use_safetensors=True
+        )
+        # Move to device
+        pipe = pipe.to(device)
+        # Enable optimizations
+        pipe.vae.enable_tiling()
+        pipe.vae.enable_slicing()
+        # Enable memory efficient attention if available
+        if hasattr(pipe.unet, 'enable_xformers_memory_efficient_attention'):
+            pipe.unet.enable_xformers_memory_efficient_attention()
+        logger.info("Model loaded successfully!")
+        return True
+    except Exception as e:
+        logger.error(f"Error loading model: {e}")
+        return False
+def validate_inputs(prompt, duration, image=None):
+    """Validate input parameters"""
+    errors = []
+    if not prompt or len(prompt.strip()) == 0:
+        errors.append("Prompt is required")
+    if len(prompt) > 500:
+        errors.append("Prompt must be less than 500 characters")
+    if duration < 3 or duration > 5:
+        errors.append("Duration must be between 3 and 5 seconds")
+    if image is not None:
+        try:
+            if isinstance(image, str):
+                img = Image.open(image)
+            else:
+                img = image
+            # Check image dimensions
+            width, height = img.size
+            if width > 1024 or height > 1024:
+                errors.append("Image dimensions must be less than 1024x1024")
+        except Exception as e:
+            errors.append(f"Invalid image: {str(e)}")
+    return errors
+def frames_to_video(frames, output_path, fps=24):
+    """Convert frames to video using OpenCV"""
+    try:
+        height, width = frames[0].shape[:2]
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+        for frame in frames:
+            # Convert RGB to BGR for OpenCV
+            frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+            out.write(frame_bgr)
+        out.release()
+        return True
+    except Exception as e:
+        logger.error(f"Error creating video: {e}")
+        return False
+@spaces.GPU(duration=60)
+def generate_video_core(prompt, negative_prompt="", duration=4, image=None):
+    """Core video generation function with ZeroGPU decorator"""
+    global pipe
+    start_time = time.time()
+    try:
+        # Calculate number of frames (24 FPS)
+        num_frames = int(duration * 24)
+        # Prepare generation parameters
+        generation_kwargs = {
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "num_frames": num_frames,
+            "height": 512,
+            "width": 768,
+            "num_inference_steps": 30,
+            "guidance_scale": 7.5,
+            "generator": torch.Generator(device=device).manual_seed(42)
+        }
+        # Add image if provided
+        if image is not None:
+            if isinstance(image, str):
+                image = Image.open(image)
+            # Resize image to match output dimensions
+            image = image.resize((768, 512), Image.Resampling.LANCZOS)
+            generation_kwargs["image"] = image
+        logger.info(f"Starting generation with {num_frames} frames...")
+        # Generate video
+        with torch.inference_mode():
+            result = pipe(**generation_kwargs)
+        # Get the generated frames
+        frames = result.frames[0]  # First (and only) video in batch
+        # Convert to numpy arrays if needed
+        if torch.is_tensor(frames):
+            frames = frames.cpu().numpy()
+        # Ensure frames are in the right format (0-255 uint8)
+        if frames.dtype != np.uint8:
+            frames = (frames * 255).astype(np.uint8)
+        # Create temporary video file
+        temp_dir = tempfile.mkdtemp()
+        video_path = os.path.join(temp_dir, "generated_video.mp4")
+        # Convert frames to video
+        success = frames_to_video(frames, video_path, fps=24)
+        if not success:
+            raise Exception("Failed to create video file")
+        generation_time = time.time() - start_time
+        logger.info(f"Video generated successfully in {generation_time:.2f} seconds")
+        return video_path, f"Generated in {generation_time:.2f}s"
+    except Exception as e:
+        logger.error(f"Error generating video: {e}")
+        raise Exception(f"Generation failed: {str(e)}")
+def generate_video_gradio(prompt, negative_prompt, duration, image):
+    """Gradio interface wrapper"""
+    try:
+        # Validate inputs
+        errors = validate_inputs(prompt, duration, image)
+        if errors:
+            return None, f"Validation errors: {'; '.join(errors)}"
+        # Check if model is loaded
+        if pipe is None:
+            return None, "Model not loaded. Please wait for initialization."
+        # Generate video
+        video_path, status = generate_video_core(prompt, negative_prompt, duration, image)
+        return video_path, status
+    except Exception as e:
+        logger.error(f"Gradio generation error: {e}")
+        return None, f"Error: {str(e)}"
+# Create Gradio interface
+def create_gradio_interface():
+    with gr.Blocks(title="LTX-Video Generator", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🎬 LTX-Video Generator")
+        gr.Markdown("Generate 3-5 second videos using the LTX-Video model from Lightricks")
+        with gr.Row():
+            with gr.Column(scale=1):
+                # Input controls
+                image_input = gr.File(
+                    label="Input Image (Optional)",
+                    file_types=[".png", ".jpg", ".jpeg"],
+                    type="filepath"
+                )
+                prompt_input = gr.Textbox(
+                    label="Prompt",
+                    placeholder="Describe the video you want to generate...",
+                    lines=3,
+                    max_lines=5
+                )
+                negative_prompt_input = gr.Textbox(
+                    label="Negative Prompt (Optional)",
+                    placeholder="What you don't want in the video...",
+                    lines=2,
+                    max_lines=3
+                )
+                duration_slider = gr.Slider(
+                    minimum=3,
+                    maximum=5,
+                    value=4,
+                    step=0.5,
+                    label="Duration (seconds)"
+                )
+                generate_btn = gr.Button("🎬 Generate Video", variant="primary")
+                gr.Markdown("**Estimated time:** 4-6 seconds")
+            with gr.Column(scale=1):
+                # Output controls
+                video_output = gr.Video(label="Generated Video")
+                status_output = gr.Textbox(label="Status", interactive=False)
+        # Event handlers
+        generate_btn.click(
+            fn=generate_video_gradio,
+            inputs=[prompt_input, negative_prompt_input, duration_slider, image_input],
+            outputs=[video_output, status_output]
+        )
+        # Examples
+        gr.Examples(
+            examples=[
+                ["A cat playing with a ball of yarn", "", 4, None],
+                ["Ocean waves crashing on a beach at sunset", "", 3, None],
+                ["A person walking through a forest", "blurry, low quality", 5, None],
+            ],
+            inputs=[prompt_input, negative_prompt_input, duration_slider, image_input]
+        )
+    return demo
+# FastAPI setup
+app = FastAPI(title="LTX-Video API", description="Generate videos using LTX-Video model")
+@app.post("/generate_video")
+async def api_generate_video(
+    prompt: str = Form(..., description="Text prompt for video generation"),
+    negative_prompt: str = Form("", description="Negative prompt (optional)"),
+    duration: float = Form(4.0, description="Duration in seconds (3-5)"),
+    image: UploadFile = File(None, description="Input image (optional)")
+):
+    """Generate video via API"""
+    try:
+        # Validate inputs
+        image_path = None
+        if image:
+            # Save uploaded image temporarily
+            temp_dir = tempfile.mkdtemp()
+            image_path = os.path.join(temp_dir, image.filename)
+            with open(image_path, "wb") as f:
+                content = await image.read()
+                f.write(content)
+        errors = validate_inputs(prompt, duration, image_path)
+        if errors:
+            raise HTTPException(status_code=400, detail={"errors": errors})
+        if pipe is None:
+            raise HTTPException(status_code=503, detail="Model not loaded")
+        # Generate video
+        video_path, status = generate_video_core(prompt, negative_prompt, duration, image_path)
+        # Return video file
+        return FileResponse(
+            video_path,
+            media_type="video/mp4",
+            filename=f"generated_video_{int(time.time())}.mp4"
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"API generation error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/")
+async def root():
+    """API documentation"""
+    return {
+        "message": "LTX-Video API",
+        "endpoints": {
+            "/generate_video": "POST - Generate video",
+            "/docs": "GET - API documentation"
+        },
+        "curl_example": """
+curl -X POST "http://localhost:7860/generate_video" \\
+  -F "prompt=A cat playing with a ball" \\
+  -F "duration=4" \\
+  -F "negative_prompt=blurry" \\
+  -F "image=@your_image.jpg" \\
+  --output generated_video.mp4
+        """
+    }
+def run_api():
+    """Run FastAPI server"""
+    uvicorn.run(app, host="0.0.0.0", port=7861, log_level="info")
+def main():
+    """Main function"""
+    # Load model
+    logger.info("Initializing LTX-Video Generator...")
+    model_loaded = load_model()
+    if not model_loaded:
+        logger.error("Failed to load model. Exiting.")
+        return
+    # Create Gradio interface
+    demo = create_gradio_interface()
+    # Start API server in a separate thread
+    api_thread = threading.Thread(target=run_api, daemon=True)
+    api_thread.start()
+    logger.info("API server started on http://localhost:7861")
+    # Launch Gradio interface
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_api=False
+    )
+if __name__ == "__main__":
+    main()