WaveCut committed
Commit ea4fe4b · verified · 1 Parent(s): c82bf27

Upload folder using huggingface_hub

Files changed (5)
  1. app.py +128 -0
  2. config.py +5 -0
  3. model_handler.py +67 -0
  4. requirements.txt +21 -0
  5. utils.py +7 -0
app.py ADDED
@@ -0,0 +1,128 @@
+ import gradio as gr
+ import torch
+ from model_handler import ModelHandler
+ from utils import get_random_seed
+
+ # Initialize the model handler.
+ # We initialize it here so the model loads once, when the app starts.
+ model_handler = ModelHandler()
+
+ def generate(
+     prompt,
+     negative_prompt,
+     width,
+     height,
+     steps,
+     guidance_scale,
+     seed,
+     progress=gr.Progress()
+ ):
+     """
+     Wrapper function that calls the model inference.
+     """
+     if seed < 0:
+         seed = get_random_seed()
+
+     try:
+         image = model_handler.infer(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+             num_inference_steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+             progress_callback=progress
+         )
+         return image, seed
+     except Exception as e:
+         raise gr.Error(f"Generation failed: {e}")
+
+ # CSS for custom styling
+ css = """
+ .container { max-width: 900px; margin: auto; }
+ .header { text-align: center; margin-bottom: 20px; }
+ .header h1 { font-size: 2.5rem; font-weight: bold; color: #333; }
+ .header p { font-size: 1.1rem; color: #666; }
+ .footer { text-align: center; margin-top: 20px; font-size: 0.9rem; }
+ """
+
+ # Create the Gradio interface
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
+
+     with gr.Column(elem_classes="container"):
+         # Header
+         with gr.Column(elem_classes="header"):
+             gr.Markdown(
+                 """
+                 # Kandinsky 5.0 Lite T2I (SFT)
+                 ### Text-to-Image Generation
+                 """
+             )
+             gr.Markdown("[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
+
+         # Hardware status info
+         device_info = "Running on **GPU** 🚀" if torch.cuda.is_available() else "Running on **CPU** ⚠️ (inference will be slow)"
+         gr.Markdown(device_info)
+
+         with gr.Row():
+             # Left column: inputs
+             with gr.Column(scale=1):
+                 prompt = gr.Textbox(
+                     label="Prompt",
+                     placeholder="Describe the image you want to generate...",
+                     lines=3,
+                     autofocus=True
+                 )
+
+                 negative_prompt = gr.Textbox(
+                     label="Negative Prompt",
+                     placeholder="Low quality, bad anatomy, blurry...",
+                     lines=2,
+                     value="low quality, bad anatomy, worst quality, deformed, disfigured"
+                 )
+
+                 with gr.Accordion("Advanced Settings", open=False):
+                     with gr.Row():
+                         width = gr.Slider(label="Width", minimum=256, maximum=1024, step=64, value=1024)
+                         height = gr.Slider(label="Height", minimum=256, maximum=1024, step=64, value=1024)
+
+                     steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=1, value=25)
+                     guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=20.0, step=0.5, value=7.5)
+
+                     with gr.Row():
+                         seed = gr.Number(label="Seed", value=-1, precision=0, info="Set to -1 for random")
+                         random_btn = gr.Button("🎲 Randomize", size="sm", variant="secondary")
+
+                 run_btn = gr.Button("Generate Image", variant="primary", size="lg")
+
+             # Right column: output
+             with gr.Column(scale=1):
+                 result_image = gr.Image(label="Generated Image", type="pil", interactive=False)
+                 used_seed = gr.Number(label="Seed Used", interactive=False)
+
+     # Event handlers
+     run_btn.click(
+         fn=generate,
+         inputs=[prompt, negative_prompt, width, height, steps, guidance_scale, seed],
+         outputs=[result_image, used_seed]
+     )
+
+     # Reset the seed field to -1 so the next run draws a fresh random seed
+     random_btn.click(lambda: -1, outputs=seed)
+
+     # Examples (seed -1 means a random seed is drawn per run)
+     gr.Examples(
+         examples=[
+             ["A futuristic cityscape with neon lights and flying cars, cyberpunk style, high detail", "low quality, blurry", 1024, 1024, 25, 7.5, -1],
+             ["A cute red panda drinking coffee in a cozy cafe, digital art", "deformed, ugly", 1024, 1024, 25, 7.0, -1],
+             ["Portrait of a warrior princess, intricate armor, dramatic lighting, photorealistic", "cartoon, sketch, monochrome", 1024, 1024, 30, 8.0, -1]
+         ],
+         inputs=[prompt, negative_prompt, width, height, steps, guidance_scale, seed],
+         fn=generate,
+         outputs=[result_image, used_seed],
+         cache_examples=False
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
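Note: `gr.Progress` updates are streamed to the browser through Gradio's event queue. Recent Gradio releases enable the queue by default; on older 3.x versions it must be switched on explicitly. A minimal sketch under that assumption:

    if __name__ == "__main__":
        demo.queue().launch()  # queue() enables streaming of gr.Progress updates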
config.py ADDED
@@ -0,0 +1,5 @@
+ # Central configuration (note: not yet imported by app.py or model_handler.py)
+ MODEL_ID = "kandinskylab/Kandinsky-5.0-T2I-Lite-sft-Diffusers"
+ MAX_IMAGE_SIZE = 1024
+ DEFAULT_STEPS = 50
+ DEFAULT_GUIDANCE = 3.5
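As committed, these constants are unused: app.py sets its own defaults inline (25 steps, guidance 7.5, which disagree with DEFAULT_STEPS and DEFAULT_GUIDANCE here) and model_handler.py hardcodes the repo id. A hypothetical wiring sketch, if the config were adopted:

    # in model_handler.py (hypothetical):
    from config import MODEL_ID

    # then inside ModelHandler.__init__, replacing the inline string:
    # self.model_id = MODEL_ID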
model_handler.py ADDED
@@ -0,0 +1,67 @@
+ import torch
+ from diffusers import AutoPipelineForTextToImage
+
+ class ModelHandler:
+     def __init__(self):
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model_id = "kandinskylab/Kandinsky-5.0-T2I-Lite-sft-Diffusers"
+         self.pipeline = None
+         self.load_model()
+
+     def load_model(self):
+         """
+         Loads the model pipeline. Uses float16 on GPU to save memory.
+         """
+         try:
+             print(f"Loading model: {self.model_id} on {self.device}...")
+
+             dtype = torch.float16 if self.device == "cuda" else torch.float32
+
+             # AutoPipeline detects the pipeline architecture automatically
+             self.pipeline = AutoPipelineForTextToImage.from_pretrained(
+                 self.model_id,
+                 torch_dtype=dtype,
+                 use_safetensors=True
+             )
+
+             if self.device == "cuda":
+                 self.pipeline.to("cuda")
+                 # Optional: enable CPU offload if VRAM is limited (e.g. < 8 GB)
+                 # self.pipeline.enable_model_cpu_offload()
+
+             print("Model loaded successfully.")
+
+         except Exception as e:
+             print(f"Error loading model: {e}")
+             # Re-raise so a broken deployment fails loudly rather than serving errors
+             raise
+
+     def infer(self, prompt, negative_prompt, width, height, num_inference_steps, guidance_scale, seed, progress_callback=None):
+         """
+         Runs inference on the loaded pipeline.
+         """
+         if self.pipeline is None:
+             self.load_model()
+
+         generator = torch.Generator(device=self.device).manual_seed(int(seed))
+
+         # Progress reporting hook (currently unused; see the note below the pipeline call)
+         def callback_dynamic(step, timestep, latents):
+             if progress_callback:
+                 progress_callback((step, num_inference_steps))
+
+         # The commented-out callback below follows the legacy diffusers signature;
+         # newer releases use `callback_on_step_end` instead (see the sketch after this file).
+         image = self.pipeline(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+             num_inference_steps=num_inference_steps,
+             guidance_scale=guidance_scale,
+             generator=generator,
+             # callback=callback_dynamic,  # Optional: enable for granular progress updates
+             # callback_steps=1
+         ).images[0]
+
+         return image
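The commented-out `callback`/`callback_steps` arguments follow the legacy diffusers signature, which current releases deprecate in favor of `callback_on_step_end`. A minimal sketch of the newer style, assuming the Kandinsky 5.0 pipeline accepts this argument (worth verifying against its documentation):

    def make_step_callback(progress_callback, total_steps):
        # diffusers invokes this adapter after every denoising step
        def on_step_end(pipe, step, timestep, callback_kwargs):
            if progress_callback is not None:
                progress_callback((step + 1, total_steps))
            return callback_kwargs  # the API requires the kwargs dict to be returned
        return on_step_end

    # In infer(), pass the adapter into the pipeline call:
    # callback_on_step_end=make_step_callback(progress_callback, num_inference_steps)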
requirements.txt ADDED
@@ -0,0 +1,21 @@
+ gradio
+ torch
+ torchvision
+ torchaudio
+ numpy
+ Pillow
+ requests
+ accelerate
+ git+https://github.com/huggingface/transformers
+ git+https://github.com/huggingface/diffusers
+ sentencepiece
+ tokenizers
+ datasets
+ scipy
+ joblib
+ opencv-python
+ matplotlib
+ pandas
+ openpyxl
+ PyPDF2
+ python-docx
utils.py ADDED
@@ -0,0 +1,7 @@
+ import random
+
+ def get_random_seed():
+     """
+     Returns a random 32-bit seed.
+     """
+     return random.randint(0, 2**32 - 1)