dream2589632147 committed
Commit 2c61901 · verified · 1 Parent(s): 8aa34e2

Upload 2 files

Files changed (2)
  1. requirements.py +632 -0
  2. requirements.txt +13 -0
requirements.py ADDED
@@ -0,0 +1,632 @@
import gradio as gr
import numpy as np
import random
import torch
import spaces

import math
import os
import tempfile
from typing import Optional, Tuple, Any

from PIL import Image
from gradio_client import Client, handle_file
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

from diffusers import FlowMatchEulerDiscreteScheduler
from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    transformer=QwenImageTransformer2DModel.from_pretrained(
        "linoyts/Qwen-Image-Edit-Rapid-AIO",
        subfolder="transformer",
        torch_dtype=dtype,
        device_map="cuda",
    ),
    torch_dtype=dtype,
).to(device)

# Load the multiple-angles LoRA ("镜头转换" = "camera transition"), fuse it
# into the transformer at 1.25x strength, then unload the adapter so
# inference runs on plain fused weights.
pipe.load_lora_weights(
    "dx8152/Qwen-Edit-2509-Multiple-angles",
    weight_name="镜头转换.safetensors",
    adapter_name="angles",
)
pipe.set_adapters(["angles"], adapter_weights=[1.0])
pipe.fuse_lora(adapter_names=["angles"], lora_scale=1.25)
pipe.unload_lora_weights()

# Restore the stock transformer class and switch to the FlashAttention-3
# double-stream attention processor.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# Warm up / ahead-of-time optimize the pipeline on dummy inputs.
optimize_pipeline_(
    pipe,
    image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))],
    prompt="prompt",
)

MAX_SEED = np.iinfo(np.int32).max

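# Note on the fuse-then-unload pattern above: fuse_lora() folds the LoRA
# delta into the base weights once (each adapted linear layer effectively
# becomes W' = W + lora_scale * (B @ A)), and unload_lora_weights() then
# removes the adapter bookkeeping, so inference pays no per-step LoRA
# overhead while the camera-angle behavior stays baked in.
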
def _generate_video_segment(
    input_image_path: str,
    output_image_path: str,
    prompt: str,
    request: gr.Request
) -> str:
    """
    Generate a single video segment between two frames by calling an external
    Wan 2.2 image-to-video service hosted on Hugging Face Spaces.

    This helper function is used internally when the user asks to create
    a video between the input and output images.

    Args:
        input_image_path (str):
            Path to the starting frame image on disk.
        output_image_path (str):
            Path to the ending frame image on disk.
        prompt (str):
            Text prompt describing the camera movement / transition.
        request (gr.Request):
            Gradio request object, used here to forward the `x-ip-token`
            header to the downstream Space for authentication/rate limiting.

    Returns:
        str:
            A string returned by the external service, usually a URL or path
            to the generated video.
    """
    x_ip_token = request.headers["x-ip-token"]
    video_client = Client(
        "multimodalart/wan-2-2-first-last-frame",
        headers={"x-ip-token": x_ip_token}
    )
    result = video_client.predict(
        start_image_pil=handle_file(input_image_path),
        end_image_pil=handle_file(output_image_path),
        prompt=prompt,
        api_name="/generate_video",
    )
    return result[0]["video"]

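# The same Space can also be driven outside this app with gradio_client
# (a minimal sketch, assuming local frame files and the public
# /generate_video endpoint used above):
#
#   client = Client("multimodalart/wan-2-2-first-last-frame")
#   out = client.predict(
#       start_image_pil=handle_file("frame_a.png"),
#       end_image_pil=handle_file("frame_b.png"),
#       prompt="Rotate the camera 45 degrees to the left.",
#       api_name="/generate_video",
#   )
#   video_path = out[0]["video"]
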
def build_camera_prompt(
    rotate_deg: float = 0.0,
    move_forward: float = 0.0,
    vertical_tilt: float = 0.0,
    wideangle: bool = False
) -> str:
    """
    Build a camera movement prompt based on the chosen controls.

    Converts the provided control values into a prompt instruction using the
    corresponding trigger words for the multiple-angles LoRA.

    Args:
        rotate_deg (float, optional):
            Horizontal rotation in degrees. Positive values rotate left,
            negative values rotate right. Defaults to 0.0.
        move_forward (float, optional):
            Forward movement / zoom factor. Larger values imply moving the
            camera closer or into a close-up. Defaults to 0.0.
        vertical_tilt (float, optional):
            Vertical angle of the camera:
            - Negative ≈ bird's-eye view
            - Positive ≈ worm's-eye view
            Defaults to 0.0.
        wideangle (bool, optional):
            Whether to switch to a wide-angle lens style. Defaults to False.

    Returns:
        str:
            A text prompt describing the camera motion. If no controls are
            active, returns `"no camera movement"`.
    """
    prompt_parts = []

    # Rotation (bilingual trigger phrase: Chinese first, then English)
    if rotate_deg != 0:
        if rotate_deg > 0:
            prompt_parts.append(
                f"将镜头向左旋转{abs(rotate_deg)}度 Rotate the camera {abs(rotate_deg)} degrees to the left."
            )
        else:
            prompt_parts.append(
                f"将镜头向右旋转{abs(rotate_deg)}度 Rotate the camera {abs(rotate_deg)} degrees to the right."
            )

    # Move forward / close-up
    if move_forward > 5:
        prompt_parts.append("将镜头转为特写镜头 Turn the camera to a close-up.")
    elif move_forward >= 1:
        prompt_parts.append("将镜头向前移动 Move the camera forward.")

    # Vertical tilt
    if vertical_tilt <= -1:
        prompt_parts.append("将相机转向鸟瞰视角 Turn the camera to a bird's-eye view.")
    elif vertical_tilt >= 1:
        prompt_parts.append("将相机切换到仰视视角 Turn the camera to a worm's-eye view.")

    # Lens option
    if wideangle:
        prompt_parts.append("将镜头转为广角镜头 Turn the camera to a wide-angle lens.")

    final_prompt = " ".join(prompt_parts).strip()
    return final_prompt if final_prompt else "no camera movement"

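# Example (hypothetical control values): rotating left while zooming all the
# way in emits both bilingual trigger phrases, joined by a single space:
#
#   build_camera_prompt(rotate_deg=45, move_forward=10)
#   # -> "将镜头向左旋转45度 Rotate the camera 45 degrees to the left. 将镜头转为特写镜头 Turn the camera to a close-up."
#
# With every control at its default the function returns "no camera movement",
# which infer_camera_edit treats as a no-op.
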
@spaces.GPU
def infer_camera_edit(
    image: Optional[Image.Image] = None,
    rotate_deg: float = 0.0,
    move_forward: float = 0.0,
    vertical_tilt: float = 0.0,
    wideangle: bool = False,
    seed: int = 0,
    randomize_seed: bool = True,
    true_guidance_scale: float = 1.0,
    num_inference_steps: int = 4,
    height: Optional[int] = None,
    width: Optional[int] = None,
    prev_output: Optional[Image.Image] = None,
) -> Tuple[Image.Image, int, str]:
    """
    Edit the camera angles/view of an image with Qwen Image Edit 2509 and
    dx8152's Qwen-Edit-2509-Multiple-angles LoRA.

    Applies a camera-style transformation (rotation, zoom, tilt, lens)
    to an input image.

    Args:
        image (PIL.Image.Image | None, optional):
            Input image to edit. If `None`, the function falls back to
            `prev_output`. At least one of `image` or `prev_output` must
            be available. Defaults to None.
        rotate_deg (float, optional):
            Horizontal rotation in degrees (-90, -45, 0, 45, 90). Positive
            values rotate to the left, negative to the right. Defaults to 0.0.
        move_forward (float, optional):
            Forward movement / zoom factor (0, 5, 10). Higher values move the
            camera closer; values >5 switch to a close-up style. Defaults to 0.0.
        vertical_tilt (float, optional):
            Vertical tilt (-1 to 1). -1 ≈ bird's-eye view, +1 ≈ worm's-eye view.
            Defaults to 0.0.
        wideangle (bool, optional):
            Whether to use a wide-angle lens style. Defaults to False.
        seed (int, optional):
            Random seed for the generation. Ignored if `randomize_seed=True`.
            Defaults to 0.
        randomize_seed (bool, optional):
            If True, a random seed (0..MAX_SEED) is chosen per call.
            Defaults to True.
        true_guidance_scale (float, optional):
            CFG / guidance scale controlling prompt adherence. Defaults to 1.0
            since the demo uses a distilled transformer for faster inference.
        num_inference_steps (int, optional):
            Number of inference steps. Defaults to 4.
        height (int, optional):
            Output image height, typically a multiple of 8. If set to 0, the
            model infers a size. Defaults to 1024 if none is provided.
        width (int, optional):
            Output image width, typically a multiple of 8. If set to 0, the
            model infers a size. Defaults to 1024 if none is provided.
        prev_output (PIL.Image.Image | None, optional):
            Previous output image to use as input when no new image is
            uploaded. Defaults to None.

    Returns:
        Tuple[PIL.Image.Image, int, str]:
            - The edited output image.
            - The actual seed used for generation.
            - The constructed camera prompt string.
    """
    progress = gr.Progress(track_tqdm=True)

    prompt = build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle)
    print(f"Generated Prompt: {prompt}")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Choose the input image (prefer the upload, else the last output)
    pil_images = []
    if image is not None:
        if isinstance(image, Image.Image):
            pil_images.append(image.convert("RGB"))
        elif hasattr(image, "name"):
            pil_images.append(Image.open(image.name).convert("RGB"))
    elif prev_output:
        pil_images.append(prev_output.convert("RGB"))

    if len(pil_images) == 0:
        raise gr.Error("Please upload an image first.")

    # No active controls: return the chosen input unchanged
    if prompt == "no camera movement":
        return pil_images[0], seed, prompt

    result = pipe(
        image=pil_images,
        prompt=prompt,
        height=height if height != 0 else None,
        width=width if width != 0 else None,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return result, seed, prompt

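# Direct-call sketch (hypothetical paths; needs the GPU pipeline loaded above):
#
#   img = Image.open("input.jpg").convert("RGB")
#   out, used_seed, used_prompt = infer_camera_edit(
#       image=img,
#       rotate_deg=90,
#       randomize_seed=False,
#       seed=42,
#       height=0,  # 0 lets the pipeline infer the output size
#       width=0,
#   )
#   out.save("rotated.png")
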
def create_video_between_images(
    input_image: Optional[Image.Image],
    output_image: Optional[np.ndarray],
    prompt: str,
    request: gr.Request
) -> str:
    """
    Create a short transition video between the input and output images via
    the Wan 2.2 first-last-frame Space.

    Args:
        input_image (PIL.Image.Image | None):
            Starting frame image (the original / previous view).
        output_image (numpy.ndarray | None):
            Ending frame image - the output image with the edited camera angles.
        prompt (str):
            The camera movement prompt used to describe the transition.
        request (gr.Request):
            Gradio request object, used to forward the `x-ip-token` header
            to the video generation app.

    Returns:
        str:
            A path pointing to the generated video.

    Raises:
        gr.Error:
            If either image is missing or if the video generation fails.
    """
    if input_image is None or output_image is None:
        raise gr.Error("Both input and output images are required to create a video.")

    try:
        # Write both frames to temp PNGs so they can be uploaded to the Space
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            input_image.save(tmp.name)
            input_image_path = tmp.name

        output_pil = Image.fromarray(output_image.astype("uint8"))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            output_pil.save(tmp.name)
            output_image_path = tmp.name

        video_path = _generate_video_segment(
            input_image_path,
            output_image_path,
            prompt if prompt else "Camera movement transformation",
            request
        )
        return video_path
    except Exception as e:
        raise gr.Error(f"Video generation failed: {e}")

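# Design note: delete=False keeps both temp PNGs on disk so gradio_client can
# upload them after the `with` blocks exit; a long-running Space may want to
# os.unlink() the two paths once the call returns (hypothetical cleanup, not
# part of the original flow).
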
# --- UI ---
css = '''#col-container { max-width: 800px; margin: 0 auto; }
.dark .progress-text{color: white !important}
#examples{max-width: 800px; margin: 0 auto; }'''

def reset_all() -> list:
    """
    Reset all camera control knobs and flags to their default values.

    This is used by the "Reset" button to set:
    - rotate_deg = 0
    - move_forward = 0
    - vertical_tilt = 0
    - wideangle = False
    - is_reset = True

    Returns:
        list:
            A list of values matching the order of the reset outputs:
            [rotate_deg, move_forward, vertical_tilt, wideangle, is_reset]
    """
    return [0, 0, 0, False, True]

def end_reset() -> bool:
    """
    Mark the end of a reset cycle.

    This helper is chained after `reset_all` to set the internal
    `is_reset` flag back to False, so that live inference can resume.

    Returns:
        bool:
            Always returns False.
    """
    return False

def update_dimensions_on_upload(
    image: Optional[Image.Image]
) -> Tuple[int, int]:
    """
    Compute a recommended (width, height) for the output resolution when an
    image is uploaded, preserving its aspect ratio.

    Args:
        image (PIL.Image.Image | None):
            The uploaded image. If `None`, defaults to (1024, 1024).

    Returns:
        Tuple[int, int]:
            The new (width, height).
    """
    if image is None:
        return 1024, 1024

    original_width, original_height = image.size

    # Pin the longer side to 1024 and scale the other side to match
    if original_width > original_height:
        new_width = 1024
        aspect_ratio = original_height / original_width
        new_height = int(new_width * aspect_ratio)
    else:
        new_height = 1024
        aspect_ratio = original_width / original_height
        new_width = int(new_height * aspect_ratio)

    # Ensure dimensions are multiples of 8
    new_width = (new_width // 8) * 8
    new_height = (new_height // 8) * 8

    return new_width, new_height

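# Worked example: a 1920x1080 upload is landscape, so width is pinned to 1024
# and height becomes int(1024 * 1080 / 1920) = 576, already a multiple of 8,
# so the sliders are set to (width=1024, height=576).
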
with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🎬 Qwen Image Edit — Camera Angle Control")
        gr.Markdown("""
        Qwen Image Edit 2509 for Camera Control ✨
        Using [dx8152's Qwen-Edit-2509-Multiple-angles LoRA](https://huggingface.co/dx8152/Qwen-Edit-2509-Multiple-angles) and [Phr00t/Qwen-Image-Edit-Rapid-AIO](https://huggingface.co/Phr00t/Qwen-Image-Edit-Rapid-AIO/tree/main) for 4-step inference 💨
        """
        )

        with gr.Row():
            with gr.Column():
                image = gr.Image(label="Input Image", type="pil")
                # Hidden state: the last generated image, and a flag that
                # suppresses live inference while the controls are resetting
                prev_output = gr.Image(value=None, visible=False)
                is_reset = gr.Checkbox(value=False, visible=False)

                with gr.Tab("Camera Controls"):
                    rotate_deg = gr.Slider(
                        label="Rotate Right-Left (degrees °)",
                        minimum=-90,
                        maximum=90,
                        step=45,
                        value=0
                    )
                    move_forward = gr.Slider(
                        label="Move Forward → Close-Up",
                        minimum=0,
                        maximum=10,
                        step=5,
                        value=0
                    )
                    vertical_tilt = gr.Slider(
                        label="Vertical Angle (Bird ↔ Worm)",
                        minimum=-1,
                        maximum=1,
                        step=1,
                        value=0
                    )
                    wideangle = gr.Checkbox(label="Wide-Angle Lens", value=False)
                with gr.Row():
                    reset_btn = gr.Button("Reset")
                    run_btn = gr.Button("Generate", variant="primary")

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0
                    )
                    randomize_seed = gr.Checkbox(
                        label="Randomize Seed",
                        value=True
                    )
                    true_guidance_scale = gr.Slider(
                        label="True Guidance Scale",
                        minimum=1.0,
                        maximum=10.0,
                        step=0.1,
                        value=1.0
                    )
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=1,
                        maximum=40,
                        step=1,
                        value=4
                    )
                    height = gr.Slider(
                        label="Height",
                        minimum=256,
                        maximum=2048,
                        step=8,
                        value=1024
                    )
                    width = gr.Slider(
                        label="Width",
                        minimum=256,
                        maximum=2048,
                        step=8,
                        value=1024
                    )

            with gr.Column():
                result = gr.Image(label="Output Image", interactive=False)
                prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False)
                create_video_button = gr.Button(
                    "🎥 Create Video Between Images",
                    variant="secondary",
                    visible=False
                )
                with gr.Group(visible=False) as video_group:
                    video_output = gr.Video(
                        label="Generated Video",
                        show_download_button=True,
                        autoplay=True
                    )

    inputs = [
        image, rotate_deg, move_forward,
        vertical_tilt, wideangle,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output
    ]
    outputs = [result, seed, prompt_preview]

    # Reset behavior
    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
        queue=False
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False)

    # Manual generation with video button visibility control
    def infer_and_show_video_button(*args: Any):
        """
        Wrapper around `infer_camera_edit` that also controls the visibility
        of the 'Create Video Between Images' button.

        The first argument in `args` is expected to be the input image; if
        both input and output images are present, the video button is shown.

        Args:
            *args:
                Positional arguments forwarded directly to `infer_camera_edit`.

        Returns:
            tuple:
                (output_image, seed, prompt, video_button_visibility_update)
        """
        result_img, result_seed, result_prompt = infer_camera_edit(*args)
        # Show the video button if we have both input and output images
        show_button = args[0] is not None and result_img is not None
        return result_img, result_seed, result_prompt, gr.update(visible=show_button)

    run_event = run_btn.click(
        fn=infer_and_show_video_button,
        inputs=inputs,
        outputs=outputs + [create_video_button]
    )

    # Video creation
    create_video_button.click(
        fn=lambda: gr.update(visible=True),
        outputs=[video_group],
        api_name=False
    ).then(
        fn=create_video_between_images,
        inputs=[image, result, prompt_preview],
        outputs=[video_output],
        api_name=False
    )

    # Examples
    gr.Examples(
        examples=[
            ["tool_of_the_sea.png", 90, 0, 0, False, 0, True, 1.0, 4, 568, 1024],
            ["monkey.jpg", -90, 0, 0, False, 0, True, 1.0, 4, 704, 1024],
            ["metropolis.jpg", 0, 0, -1, False, 0, True, 1.0, 4, 816, 1024],
            ["disaster_girl.jpg", -45, 0, 1, False, 0, True, 1.0, 4, 768, 1024],
            ["grumpy.png", 90, 0, 1, False, 0, True, 1.0, 4, 576, 1024]
        ],
        inputs=[
            image, rotate_deg, move_forward,
            vertical_tilt, wideangle,
            seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width
        ],
        outputs=outputs,
        fn=infer_camera_edit,
        cache_examples="lazy",
        elem_id="examples"
    )

    # Image upload triggers dimension update and control reset
    image.upload(
        fn=update_dimensions_on_upload,
        inputs=[image],
        outputs=[width, height]
    ).then(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
        queue=False
    ).then(
        fn=end_reset,
        inputs=None,
        outputs=[is_reset],
        queue=False
    )

    # Live updates: sliders/checkbox re-run inference unless a reset is in
    # progress (is_reset is True while reset_all is firing)
    def maybe_infer(
        is_reset: bool,
        progress: gr.Progress = gr.Progress(track_tqdm=True),
        *args: Any
    ):
        if is_reset:
            return gr.update(), gr.update(), gr.update(), gr.update()
        else:
            result_img, result_seed, result_prompt = infer_camera_edit(*args)
            # Show the video button if we have both input and output
            show_button = args[0] is not None and result_img is not None
            return result_img, result_seed, result_prompt, gr.update(visible=show_button)

    control_inputs = [
        image, rotate_deg, move_forward,
        vertical_tilt, wideangle,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output
    ]
    control_inputs_with_flag = [is_reset] + control_inputs

    for control in [rotate_deg, move_forward, vertical_tilt]:
        control.release(
            fn=maybe_infer,
            inputs=control_inputs_with_flag,
            outputs=outputs + [create_video_button]
        )

    wideangle.input(
        fn=maybe_infer,
        inputs=control_inputs_with_flag,
        outputs=outputs + [create_video_button]
    )

    run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])

    # Expose the core functions as named API endpoints (also served as MCP
    # tools via mcp_server=True below)
    gr.api(infer_camera_edit, api_name="infer_edit_camera_angles")
    gr.api(create_video_between_images, api_name="create_video_between_images")

demo.launch(mcp_server=True, show_api=True)
requirements.txt ADDED
@@ -0,0 +1,13 @@
git+https://github.com/huggingface/diffusers.git

transformers
accelerate
safetensors
sentencepiece
dashscope
kernels
torchvision
peft
torchao==0.11.0