Spaces: Running on Zero
jake committed · Commit 82bd956 · 1 Parent(s): 4814b61

TF
Files changed:
- MMaDA/inference/gradio_multimodal_demo_inst.py  +28 -0
- app.py  +9 -1
MMaDA/inference/gradio_multimodal_demo_inst.py
CHANGED
@@ -467,6 +467,33 @@ def _load_t2i_examples():
     return [[line] for line in lines]


+def _load_i2i_examples():
+    d = ASSET_ROOT / "i2i"
+    if not d.exists():
+        return []
+
+    # Image files (image1.jpeg, image2.png, ...)
+    image_files = sorted(
+        [p for p in d.iterdir() if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}]
+    )
+    # Text files (text1.txt, text2.txt, ...)
+    text_files = sorted(
+        [p for p in d.iterdir() if p.suffix.lower() == ".txt"]
+    )
+
+    n = min(len(image_files), len(text_files))
+    examples = []
+    for i in range(n):
+        img_path = image_files[i]
+        txt_path = text_files[i]
+        instruction = txt_path.read_text(encoding="utf-8").strip()
+        if not instruction:
+            continue
+        # Gradio Examples format: [image, instruction_text]
+        examples.append([str(img_path), instruction])
+    return examples
+
+
 def _load_media_examples(subdir: str, suffixes):
     target_dir = DEMO_ROOT / subdir
     if not target_dir.exists():

@@ -481,6 +508,7 @@ def _load_media_examples(subdir: str, suffixes):
 T2S_EXAMPLES = _load_t2s_examples()
 CHAT_EXAMPLES = _load_chat_examples()
 T2I_EXAMPLES = _load_t2i_examples()
+I2I_EXAMPLES = _load_i2i_examples()
 S2T_EXAMPLES = _load_media_examples("s2t", {".wav", ".mp3", ".flac", ".ogg"})
 V2T_EXAMPLES = _load_media_examples("v2t", {".mp4", ".mov", ".avi", ".webm"})
 S2S_EXAMPLES = _load_media_examples("s2s", {".wav", ".mp3", ".flac", ".ogg"})
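For context, the new loader pairs the i-th image with the i-th instruction file after sorting each list by filename, so the i2i asset directory needs matching, sortably named image/text pairs. Below is a small sanity-check sketch of that contract; ASSET_ROOT is defined elsewhere in the demo script, and the path used here is only an assumption for illustration.

    from pathlib import Path

    # Assumption: the real ASSET_ROOT is set inside gradio_multimodal_demo_inst.py.
    ASSET_ROOT = Path("MMaDA/inference/assets")

    def check_i2i_assets():
        d = ASSET_ROOT / "i2i"
        if not d.exists():
            print("no i2i asset directory")
            return
        images = sorted(p for p in d.iterdir()
                        if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"})
        texts = sorted(p for p in d.iterdir() if p.suffix.lower() == ".txt")
        # _load_i2i_examples() silently truncates to the shorter list, so flag mismatches here.
        if len(images) != len(texts):
            print(f"warning: {len(images)} images vs {len(texts)} instruction files")
        for img, txt in zip(images, texts):
            print(img.name, "->", txt.read_text(encoding="utf-8").strip()[:60])

    check_i2i_assets()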
app.py
CHANGED
@@ -618,13 +618,21 @@ with gr.Blocks(
                i2i_timesteps = gr.Slider(4, 128, value=18, step=2, label="Timesteps")
                i2i_temperature = gr.Slider(0.0, 2.0, value=1.0, step=0.05, label="Sampling temperature")
                i2i_guidance = gr.Slider(0.0, 8.0, value=3.5, step=0.1, label="CFG scale")
+
+            if I2I_EXAMPLES:
+                with gr.Accordion("Sample edits", open=False):
+                    gr.Examples(
+                        examples=I2I_EXAMPLES,
+                        inputs=[i2i_image_in, i2i_instr],
+                        examples_per_page=4,
+                    )
            i2i_btn = gr.Button("Apply edit", variant="primary")
            i2i_btn.click(
                i2i_handler,
                inputs=[i2i_instr, i2i_image_in, i2i_timesteps, i2i_temperature, i2i_guidance],
                outputs=[i2i_image_out, i2i_status],
            )
-
+
        # ---- I2S ----
        with gr.Tab("Image → Speech (I2S)"):
            i2s_image_in = gr.Image(type="pil", label="Image input", sources=["upload"])
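Clicking a row in gr.Examples fills the listed inputs positionally, so each I2I_EXAMPLES row has to keep the [image, instruction] order that matches inputs=[i2i_image_in, i2i_instr]. A stripped-down sketch of the same wiring, with an illustrative example path and a placeholder handler standing in for i2i_handler:

    import gradio as gr

    def echo_edit(image, instruction):
        # Placeholder for i2i_handler: returns the input image plus a status string.
        return image, f"instruction: {instruction}"

    with gr.Blocks() as demo:
        img_in = gr.Image(type="pil", label="Image input")
        instr = gr.Textbox(label="Edit instruction")
        gr.Examples(
            examples=[["assets/i2i/image1.jpeg", "Make the sky purple"]],  # [image, instruction]
            inputs=[img_in, instr],
            examples_per_page=4,
        )
        img_out = gr.Image(label="Edited image")
        status = gr.Textbox(label="Status")
        gr.Button("Apply edit").click(echo_edit, inputs=[img_in, instr], outputs=[img_out, status])

    # demo.launch()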