Spaces: Running on Zero
jake committed · Commit 82bd956 · 1 Parent(s): 4814b61

TF
Files changed:
- MMaDA/inference/gradio_multimodal_demo_inst.py  +28 -0
- app.py  +9 -1
MMaDA/inference/gradio_multimodal_demo_inst.py
CHANGED
@@ -467,6 +467,33 @@ def _load_t2i_examples():
     return [[line] for line in lines]


+def _load_i2i_examples():
+    d = ASSET_ROOT / "i2i"
+    if not d.exists():
+        return []
+
+    # Image files (image1.jpeg, image2.png, ...)
+    image_files = sorted(
+        [p for p in d.iterdir() if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}]
+    )
+    # Text files (text1.txt, text2.txt, ...)
+    text_files = sorted(
+        [p for p in d.iterdir() if p.suffix.lower() == ".txt"]
+    )
+
+    n = min(len(image_files), len(text_files))
+    examples = []
+    for i in range(n):
+        img_path = image_files[i]
+        txt_path = text_files[i]
+        instruction = txt_path.read_text(encoding="utf-8").strip()
+        if not instruction:
+            continue
+        # Gradio Examples format: [image, instruction_text]
+        examples.append([str(img_path), instruction])
+    return examples
+
+
 def _load_media_examples(subdir: str, suffixes):
     target_dir = DEMO_ROOT / subdir
     if not target_dir.exists():

@@ -481,6 +508,7 @@ def _load_media_examples(subdir: str, suffixes):
 T2S_EXAMPLES = _load_t2s_examples()
 CHAT_EXAMPLES = _load_chat_examples()
 T2I_EXAMPLES = _load_t2i_examples()
+I2I_EXAMPLES = _load_i2i_examples()
 S2T_EXAMPLES = _load_media_examples("s2t", {".wav", ".mp3", ".flac", ".ogg"})
 V2T_EXAMPLES = _load_media_examples("v2t", {".mp4", ".mov", ".avi", ".webm"})
 S2S_EXAMPLES = _load_media_examples("s2s", {".wav", ".mp3", ".flac", ".ogg"})
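For context, the new loader pairs the i-th image with the i-th instruction file after sorting each list by filename, so the i2i asset directory needs matching, sortably named image/text pairs. Below is a small sanity-check sketch of that contract; ASSET_ROOT is defined elsewhere in the demo script, and the path used here is only an assumption for illustration.

    from pathlib import Path

    # Assumption: the real ASSET_ROOT is set inside gradio_multimodal_demo_inst.py.
    ASSET_ROOT = Path("MMaDA/inference/assets")

    def check_i2i_assets():
        d = ASSET_ROOT / "i2i"
        if not d.exists():
            print("no i2i asset directory")
            return
        images = sorted(p for p in d.iterdir()
                        if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"})
        texts = sorted(p for p in d.iterdir() if p.suffix.lower() == ".txt")
        # _load_i2i_examples() silently truncates to the shorter list, so flag mismatches here.
        if len(images) != len(texts):
            print(f"warning: {len(images)} images vs {len(texts)} instruction files")
        for img, txt in zip(images, texts):
            print(img.name, "->", txt.read_text(encoding="utf-8").strip()[:60])

    check_i2i_assets()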
app.py
CHANGED
@@ -618,13 +618,21 @@ with gr.Blocks(
                i2i_timesteps = gr.Slider(4, 128, value=18, step=2, label="Timesteps")
                i2i_temperature = gr.Slider(0.0, 2.0, value=1.0, step=0.05, label="Sampling temperature")
                i2i_guidance = gr.Slider(0.0, 8.0, value=3.5, step=0.1, label="CFG scale")
+
+            if I2I_EXAMPLES:
+                with gr.Accordion("Sample edits", open=False):
+                    gr.Examples(
+                        examples=I2I_EXAMPLES,
+                        inputs=[i2i_image_in, i2i_instr],
+                        examples_per_page=4,
+                    )
            i2i_btn = gr.Button("Apply edit", variant="primary")
            i2i_btn.click(
                i2i_handler,
                inputs=[i2i_instr, i2i_image_in, i2i_timesteps, i2i_temperature, i2i_guidance],
                outputs=[i2i_image_out, i2i_status],
            )
-
+
        # ---- I2S ----
        with gr.Tab("Image → Speech (I2S)"):
            i2s_image_in = gr.Image(type="pil", label="Image input", sources=["upload"])
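Clicking a row in gr.Examples fills the listed inputs positionally, so each I2I_EXAMPLES row has to keep the [image, instruction] order that matches inputs=[i2i_image_in, i2i_instr]. A stripped-down sketch of the same wiring, with an illustrative example path and a placeholder handler standing in for i2i_handler:

    import gradio as gr

    def echo_edit(image, instruction):
        # Placeholder for i2i_handler: returns the input image plus a status string.
        return image, f"instruction: {instruction}"

    with gr.Blocks() as demo:
        img_in = gr.Image(type="pil", label="Image input")
        instr = gr.Textbox(label="Edit instruction")
        gr.Examples(
            examples=[["assets/i2i/image1.jpeg", "Make the sky purple"]],  # [image, instruction]
            inputs=[img_in, instr],
            examples_per_page=4,
        )
        img_out = gr.Image(label="Edited image")
        status = gr.Textbox(label="Status")
        gr.Button("Apply edit").click(echo_edit, inputs=[img_in, instr], outputs=[img_out, status])

    # demo.launch()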