jake committed
Commit 82bd956 · 1 Parent(s): 4814b61
MMaDA/inference/gradio_multimodal_demo_inst.py CHANGED
@@ -467,6 +467,33 @@ def _load_t2i_examples():
     return [[line] for line in lines]
 
 
+def _load_i2i_examples():
+    d = ASSET_ROOT / "i2i"
+    if not d.exists():
+        return []
+
+    # Image files (image1.jpeg, image2.png, ...)
+    image_files = sorted(
+        [p for p in d.iterdir() if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}]
+    )
+    # Text files (text1.txt, text2.txt, ...)
+    text_files = sorted(
+        [p for p in d.iterdir() if p.suffix.lower() == ".txt"]
+    )
+
+    n = min(len(image_files), len(text_files))
+    examples = []
+    for i in range(n):
+        img_path = image_files[i]
+        txt_path = text_files[i]
+        instruction = txt_path.read_text(encoding="utf-8").strip()
+        if not instruction:
+            continue
+        # Gradio Examples format: [image, instruction_text]
+        examples.append([str(img_path), instruction])
+    return examples
+
+
 def _load_media_examples(subdir: str, suffixes):
     target_dir = DEMO_ROOT / subdir
     if not target_dir.exists():
@@ -481,6 +508,7 @@ def _load_media_examples(subdir: str, suffixes):
 T2S_EXAMPLES = _load_t2s_examples()
 CHAT_EXAMPLES = _load_chat_examples()
 T2I_EXAMPLES = _load_t2i_examples()
+I2I_EXAMPLES = _load_i2i_examples()
 S2T_EXAMPLES = _load_media_examples("s2t", {".wav", ".mp3", ".flac", ".ogg"})
 V2T_EXAMPLES = _load_media_examples("v2t", {".mp4", ".mov", ".avi", ".webm"})
 S2S_EXAMPLES = _load_media_examples("s2s", {".wav", ".mp3", ".flac", ".ogg"})
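For context on the new loader: `_load_i2i_examples()` pairs files purely by sorted filename, so the `i2i` asset directory is expected to hold equal numbers of image and `.txt` files whose names sort in the same order (e.g. `image1.jpeg` alongside `text1.txt`); unmatched extras and empty instruction files are silently dropped. A minimal standalone sketch of that convention, with the `assets/i2i` path assumed purely for illustration:

# Standalone sketch of the i2i example pairing convention.
# "assets/i2i" is a placeholder path, not the demo's actual ASSET_ROOT value.
from pathlib import Path

i2i_dir = Path("assets/i2i")
image_files = sorted(p for p in i2i_dir.iterdir()
                     if p.suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"})
text_files = sorted(p for p in i2i_dir.iterdir() if p.suffix.lower() == ".txt")

# zip() truncates to the shorter list, so the i-th image is matched with the
# i-th instruction file; pairs with an empty instruction are skipped,
# mirroring _load_i2i_examples().
rows = []
for img, txt in zip(image_files, text_files):
    instruction = txt.read_text(encoding="utf-8").strip()
    if instruction:
        rows.append([str(img), instruction])  # [image, instruction] rows for gr.Examples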
app.py CHANGED
@@ -618,13 +618,21 @@ with gr.Blocks(
         i2i_timesteps = gr.Slider(4, 128, value=18, step=2, label="Timesteps")
         i2i_temperature = gr.Slider(0.0, 2.0, value=1.0, step=0.05, label="Sampling temperature")
         i2i_guidance = gr.Slider(0.0, 8.0, value=3.5, step=0.1, label="CFG scale")
+
+        if I2I_EXAMPLES:
+            with gr.Accordion("Sample edits", open=False):
+                gr.Examples(
+                    examples=I2I_EXAMPLES,
+                    inputs=[i2i_image_in, i2i_instr],
+                    examples_per_page=4,
+                )
         i2i_btn = gr.Button("Apply edit", variant="primary")
         i2i_btn.click(
             i2i_handler,
             inputs=[i2i_instr, i2i_image_in, i2i_timesteps, i2i_temperature, i2i_guidance],
             outputs=[i2i_image_out, i2i_status],
         )
-
+
     # ---- I2S ----
     with gr.Tab("Image → Speech (I2S)"):
         i2s_image_in = gr.Image(type="pil", label="Image input", sources=["upload"])
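A usage note on the wiring above: `gr.Examples` rows map positionally onto `inputs`, so clicking a sample row fills the image component from column 0 and the instruction textbox from column 1; the user then still presses "Apply edit" to run the handler. A self-contained sketch of the same pattern (the component names and sample row below are placeholders, not taken from app.py):

import gradio as gr

# Placeholder example row: [image_path, instruction]; the real rows come from I2I_EXAMPLES.
SAMPLE_ROWS = [["assets/i2i/image1.jpeg", "Make the sky look like sunset"]]

with gr.Blocks() as demo:
    image_in = gr.Image(type="pil", label="Image input", sources=["upload"])
    instr = gr.Textbox(label="Edit instruction")
    with gr.Accordion("Sample edits", open=False):
        # Column 0 populates image_in, column 1 populates instr when a row is clicked.
        gr.Examples(examples=SAMPLE_ROWS, inputs=[image_in, instr], examples_per_page=4)

demo.launch()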