Spaces: Running on Zero
feat: Enhance video composition functionality with detailed docstring and improved command handling
Browse files
app.py
CHANGED
|
@@ -190,19 +190,29 @@ YOUR RESPONSE:
|
|
| 190 |
# Find content between ```sh or ```bash and the next ```
|
| 191 |
import re
|
| 192 |
|
| 193 |
-
command_match = re.search(
|
|
|
|
|
|
|
| 194 |
if command_match:
|
| 195 |
command = command_match.group(1).strip()
|
| 196 |
else:
|
| 197 |
# Try to find a line that starts with ffmpeg
|
| 198 |
-
ffmpeg_lines = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
if ffmpeg_lines:
|
| 200 |
command = ffmpeg_lines[0]
|
| 201 |
else:
|
| 202 |
command = content.replace("\n", "")
|
| 203 |
else:
|
| 204 |
# Try to find a line that starts with ffmpeg
|
| 205 |
-
ffmpeg_lines = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
if ffmpeg_lines:
|
| 207 |
command = ffmpeg_lines[0]
|
| 208 |
else:
|
|
@@ -220,13 +230,40 @@ YOUR RESPONSE:
|
|
| 220 |
def execute_ffmpeg_command(args, temp_dir, output_file_path):
|
| 221 |
"""Execute FFmpeg command with GPU acceleration"""
|
| 222 |
final_command = args + ["-y", output_file_path]
|
| 223 |
-
print(
|
| 224 |
-
f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n"
|
| 225 |
-
)
|
| 226 |
subprocess.run(final_command, cwd=temp_dir)
|
| 227 |
return output_file_path
|
| 228 |
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
def update(
|
| 231 |
files,
|
| 232 |
prompt,
|
|
@@ -291,7 +328,9 @@ def update(
|
|
| 291 |
execute_ffmpeg_command(args, temp_dir, output_file_path)
|
| 292 |
# Extract just the command for display
|
| 293 |
command_for_display = f"ffmpeg {' '.join(args[1:])} -y output.mp4"
|
| 294 |
-
generated_command =
|
|
|
|
|
|
|
| 295 |
return output_file_path, gr.update(value=generated_command)
|
| 296 |
except Exception as e:
|
| 297 |
attempts += 1
|
|
@@ -300,6 +339,26 @@ def update(
|
|
| 300 |
raise gr.Error(e)
|
| 301 |
|
| 302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
with gr.Blocks() as demo:
|
| 304 |
gr.Markdown(
|
| 305 |
"""
|
|
@@ -358,13 +417,13 @@ with gr.Blocks() as demo:
|
|
| 358 |
examples=[
|
| 359 |
[
|
| 360 |
["./examples/ai_talk.wav", "./examples/bg-image.png"],
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
"Use the image as the background with a waveform visualization for the audio positioned in center of the video. Make sure the waveform has a max height of 250 pixels.",
|
| 369 |
0.7,
|
| 370 |
0.1,
|
|
@@ -380,13 +439,13 @@ with gr.Blocks() as demo:
|
|
| 380 |
"./examples/cat6.jpeg",
|
| 381 |
"./examples/heat-wave.mp3",
|
| 382 |
],
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
outputs=[generated_video, generated_command],
|
| 391 |
fn=update,
|
| 392 |
run_on_click=True,
|
|
@@ -402,5 +461,9 @@ with gr.Blocks() as demo:
|
|
| 402 |
""",
|
| 403 |
)
|
| 404 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
demo.queue(default_concurrency_limit=200)
|
| 406 |
-
demo.launch(show_api=False, ssr_mode=False)
|
|
|
|
| 190 |
# Find content between ```sh or ```bash and the next ```
|
| 191 |
import re
|
| 192 |
|
| 193 |
+
command_match = re.search(
|
| 194 |
+
r"```(?:sh|bash)?\n(.*?)\n```", content, re.DOTALL
|
| 195 |
+
)
|
| 196 |
if command_match:
|
| 197 |
command = command_match.group(1).strip()
|
| 198 |
else:
|
| 199 |
# Try to find a line that starts with ffmpeg
|
| 200 |
+
ffmpeg_lines = [
|
| 201 |
+
line.strip()
|
| 202 |
+
for line in content.split("\n")
|
| 203 |
+
if line.strip().startswith("ffmpeg")
|
| 204 |
+
]
|
| 205 |
if ffmpeg_lines:
|
| 206 |
command = ffmpeg_lines[0]
|
| 207 |
else:
|
| 208 |
command = content.replace("\n", "")
|
| 209 |
else:
|
| 210 |
# Try to find a line that starts with ffmpeg
|
| 211 |
+
ffmpeg_lines = [
|
| 212 |
+
line.strip()
|
| 213 |
+
for line in content.split("\n")
|
| 214 |
+
if line.strip().startswith("ffmpeg")
|
| 215 |
+
]
|
| 216 |
if ffmpeg_lines:
|
| 217 |
command = ffmpeg_lines[0]
|
| 218 |
else:
|
|
|
|
| 230 |
def execute_ffmpeg_command(args, temp_dir, output_file_path):
    """Run an FFmpeg command and return the path of the rendered output.

    Args:
        args: FFmpeg invocation as an argument list, e.g. ["ffmpeg", "-i", ...].
        temp_dir: Working directory containing the uploaded media files.
        output_file_path: Destination path appended to the command.

    Returns:
        output_file_path, once FFmpeg has exited successfully.

    Raises:
        subprocess.CalledProcessError: if FFmpeg exits non-zero, so the
            caller's retry loop can react instead of silently receiving a
            path to a file that was never (fully) written.
    """
    # -y: overwrite the output file without prompting.
    final_command = args + ["-y", output_file_path]
    print(
        f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n"
    )
    # check=True surfaces FFmpeg failures as exceptions; previously a failed
    # render still returned output_file_path as if it had succeeded.
    subprocess.run(final_command, cwd=temp_dir, check=True)
    return output_file_path
|
| 236 |
|
| 237 |
|
| 238 |
+
def compose_video(
    prompt: str,
    files: list | None = None,
    top_p: float = 0.7,
    temperature: float = 0.1,
    model_choice: str = "deepseek-ai/DeepSeek-V3",
) -> str:
    """Compose a video from media assets using natural language instructions.

    This tool generates FFmpeg commands using AI and executes them to create
    videos from uploaded images, videos, and audio files based on natural
    language descriptions.

    Args:
        prompt (str): Natural language instructions for video composition
            (e.g., "Create a slideshow with background music").
        files (list, optional): List of media files (images, videos, audio) to use.
        top_p (float): Top-p sampling parameter for the AI model (0.0-1.0, default: 0.7).
        temperature (float): Temperature parameter for AI model creativity
            (0.0-5.0, default: 0.1).
        model_choice (str): AI model used for command generation
            (default: "deepseek-ai/DeepSeek-V3").

    Returns:
        str: Path to the generated video file.

    Example:
        compose_video("Create a 10-second slideshow from the images with fade transitions",
                      files=[img1, img2, img3])
    """
    # update() returns (video_path, gr.update(...)) for the Blocks UI; the MCP
    # interface has a single gr.Video output, so expose only the path here.
    # (Previously the whole tuple was returned despite the declared -> str.)
    result = update(files or [], prompt, top_p, temperature, model_choice)
    if isinstance(result, tuple):
        return result[0]
    return result
|
| 265 |
+
|
| 266 |
+
|
| 267 |
def update(
|
| 268 |
files,
|
| 269 |
prompt,
|
|
|
|
| 328 |
execute_ffmpeg_command(args, temp_dir, output_file_path)
|
| 329 |
# Extract just the command for display
|
| 330 |
command_for_display = f"ffmpeg {' '.join(args[1:])} -y output.mp4"
|
| 331 |
+
generated_command = (
|
| 332 |
+
f"### Generated Command\n```bash\n{command_for_display}\n```"
|
| 333 |
+
)
|
| 334 |
return output_file_path, gr.update(value=generated_command)
|
| 335 |
except Exception as e:
|
| 336 |
attempts += 1
|
|
|
|
| 339 |
raise gr.Error(e)
|
| 340 |
|
| 341 |
|
| 342 |
+
# Create MCP-compatible interface
# Input order must match compose_video's positional signature:
# (prompt, files, top_p, temperature, model_choice).
mcp_interface = gr.Interface(
    fn=compose_video,
    inputs=[
        gr.Textbox(
            value="Create a slideshow with background music",
            label="Video Composition Instructions",
        ),
        # file_types is restricted to the module-level allowed_medias list.
        gr.File(file_count="multiple", label="Media Files", file_types=allowed_medias),
        gr.Slider(0.0, 1.0, value=0.7, label="Top-p"),
        gr.Slider(0.0, 5.0, value=0.1, label="Temperature"),
        gr.Radio(
            choices=list(MODELS.keys()), value=list(MODELS.keys())[0], label="Model"
        ),
    ],
    # NOTE(review): a single Video output, but compose_video forwards update(),
    # which appears to return a 2-tuple (path, gr.update(...)) — confirm the
    # Video component actually receives a plain file path.
    outputs=gr.Video(label="Generated Video"),
    title="AI Video Composer MCP Tool",
    description="Compose videos from media assets using natural language",
)
|
| 361 |
+
|
| 362 |
with gr.Blocks() as demo:
|
| 363 |
gr.Markdown(
|
| 364 |
"""
|
|
|
|
| 417 |
examples=[
|
| 418 |
[
|
| 419 |
["./examples/ai_talk.wav", "./examples/bg-image.png"],
|
| 420 |
+
"Use the image as the background with a waveform visualization for the audio positioned in center of the video.",
|
| 421 |
+
0.7,
|
| 422 |
+
0.1,
|
| 423 |
+
list(MODELS.keys())[0],
|
| 424 |
+
],
|
| 425 |
+
[
|
| 426 |
+
["./examples/ai_talk.wav", "./examples/bg-image.png"],
|
| 427 |
"Use the image as the background with a waveform visualization for the audio positioned in center of the video. Make sure the waveform has a max height of 250 pixels.",
|
| 428 |
0.7,
|
| 429 |
0.1,
|
|
|
|
| 439 |
"./examples/cat6.jpeg",
|
| 440 |
"./examples/heat-wave.mp3",
|
| 441 |
],
|
| 442 |
+
"Create a 3x2 grid of the cat images with the audio as background music. Make the video duration match the audio duration.",
|
| 443 |
+
0.7,
|
| 444 |
+
0.1,
|
| 445 |
+
list(MODELS.keys())[0],
|
| 446 |
+
],
|
| 447 |
+
],
|
| 448 |
+
inputs=[user_files, user_prompt, top_p, temperature, model_choice],
|
| 449 |
outputs=[generated_video, generated_command],
|
| 450 |
fn=update,
|
| 451 |
run_on_click=True,
|
|
|
|
| 461 |
""",
|
| 462 |
)
|
| 463 |
|
| 464 |
+
# Launch MCP interface for tool access
# NOTE(review): mcp_interface is queued here but never launched or mounted,
# so this call has no visible effect on its own — confirm whether the MCP
# tools are instead meant to be exposed via demo.launch(mcp_server=True).
mcp_interface.queue(default_concurrency_limit=200)

# Launch main demo
demo.queue(default_concurrency_limit=200)
# mcp_server=True exposes the app's functions as MCP tools.
# NOTE(review): show_api=False hides the API surface — verify it does not
# also disable the MCP endpoint in the installed Gradio version.
demo.launch(show_api=False, ssr_mode=False, mcp_server=True)
|