jam-f5

Running on Zero

App Files Files Community

jam-f5 / app.py

zameer2662

Update app.py

ef83232 verified 4 months ago

raw

history blame contribute delete

3.4 kB

	import spaces
	import gradio as gr
	from f5_tts.infer.utils_infer import remove_silence_for_generated_wav
	from f5_tts.api import F5TTS
	import tempfile
	import os

	# Factory for the F5TTS model. It is wrapped in @spaces.GPU so the model is
	# built inside a GPU-decorated call, avoiding CUDA init in the main process.
	@spaces.GPU
	def initialize_f5tts():
	    """Construct a new ``F5TTS`` instance with default arguments and return it."""
	    model = F5TTS()
	    return model

	# Module-level slot for the shared F5TTS model. It starts as None and is
	# filled in lazily by run_tts() the first time that function is called.
	f5tts = None

	@spaces.GPU
	def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
	    """Generate speech for ``gen_text`` using the reference clip and return a WAV path.

	    Args:
	        ref_audio: Reference audio, forwarded to ``F5TTS.infer`` as ``ref_file``.
	            The UI supplies it as a file path.
	        ref_text: Text associated with the reference audio.
	        gen_text: Text to synthesize.
	        remove_silence: Forwarded unchanged to ``F5TTS.infer``.

	    Returns:
	        Path of the temporary ``.wav`` file handed to ``F5TTS.infer`` as
	        ``file_wave``.
	    """
	    global f5tts
	    # Build the model on first use only; later calls reuse the module-level one.
	    if f5tts is None:
	        f5tts = F5TTS()

	    # tempfile.mktemp() is deprecated: it only returns a name, so another
	    # process can claim that path before we write to it. NamedTemporaryFile
	    # creates the file atomically; delete=False keeps it on disk after the
	    # handle is closed so it can be written to and then served to the caller.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
	        output_wav_path = tmp_wav.name

	    # The returned (wav, sample rate, ...) tuple is not needed here; only the
	    # file written at ``file_wave`` is used.
	    f5tts.infer(
	        ref_file=ref_audio,
	        ref_text=ref_text,
	        gen_text=gen_text,
	        file_wave=output_wav_path,
	        remove_silence=remove_silence,
	    )

	    return output_wav_path

	# Create the main interface
	# Build the Gradio UI: inputs on the left, generated audio on the right, and a
	# fixed custom footer. Plain "|" separators are used in the title and footer;
	# the previous "\|" was an invalid escape sequence and showed a stray backslash.
	with gr.Blocks(
	    title="🗣️ F5-TTS | Integrated By Muhammad Zameer ul Hassan",
	    theme=gr.themes.Default(),
	    # NOTE(review): the `.gradio-container .prose` rule below hides every
	    # element carrying the `prose` class - confirm the Markdown intro and the
	    # custom footer are not affected by it.
	    css="""
	footer {visibility: hidden}
	.gradio-container .prose {display: none !important}

	/* Custom Footer */
	.custom-footer {
	position: fixed;
	bottom: 0;
	left: 0;
	right: 0;
	background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
	color: white;
	text-align: center;
	padding: 10px 0;
	font-size: 14px;
	z-index: 1000;
	box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
	}

	.custom-footer a {
	color: #ffffff;
	text-decoration: none;
	font-weight: bold;
	}

	.custom-footer a:hover {
	color: #f0f0f0;
	text-decoration: underline;
	}

	/* Add margin to prevent content overlap */
	.gradio-container {
	margin-bottom: 60px;
	}
	"""
	) as demo:
	    # Introductory text and usage instructions.
	    gr.Markdown("""
	# 🗣️ F5-TTS Text-to-Speech

	Upload a reference voice, give reference and generation text, and hear it in the same voice!

	Instructions:
	1. Upload a reference audio file (preferably 3-10 seconds)
	2. Enter the text that corresponds to your reference audio
	3. Enter the text you want to generate in the same voice
	4. Optionally enable silence removal for cleaner output
	""")

	    with gr.Row():
	        # Left column: all inputs plus the trigger button.
	        with gr.Column():
	            # type="filepath" makes the component hand run_tts a path string.
	            ref_audio = gr.Audio(label="Reference Audio", type="filepath")
	            ref_text = gr.Textbox(
	                label="Reference Text",
	                placeholder="Enter the text spoken in the reference audio...",
	                lines=2,
	            )
	            gen_text = gr.Textbox(
	                label="Generation Text",
	                placeholder="Enter the text you want to generate...",
	                lines=3,
	            )
	            remove_silence = gr.Checkbox(label="Remove Silence from Output", value=False)

	            generate_btn = gr.Button("Generate Speech", variant="primary", size="lg")

	        # Right column: playback of the generated audio.
	        with gr.Column():
	            output_audio = gr.Audio(label="Generated Speech")

	    # Wire the button to run_tts; the input order matches its parameter order.
	    generate_btn.click(
	        fn=run_tts,
	        inputs=[ref_audio, ref_text, gen_text, remove_silence],
	        outputs=output_audio,
	    )

	    # Custom Footer
	    gr.HTML("""
	<div class="custom-footer">
	<p>🗣️ F5-TTS by Muhammad Zameer ul Hassan |
	Powered by F5-TTS |
	</p>
	</div>
	""")

	# Start the Gradio server only when this file is executed as a script;
	# show_api=False is passed to launch() as in the original configuration.
	if __name__ == "__main__":
	    demo.launch(show_api=False)