Spaces:

inesc-id
/

CAMOES-ASR-DEMO

Sleeping

CAMOES-ASR-DEMO / app.py

update

2e1ef95 about 1 month ago

1.63 kB

	import gradio as gr
	import whisperx

	# -----------------------------

	# Device and compute settings

	# -----------------------------

	# Run on CPU with 8-bit quantised weights: free-tier Spaces only have a
	# CPU, and float16 only works on GPU.
	device = "cpu"
	compute_type = "int8"

	# -----------------------------
	# Load WhisperX model
	# -----------------------------

	# Portuguese fine-tuned Whisper model. It is loaded once, when the module
	# is executed, and the resulting `model` object is reused by `transcribe`.
	model_name = "inesc-id/WhisperLv3-EP-X"
	model = whisperx.load_model(
	    model_name,
	    device=device,
	    compute_type=compute_type,
	    language="pt",
	    task="transcribe",
	)

	# -----------------------------

	# Transcription function

	# -----------------------------

	def transcribe(audio_file):
	    """Transcribe an audio file into Portuguese text.

	    Args:
	        audio_file: Path of the audio file to transcribe, as handed over by
	            the Gradio audio input, or ``None``/empty when the user
	            submitted the form without recording or uploading anything.

	    Returns:
	        The transcript as a single string. Empty when no audio was given
	        or when the model produced no segments.
	    """
	    # Gradio passes None when nothing was recorded or uploaded; return an
	    # empty transcript instead of letting the audio loader fail on it.
	    if not audio_file:
	        return ""

	    # Load audio and resample to 16 kHz.
	    audio = whisperx.load_audio(audio_file, sr=16000)

	    outputs = model.transcribe(audio, batch_size=4, language="pt", task="transcribe")

	    # Whisper segment texts typically start with their own space (TODO
	    # confirm for this checkpoint), so strip each piece before joining to
	    # avoid doubled or leading spaces. Joining nothing yields "" already,
	    # so no separate empty-result branch is needed.
	    segments = outputs["segments"] or []
	    pieces = (segment["text"].strip() for segment in segments)
	    return " ".join(piece for piece in pieces if piece)


	# -----------------------------

	# Gradio interface

	# -----------------------------

	# Single-function UI: audio comes from the microphone or an upload and is
	# handed to `transcribe` as a file path; the returned transcript is shown
	# as plain text.
	# NOTE(review): the description below names the model "WhisperLv3-X" while
	# the checkpoint loaded in this file is "inesc-id/WhisperLv3-EP-X" -
	# confirm which name is intended.
	demo = gr.Interface(
	    title="CAMÕES European Portuguese Automatic Speech Recognition Demo",
	    description="""
	This is a demo for CAMÕES, a Whisper Model fine-tuned on around 420h of European Portuguese by the HLT lab at INESC-ID.

	The model being used here is "WhisperLv3-X". For more details about CAMÕES check out the [paper here](https://arxiv.org/abs/2508.19721).
	""",
	    fn=transcribe,
	    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
	    outputs="text",
	)

	# Start the web server for the demo.
	demo.launch()