CAMOES-ASR-DEMO / app.py
Miamoto's picture
update
2e1ef95
import gradio as gr
import whisperx
# -----------------------------
# Device and compute settings
# -----------------------------
# Inference runs on the CPU: free-tier Spaces only have CPU.
device = "cpu"

# int8 is selected because float16 only works on GPU.
compute_type = "int8"
# -----------------------------
# Load WhisperX model
# -----------------------------
# Portuguese fine-tuned Whisper model. It is loaded once at module level so
# that the transcription callback can reuse the same instance on every call.
model_name = "inesc-id/WhisperLv3-EP-X"

model = whisperx.load_model(
    model_name,
    device=device,
    compute_type=compute_type,
    language="pt",
    task="transcribe",
)
# -----------------------------
# Transcription function
# -----------------------------
def transcribe(audio_file):
    """Transcribe an audio file to Portuguese text.

    Args:
        audio_file: Path to the audio file to transcribe, as delivered by the
            Gradio ``Audio`` input (configured with ``type="filepath"``).
            ``None`` or an empty path (what the UI passes when the user
            submits without recording or uploading anything) is accepted
            and produces an empty transcription.

    Returns:
        The transcription as one string: the text of each segment, stripped
        of surrounding whitespace and joined with single spaces. An empty
        string is returned when there is no input or no segment was produced.
    """
    # Nothing was recorded or uploaded: return early instead of handing a
    # missing path to whisperx.load_audio, which would raise.
    if not audio_file:
        return ""

    # Load audio and resample to 16 kHz
    audio = whisperx.load_audio(audio_file, sr=16000)

    # Transcribe
    outputs = model.transcribe(audio, batch_size=4, language="pt", task="transcribe")

    # Segment texts may carry their own leading/trailing whitespace
    # (NOTE(review): Whisper segments usually start with a space, confirm),
    # so strip each one and drop empties before joining; this avoids doubled
    # or leading spaces. Joining an empty sequence already yields "".
    pieces = (segment["text"].strip() for segment in outputs["segments"])
    return " ".join(piece for piece in pieces if piece)
# -----------------------------
# Gradio interface
# -----------------------------
# Single-function UI: audio in (microphone recording or uploaded file, handed
# to the callback as a file path), transcribed text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
    ),
    outputs="text",
    title="CAMÕES European Portuguese Automatic Speech Recognition Demo",
    description="""
This is a demo for **CAMÕES**, a Whisper Model fine-tuned on around 420h of European Portuguese by the HLT lab at INESC-ID.
The model being used here is "WhisperLv3-X". For more details about CAMÕES check out the [paper here](https://arxiv.org/abs/2508.19721).
""",
)

# Launch the app.
demo.launch()