Spaces:
Sleeping
Sleeping
File size: 1,627 Bytes
220065d 101cb32 220065d 101cb32 220065d 101cb32 3c7cd09 1b12a9c 101cb32 3c7cd09 101cb32 3c7cd09 101cb32 1b12a9c 101cb32 220065d 3c7cd09 1b12a9c 37f1adc 2e1ef95 37f1adc 21677f0 37f1adc 220065d 101cb32 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import gradio as gr
import whisperx
# -----------------------------
# Runtime target
# -----------------------------
# Free-tier Spaces are CPU-only, and float16 only works on GPU, so the
# model is run with int8 computation.
compute_type = "int8"
device = "cpu"

# -----------------------------
# WhisperX model
# -----------------------------
# Portuguese fine-tuned Whisper checkpoint, loaded once at start-up and
# fixed to Portuguese transcription.
model_name = "inesc-id/WhisperLv3-EP-X"
model = whisperx.load_model(
    model_name,
    task="transcribe",
    language="pt",
    compute_type=compute_type,
    device=device,
)
# -----------------------------
# Transcription function
# -----------------------------
def transcribe(audio_file):
    """Transcribe an audio recording to Portuguese text.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty path is treated as "no audio supplied".

    Returns:
        The text of all recognised segments joined by single spaces, or an
        empty string when no audio was supplied or nothing was recognised.
    """
    # The form can be submitted without a recording or upload; return early
    # rather than handing a missing path to the audio loader.
    if not audio_file:
        return ""

    # Load audio and resample to 16 kHz
    audio = whisperx.load_audio(audio_file, sr=16000)

    outputs = model.transcribe(audio, batch_size=4, language="pt", task="transcribe")

    # Strip each segment and drop blank ones so the joined transcript has no
    # leading, trailing or doubled spaces. Joining nothing yields "".
    pieces = (segment["text"].strip() for segment in outputs["segments"] or [])
    return " ".join(piece for piece in pieces if piece)
# -----------------------------
# Gradio interface
# -----------------------------
# Descriptive text passed to the interface.
# NOTE(review): this text names the model "WhisperLv3-X"; confirm that it
# matches the checkpoint the app actually loads.
_description = """
This is a demo for **CAMÕES**, a Whisper Model fine-tuned on around 420h of European Portuguese by the HLT lab at INESC-ID.
The model being used here is "WhisperLv3-X". For more details about CAMÕES check out the [paper here](https://arxiv.org/abs/2508.19721).
"""

# Audio may come from the microphone or an upload; the callback receives it
# as a file path.
_audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

demo = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs="text",
    title="CAMÕES European Portuguese Automatic Speech Recognition Demo",
    description=_description,
)
demo.launch()
|