File size: 1,627 Bytes
220065d
101cb32
220065d
101cb32
220065d
101cb32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c7cd09
1b12a9c
101cb32
 
3c7cd09
101cb32
 
 
 
 
3c7cd09
101cb32
1b12a9c
101cb32
 
 
 
 
 
220065d
 
3c7cd09
 
 
1b12a9c
37f1adc
2e1ef95
37f1adc
21677f0
37f1adc
220065d
 
101cb32
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
import whisperx

# -----------------------------

# Device and compute settings

# -----------------------------

# Free-tier Spaces only have CPU, so inference is pinned to it.
device = "cpu"
# float16 only works on GPU; int8 is used for the CPU device above.
compute_type = "int8"

# -----------------------------

# Load WhisperX model

# -----------------------------

# Portuguese fine-tuned Whisper model
model_name = "inesc-id/WhisperLv3-EP-X"

# Loaded once at module level; configured for Portuguese transcription
# using the device and compute type chosen earlier in this file.
model = whisperx.load_model(
    model_name,
    device=device,
    compute_type=compute_type,
    language="pt",
    task="transcribe",
)

# -----------------------------

# Transcription function

# -----------------------------

def transcribe(audio_file):
    """Transcribe a Portuguese audio recording to plain text.

    Args:
        audio_file: Path to the audio file handed over by the Gradio audio
            input. May be ``None`` (or empty) when the user submits the form
            without recording or uploading anything.

    Returns:
        The text of all recognised segments joined with single spaces, or an
        empty string when no audio was supplied or no segments were produced.
    """
    # Guard against a submit with no audio: passing a missing path on to the
    # audio loader would fail instead of giving the user an empty result.
    if not audio_file:
        return ""

    # Load audio and resample to 16 kHz
    audio = whisperx.load_audio(audio_file, sr=16000)

    # Transcribe
    outputs = model.transcribe(audio, batch_size=4, language="pt", task="transcribe")

    # Concatenate segments. Each segment text is stripped so that any padding
    # whitespace it carries does not turn into doubled spaces once joined;
    # joining an empty sequence already yields "".
    segments = outputs["segments"] or []
    pieces = (segment["text"].strip() for segment in segments)
    return " ".join(piece for piece in pieces if piece)


# -----------------------------

# Gradio interface

# -----------------------------

# Accept either a microphone recording or an uploaded file; the handler
# receives the audio as a file path.
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

# NOTE(review): the description below names the model "WhisperLv3-X" while the
# model loaded in this file is "inesc-id/WhisperLv3-EP-X" — confirm which name
# should be shown to users.
demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="CAMÕES European Portuguese Automatic Speech Recognition Demo",
    description="""
This is a demo for **CAMÕES**, a Whisper Model fine-tuned on around 420h of European Portuguese by the HLT lab at INESC-ID.

The model being used here is "WhisperLv3-X". For more details about CAMÕES check out the [paper here](https://arxiv.org/abs/2508.19721).
""",
)

# Start the web app.
demo.launch()