app updated
app.py CHANGED
@@ -7,18 +7,29 @@ processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
 model = WhisperForConditionalGeneration.from_pretrained("inesc-id/WhisperLv3-FT")
 
 def transcribe(audio):
+    # Load and resample audio to 16 kHz
     speech, _ = librosa.load(audio, sr=16000)
-
-
-
-
+
+    # Split audio into 30s chunks
+    chunk_size = 30 * 16000
+    texts = []
+
+    for start in range(0, len(speech), chunk_size):
+        chunk = speech[start:start + chunk_size]
+        inputs = processor(chunk, return_tensors="pt")
+        predicted_ids = model.generate(**inputs)
+        text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+        texts.append(text)
+
+    # Combine all chunks
+    return " ".join(texts)
 
 demo = gr.Interface(
-
-
-
-
+    fn=transcribe,
+    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+    outputs="text",
+    title="CAMÕES Whisper Demo",
+    description="Upload or record audio and get transcription. Supports files longer than 30 seconds."
 )
 
 demo.launch()
-
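For reference, below is a minimal sketch of the full app.py after this change. The hunk starts at line 7, so the import lines and the processor definition are assumptions reconstructed from the identifiers the hunk uses (gr, librosa, WhisperProcessor, WhisperForConditionalGeneration); the processor line itself appears in the hunk header. The only deviation from the committed hunk is passing sampling_rate explicitly to the processor, which avoids the transformers warning about an unspecified input rate.

# Sketch of the updated app.py; imports are assumptions, see note above.
import gradio as gr
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
model = WhisperForConditionalGeneration.from_pretrained("inesc-id/WhisperLv3-FT")

def transcribe(audio):
    # Load and resample audio to 16 kHz
    speech, _ = librosa.load(audio, sr=16000)

    # Split audio into 30 s chunks (Whisper's maximum input window)
    chunk_size = 30 * 16000
    texts = []

    for start in range(0, len(speech), chunk_size):
        chunk = speech[start:start + chunk_size]
        # sampling_rate is passed explicitly here; the committed hunk omits it
        inputs = processor(chunk, sampling_rate=16000, return_tensors="pt")
        predicted_ids = model.generate(**inputs)
        text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        texts.append(text)

    # Combine all chunk transcripts into one string
    return " ".join(texts)

demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="CAMÕES Whisper Demo",
    description="Upload or record audio and get transcription. Supports files longer than 30 seconds."
)

demo.launch()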