Miamoto committed on
Commit
3c7cd09
·
1 Parent(s): b858a56

app updated

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -7,18 +7,29 @@ processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
7
  model = WhisperForConditionalGeneration.from_pretrained("inesc-id/WhisperLv3-FT")
8
 
9
  def transcribe(audio):
 
10
  speech, _ = librosa.load(audio, sr=16000)
11
- inputs = processor(speech, return_tensors="pt")
12
- predicted_ids = model.generate(**inputs)
13
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
14
- return transcription
 
 
 
 
 
 
 
 
 
 
15
 
16
  demo = gr.Interface(
17
- fn=transcribe,
18
- inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
19
- outputs="text",
20
- title="Custom Whisper ASR Demo",
 
21
  )
22
 
23
  demo.launch()
24
-
 
7
  model = WhisperForConditionalGeneration.from_pretrained("inesc-id/WhisperLv3-FT")
8
 
9
def transcribe(audio):
    """Transcribe an audio file with the fine-tuned Whisper model.

    Audio longer than 30 seconds is split into 30 s chunks (Whisper's
    encoder context window), each chunk is transcribed independently,
    and the partial transcriptions are joined with spaces.

    Args:
        audio: Filesystem path to the audio file (Gradio passes a
            filepath because the component uses type="filepath").

    Returns:
        The full transcription as a single string ("" for empty audio).
    """
    # Load and resample audio to the 16 kHz mono rate Whisper expects.
    speech, _ = librosa.load(audio, sr=16000)

    # Whisper processes at most 30 s of audio per forward pass.
    chunk_size = 30 * 16000
    texts = []

    for start in range(0, len(speech), chunk_size):
        chunk = speech[start:start + chunk_size]
        # Pass sampling_rate explicitly: WhisperProcessor uses it to
        # verify the input matches the model's 16 kHz feature extractor
        # (omitting it triggers a warning and a silent default).
        inputs = processor(chunk, sampling_rate=16000, return_tensors="pt")
        predicted_ids = model.generate(**inputs)
        text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        texts.append(text)

    # NOTE(review): naive space-join may split a word at a chunk
    # boundary — acceptable for a demo.
    return " ".join(texts)
26
 
27
# Gradio UI: audio in (microphone or file upload), transcription text out.
_interface_config = dict(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="CAMÕES Whisper Demo",
    description="Upload or record audio and get transcription. Supports files longer than 30 seconds."
)
demo = gr.Interface(**_interface_config)

demo.launch()