Spaces: nvidia/parakeet-tdt_ctc-1.1b

Running on Zero

nithinraok committed on Sep 2, 2024

Commit

ca74027

verified ·

1 Parent(s): 6d6c247

Update nemo_align.py

Files changed (1)

nemo_align.py CHANGED Viewed

@@ -437,6 +437,7 @@ def get_start_end_for_segments(word_timestamps):
 def align_tdt_to_ctc_timestamps(tdt_txt, model, audio_filepath):
     if isinstance(model, EncDecHybridRNNTCTCModel):
         ctc_cfg = CTCDecodingConfig()
         ctc_cfg.decoding = "greedy_batch"
@@ -458,7 +459,7 @@ def align_tdt_to_ctc_timestamps(tdt_txt, model, audio_filepath):
     log_probs_list_batch = [hypotheses[0].y_sequence]
     T_list_batch = [hypotheses[0].y_sequence.shape[0]]
-    ctc_pred_text = hypotheses[0].text if tdt_txt is not None else tdt_txt
     utt_obj = get_utt_obj(
             ctc_pred_text,

 def align_tdt_to_ctc_timestamps(tdt_txt, model, audio_filepath):
+    tdt_txt = tdt_txt[0][0] if tdt_txt is not None else tdt_txt
     if isinstance(model, EncDecHybridRNNTCTCModel):
         ctc_cfg = CTCDecodingConfig()
         ctc_cfg.decoding = "greedy_batch"
     log_probs_list_batch = [hypotheses[0].y_sequence]
     T_list_batch = [hypotheses[0].y_sequence.shape[0]]
+    ctc_pred_text = hypotheses[0].text if tdt_txt is None else tdt_txt
     utt_obj = get_utt_obj(
             ctc_pred_text,