Spaces: Running on Zero
Update nemo_align.py
Browse files
- nemo_align.py +2 -1
nemo_align.py
CHANGED
|
@@ -437,6 +437,7 @@ def get_start_end_for_segments(word_timestamps):
|
|
| 437 |
|
| 438 |
|
| 439 |
def align_tdt_to_ctc_timestamps(tdt_txt, model, audio_filepath):
|
|
|
|
| 440 |
if isinstance(model, EncDecHybridRNNTCTCModel):
|
| 441 |
ctc_cfg = CTCDecodingConfig()
|
| 442 |
ctc_cfg.decoding = "greedy_batch"
|
|
@@ -458,7 +459,7 @@ def align_tdt_to_ctc_timestamps(tdt_txt, model, audio_filepath):
|
|
| 458 |
|
| 459 |
log_probs_list_batch = [hypotheses[0].y_sequence]
|
| 460 |
T_list_batch = [hypotheses[0].y_sequence.shape[0]]
|
| 461 |
-
ctc_pred_text = hypotheses[0].text if tdt_txt is
|
| 462 |
|
| 463 |
utt_obj = get_utt_obj(
|
| 464 |
ctc_pred_text,
|
|
|
|
| 437 |
|
| 438 |
|
| 439 |
def align_tdt_to_ctc_timestamps(tdt_txt, model, audio_filepath):
|
| 440 |
+
tdt_txt = tdt_txt[0][0] if tdt_txt is not None else tdt_txt
|
| 441 |
if isinstance(model, EncDecHybridRNNTCTCModel):
|
| 442 |
ctc_cfg = CTCDecodingConfig()
|
| 443 |
ctc_cfg.decoding = "greedy_batch"
|
|
|
|
| 459 |
|
| 460 |
log_probs_list_batch = [hypotheses[0].y_sequence]
|
| 461 |
T_list_batch = [hypotheses[0].y_sequence.shape[0]]
|
| 462 |
+
ctc_pred_text = hypotheses[0].text if tdt_txt is None else tdt_txt
|
| 463 |
|
| 464 |
utt_obj = get_utt_obj(
|
| 465 |
ctc_pred_text,
|