# Repository page header captured together with the file (not program code):
# AIDAS-Omni-Modal-Diffusion / MMaDA / inference_emova.py
# jaeikkim
# Reinit Space without binary assets
# 7bfbdc3
import sys
import json
import torch
from models.modeling_emova_speech_tokenizer import EMOVASpeechTokenizer
import soundfile as sf
import re
import os

# Hard-coded locations on the experiment host.
# NOTE(review): input_json_path is not read anywhere in the visible part of
# this script — confirm whether it is still needed.
input_json_path = "/home/work/AIDAS/t2s_logs/librispeech_result.json"
output_dir = "/home/work/AIDAS/t2s_logs/decoded_wav/"

# Create the output directory up front; an already existing directory is fine.
os.makedirs(output_dir, exist_ok=True)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained EMOVA speech tokenizer, move it to the selected device
# and switch it to evaluation mode.
vq_model = EMOVASpeechTokenizer.from_pretrained("Emova-ollm/emova_speech_tokenizer_hf")
vq_model = vq_model.to(device)
vq_model.eval()
# tokens = vq_model.encode("/home/work/AIDAS/data/audio/commonvoice/cv-corpus-22.0-2025-06-20/en/clips/common_voice_en_619035.mp3")
# numbers = tokens.tolist()[0]
# print(numbers)
"""
Earlier, everything that came after a 135080 token in the sequence was cut off,
but when I run it again the output is no longer cut off ;;;;;;
"""
#### ORIGINAL
# Alternative probe input (kept commented out, like the other alternatives in
# this file): 50 copies of token 135080. It used to be assigned here and was
# then immediately overwritten by the list below, so it never had any effect.
# numbers = [135080] * 50

# 12-second clip.
# Transcript:
# (NOT SUCH A SERIOUS PROBLEM IN NEVADA BUT IN OTHER PARTS OF THE COUNTRY WHERE
# WHERE GOVERNORS AND STATE LEGISLATORS HAVE OPTED OUT OF THE AFFORDABLE CARE ACT AND
# SO MANY OF THEIR OWN PEOPLE ARE LEFT BEHIND.")
# Observation: the output was cut off at 135080 (all of these are phoneme tokens).
# Purpose of this run: check whether the cut-off happens again.
#
# Token ids to decode; the values are used exactly as listed.
numbers = [
    135081, 135080, 135081, 135666, 136883, 138099, 134597, 134768, 135402,
    137951, 135928, 135038, 135109, 135436, 135484, 136375, 138057, 136523,
    135883, 135042, 135038, 138177, 134603, 134599, 137728, 138203, 137115,
    138121, 135563, 135039, 135426, 136773, 136133, 136197, 135677, 134903,
    134843, 135244, 137737, 136583, 137606, 138057, 136588, 137610, 138427,
    138020, 137003, 137458, 138032, 136750, 135282, 137416, 136743, 135329,
    136875, 137832, 137401, 136387, 138248, 137990, 135985, 138462, 138364,
    138012, 137068, 135674, 135688, 134792, 135436, 138566, 137349, 136267,
    136259, 135802, 137285, 135621, 134662, 134661, 134654, 135738, 137440,
    138005, 135029, 135174, 134792, 137416, 138558, 136378, 136443, 135149,
    136133, 136068, 137076, 136500, 138413, 136125, 136253, 138345, 138132,
    136620, 135675, 135674, 135600, 135080, 135081, 135666, 135665, 137653,
    138309, 134650, 135102, 136760, 136761, 136250, 135665, 135600, 135592,
    135080, 135081, 135593, 135666, 136178, 135601, 137661, 138245, 134650,
    134591, 135102, 136761, 136250, 135666, 135602, 135666, 136308, 136639,
    135622, 134794, 135241, 137415, 136263, 137403, 138099, 136262, 136267,
    136259, 135747, 134784, 134839, 135738, 135674, 135602, 136178, 138498,
    138636, 138628, 138500, 138363, 136250, 135610, 135602, 136186, 136379,
    135040, 135535, 138285, 137271, 134676, 137950, 136312, 137095, 137413,
    135332, 137346, 137409, 136972, 135041, 135030, 137095, 138622, 135124,
    135842, 138349, 136264, 136779, 134838, 135224, 136589, 136085, 134721,
    136391, 136833, 136250, 136177, 136754, 135725, 134904, 135355, 135428,
    136838, 136880, 137951, 138351, 137985, 136892, 138346, 135870, 134614,
    134623, 134624, 135210, 135283, 136938, 138398, 136250, 135601, 135080,
    135592, 135658, 136178, 136753, 135238, 136456, 138559, 138629, 138496,
    136250, 138056, 135883, 135365, 136125, 134573, 134581, 137275, 138346,
    138055, 136908, 138248, 138120, 136073, 137026, 135205, 136702, 134714,
    134584, 134591, 135103, 136191, 136761, 137519, 135339, 135868, 135411,
    135775, 136865, 136817, 135601, 135592, 135080, 135081, 135666, 136690,
    138634, 138628, 138355, 136754, 135993, 135542, 135085, 134571, 135091,
    135667, 136379, 137611, 136262, 134892, 136028, 137083, 137827, 137108,
    136178, 135174, 134792, 137416, 137473, 138565, 138309, 134592, 135614,
    136185, 136186, 135086, 134571, 135164, 137588, 135803, 135739, 135875,
    137285, 138151, 136596, 135068, 134772, 134916, 137797, 138119, 136073,
    136186, 135166, 134661, 136248, 135737, 137095, 136325, 134700, 134659,
    134917, 136430, 138518, 137265, 138248, 138314, 136188, 136598, 137133,
    134552, 134546, 134547, 134555, 136420, 137580, 138108, 138436, 138354,
    136762, 135610, 135081, 135080,
]
# numbers = [134552] * 50
### Variant: 135080 removed from mid-sequence positions (leading/trailing occurrences kept)
# numbers = [135081, 135080, 135081, 135666, 136883, 138099, 134597, 134768, 135402,
# 137951, 135928, 135038, 135109, 135436, 135484, 136375, 138057, 136523,
# 135883, 135042, 135038, 138177, 134603, 134599, 137728, 138203, 137115,
# 138121, 135563, 135039, 135426, 136773, 136133, 136197, 135677, 134903,
# 134843, 135244, 137737, 136583, 137606, 138057, 136588, 137610, 138427,
# 138020, 137003, 137458, 138032, 136750, 135282, 137416, 136743, 135329,
# 136875, 137832, 137401, 136387, 138248, 137990, 135985, 138462, 138364,
# 138012, 137068, 135674, 135688, 134792, 135436, 138566, 137349, 136267,
# 136259, 135802, 137285, 135621, 134662, 134661, 134654, 135738, 137440,
# 138005, 135029, 135174, 134792, 137416, 138558, 136378, 136443, 135149,
# 136133, 136068, 137076, 136500, 138413, 136125, 136253, 138345, 138132,
# 136620, 135675, 135674, 135600, 135081, 135666, 135665, 137653,
# 138309, 134650, 135102, 136760, 136761, 136250, 135665, 135600, 135592,
# 135081, 135593, 135666, 136178, 135601, 137661, 138245, 134650,
# 134591, 135102, 136761, 136250, 135666, 135602, 135666, 136308, 136639,
# 135622, 134794, 135241, 137415, 136263, 137403, 138099, 136262, 136267,
# 136259, 135747, 134784, 134839, 135738, 135674, 135602, 136178, 138498,
# 138636, 138628, 138500, 138363, 136250, 135610, 135602, 136186, 136379,
# 135040, 135535, 138285, 137271, 134676, 137950, 136312, 137095, 137413,
# 135332, 137346, 137409, 136972, 135041, 135030, 137095, 138622, 135124,
# 135842, 138349, 136264, 136779, 134838, 135224, 136589, 136085, 134721,
# 136391, 136833, 136250, 136177, 136754, 135725, 134904, 135355, 135428,
# 136838, 136880, 137951, 138351, 137985, 136892, 138346, 135870, 134614,
# 134623, 134624, 135210, 135283, 136938, 138398, 136250, 135601,
# 135592, 135658, 136178, 136753, 135238, 136456, 138559, 138629, 138496,
# 136250, 138056, 135883, 135365, 136125, 134573, 134581, 137275, 138346,
# 138055, 136908, 138248, 138120, 136073, 137026, 135205, 136702, 134714,
# 134584, 134591, 135103, 136191, 136761, 137519, 135339, 135868, 135411,
# 135775, 136865, 136817, 135601, 135592, 135081, 135666, 136690,
# 138634, 138628, 138355, 136754, 135993, 135542, 135085, 134571, 135091,
# 135667, 136379, 137611, 136262, 134892, 136028, 137083, 137827, 137108,
# 136178, 135174, 134792, 137416, 137473, 138565, 138309, 134592, 135614,
# 136185, 136186, 135086, 134571, 135164, 137588, 135803, 135739, 135875,
# 137285, 138151, 136596, 135068, 134772, 134916, 137797, 138119, 136073,
# 136186, 135166, 134661, 136248, 135737, 137095, 136325, 134700, 134659,
# 134917, 136430, 138518, 137265, 138248, 138314, 136188, 136598, 137133,
# 134552, 134546, 134547, 134555, 136420, 137580, 138108, 138436, 138354,
# 136762, 135610, 135081, 135080]
# ### Variant: every 135080 / 135081 token removed
# numbers = [ 135666, 136883, 138099, 134597, 134768, 135402,
# 137951, 135928, 135038, 135109, 135436, 135484, 136375, 138057, 136523,
# 135883, 135042, 135038, 138177, 134603, 134599, 137728, 138203, 137115,
# 138121, 135563, 135039, 135426, 136773, 136133, 136197, 135677, 134903,
# 134843, 135244, 137737, 136583, 137606, 138057, 136588, 137610, 138427,
# 138020, 137003, 137458, 138032, 136750, 135282, 137416, 136743, 135329,
# 136875, 137832, 137401, 136387, 138248, 137990, 135985, 138462, 138364,
# 138012, 137068, 135674, 135688, 134792, 135436, 138566, 137349, 136267,
# 136259, 135802, 137285, 135621, 134662, 134661, 134654, 135738, 137440,
# 138005, 135029, 135174, 134792, 137416, 138558, 136378, 136443, 135149,
# 136133, 136068, 137076, 136500, 138413, 136125, 136253, 138345, 138132,
# 136620, 135675, 135674, 135600, 135666, 135665, 137653,
# 138309, 134650, 135102, 136760, 136761, 136250, 135665, 135600, 135592,
# 135593, 135666, 136178, 135601, 137661, 138245, 134650,
# 134591, 135102, 136761, 136250, 135666, 135602, 135666, 136308, 136639,
# 135622, 134794, 135241, 137415, 136263, 137403, 138099, 136262, 136267,
# 136259, 135747, 134784, 134839, 135738, 135674, 135602, 136178, 138498,
# 138636, 138628, 138500, 138363, 136250, 135610, 135602, 136186, 136379,
# 135040, 135535, 138285, 137271, 134676, 137950, 136312, 137095, 137413,
# 135332, 137346, 137409, 136972, 135041, 135030, 137095, 138622, 135124,
# 135842, 138349, 136264, 136779, 134838, 135224, 136589, 136085, 134721,
# 136391, 136833, 136250, 136177, 136754, 135725, 134904, 135355, 135428,
# 136838, 136880, 137951, 138351, 137985, 136892, 138346, 135870, 134614,
# 134623, 134624, 135210, 135283, 136938, 138398, 136250, 135601,
# 135592, 135658, 136178, 136753, 135238, 136456, 138559, 138629, 138496,
# 136250, 138056, 135883, 135365, 136125, 134573, 134581, 137275, 138346,
# 138055, 136908, 138248, 138120, 136073, 137026, 135205, 136702, 134714,
# 134584, 134591, 135103, 136191, 136761, 137519, 135339, 135868, 135411,
# 135775, 136865, 136817, 135601, 135592, 135666, 136690,
# 138634, 138628, 138355, 136754, 135993, 135542, 135085, 134571, 135091,
# 135667, 136379, 137611, 136262, 134892, 136028, 137083, 137827, 137108,
# 136178, 135174, 134792, 137416, 137473, 138565, 138309, 134592, 135614,
# 136185, 136186, 135086, 134571, 135164, 137588, 135803, 135739, 135875,
# 137285, 138151, 136596, 135068, 134772, 134916, 137797, 138119, 136073,
# 136186, 135166, 134661, 136248, 135737, 137095, 136325, 134700, 134659,
# 134917, 136430, 138518, 137265, 138248, 138314, 136188, 136598, 137133,
# 134552, 134546, 134547, 134555, 136420, 137580, 138108, 138436, 138354,
# 136762, 135610, ]
### Variant: 135080, 135081 forcibly injected mid-sequence -- what is going on, why does it not stop?
# numbers = [ 135666, 136883, 138099, 134597, 134768, 135402,
# 137951, 135928, 135038, 135109, 135436, 135484, 136375, 138057, 136523,
# 135883, 135042, 135038, 138177, 134603, 134599, 137728, 138203, 137115,
# 138121, 135563, 135039, 135426, 136773, 136133, 136197, 135677, 134903,
# 134843, 135244, 137737, 136583, 137606, 138057, 136588, 137610, 138427,
# 138020, 137003, 137458, 138032, 136750, 135282, 137416, 136743, 135329,
# 136875, 137832, 137401, 136387, 138248, 137990, 135985, 138462, 138364,
# 138012, 137068, 135674, 135688, 134792, 135436, 138566, 137349, 136267,
# 136259, 135802, 137285, 135621, 134662, 134661, 134654, 135080, 135081, 135738, 137440,
# 138005, 135029, 135174, 134792, 137416, 138558, 136378, 136443, 135149,
# 136133, 136068, 137076, 136500, 138413, 136125, 136253, 138345, 138132,
# 136620, 135675, 135674, 135600, 135666, 135665, 137653,
# 138309, 134650, 135102, 136760, 136761, 136250, 135665, 135600, 135592,
# 135593, 135666, 136178, 135601, 137661, 138245, 134650,
# 134591, 135102, 136761, 136250, 135666, 135602, 135666, 136308, 136639,
# 135622, 134794, 135241, 137415, 136263, 137403, 138099, 136262, 136267,
# 136259, 135747, 134784, 134839, 135738, 135674, 135602, 136178, 138498,
# 138636, 138628, 138500, 138363, 136250, 135610, 135602, 136186, 136379,
# 135040, 135535, 138285, 137271, 134676, 137950, 136312, 137095, 137413,
# 135332, 137346, 137409, 136972, 135041, 135030, 137095, 138622, 135124,
# 135842, 138349, 136264, 136779, 134838, 135224, 136589, 136085, 134721,
# 136391, 136833, 136250, 136177, 136754, 135725, 134904, 135355, 135428,
# 136838, 136880, 137951, 138351, 137985, 136892, 138346, 135870, 134614,
# 134623, 134624, 135210, 135283, 136938, 138398, 136250, 135601,
# 135592, 135658, 136178, 136753, 135238, 136456, 138559, 138629, 138496,
# 136250, 138056, 135883, 135365, 136125, 134573, 134581, 137275, 138346,
# 138055, 136908, 138248, 138120, 136073, 137026, 135205, 136702, 134714,
# 134584, 134591, 135103, 136191, 136761, 137519, 135339, 135868, 135411,
# 135775, 136865, 136817, 135601, 135592, 135666, 136690,
# 138634, 138628, 138355, 136754, 135993, 135542, 135085, 134571, 135091,
# 135667, 136379, 137611, 136262, 134892, 136028, 137083, 137827, 137108,
# 136178, 135174, 134792, 137416, 137473, 138565, 138309, 134592, 135614,
# 136185, 136186, 135086, 134571, 135164, 137588, 135803, 135739, 135875,
# 137285, 138151, 136596, 135068, 134772, 134916, 137797, 138119, 136073,
# 136186, 135166, 134661, 136248, 135737, 137095, 136325, 134700, 134659,
# 134917, 136430, 138518, 137265, 138248, 138314, 136188, 136598, 137133,
# 134552, 134546, 134547, 134555, 136420, 137580, 138108, 138436, 138354,
# 136762, 135610, ]
# TEST
# numbers = [135081, 135080, 135081, 135666, 136883, 138099, 134597, 134768, 135402,
# 137951, 135928, 135038, 135109, 135436, 135484, 136375, 138057, 136523,
# 135883, 135042, 135038, 138177, 135061, 135061, 135061, 135061, 135061,
# 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061,
# 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061,
# 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061,
# 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061,
# 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061,
# 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061,
# 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061, 135061,
# 135061, 135061, 135061, 135061, 135061,
# 135592, 135658, 136178, 136753, 135238, 136456, 138559, 138629, 138496,
# 136250, 138056, 135883, 135365, 136125, 134573, 134581, 137275, 138346,
# 138055, 136908, 138248, 138120, 136073, 137026, 135205, 136702, 134714,
# 134584, 134591, 135103, 136191, 136761, 137519, 135339, 135868, 135411,
# 135775, 136865, 136817, 135601, 135592, 135080, 135081, 135666, 136690,
# 138634, 138628, 138355, 136754, 135993, 135542, 135085, 134571, 135091,
# 135667, 136379, 137611, 136262, 134892, 136028, 137083, 137827, 137108,
# 136178, 135174, 134792, 137416, 137473, 138565, 138309, 134592, 135614,
# 136185, 136186, 135086, 134571, 135164, 137588, 135803, 135739, 135875,
# 137285, 138151, 136596, 135068, 134772, 134916, 137797, 138119, 136073,
# 136186, 135166, 134661, 136248, 135737, 137095, 136325, 134700, 134659,
# 134917, 136430, 138518, 137265, 138248, 138314, 136188, 136598, 137133,
# 134552, 134546, 134547, 134555, 136420, 137580, 138108, 138436, 138354,
# 136762, 135610, 135081, 135080]
# Echo the raw ids so the run output records which sequence is being decoded.
print(numbers)
# Transcript of the utterance:
# 'NOT SUCH A SERIOUS PROBLEM IN NEVADA BUT IN OTHER PARTS OF THE COUNTRY WHERE WHERE GOVERNORS AND STATE LEGISLATORS HAVE OPTED OUT OF THE AFFORDABLE CARE ACT AND SO MANY OF THEIR OWN PEOPLE ARE LEFT BEHIND.

# Constant subtracted from every id before it is written into a speech tag.
# Example: 135080 - 134541 == 539, i.e. id 135080 is speech unit 539 in EMOVA.
# NOTE(review): presumably the position where the speech units start in the
# combined vocabulary — confirm against the model's tokenizer.
offset = 134541

# Two steps: shift each id by the offset, then wrap it in a "<|speech_N|>" tag.
# (For ids that are already EMOVA unit indices, skip the subtraction and format
# the values of `numbers` directly.)
speech_tokens = [
    f"<|speech_{unit_id}|>"
    for unit_id in (raw_id - offset for raw_id in numbers)
]

# The tags are concatenated with no separator between them.
token_str = "".join(speech_tokens)
# token_str = "<|speech_540|><|speech_539|><|speech_1053|><|speech_1068|><|speech_3121|><|speech_3256|><|speech_1479|><|speech_3055|><|speech_4088|><|speech_4027|><|speech_1653|><|speech_1049|><|speech_38|><|speech_2045|><|speech_3814|><|speech_2156|><|speech_3121|><|speech_2600|><|speech_2807|><|speech_3582|><|speech_3579|><|speech_3567|><|speech_1902|><|speech_1545|><|speech_2048|><|speech_1246|><|speech_3750|><|speech_2220|><|speech_2042|><|speech_1784|><|speech_95|><|speech_376|><|speech_2449|><|speech_3921|><|speech_4024|><|speech_3954|><|speech_569|><|speech_2593|><|speech_2079|><|speech_1583|><|speech_1911|><|speech_3071|><|speech_2430|><|speech_1197|><|speech_1702|><|speech_2559|><|speech_2811|><|speech_1651|><|speech_166|><|speech_1531|><|speech_1405|><|speech_3197|><|speech_3834|><|speech_2290|><|speech_1138|><|speech_3733|><|speech_3087|><|speech_2287|><|speech_3808|><|speech_568|><|speech_566|><|speech_1311|><|speech_3551|><|speech_3951|><|speech_3758|><|speech_2852|><|speech_3624|><|speech_2579|><|speech_2569|><|speech_3609|><|speech_371|><|speech_1774|><|speech_3694|><|speech_1576|><|speech_24|><|speech_26|><|speech_860|><|speech_3856|><|speech_1721|><|speech_2238|><|speech_4094|><|speech_4079|><|speech_3959|><|speech_3758|><|speech_1451|><|speech_497|><|speech_761|><|speech_231|><|speech_167|><|speech_2791|><|speech_3822|><|speech_2222|><|speech_1141|><|speech_548|><|speech_539|><|speech_540|><|speech_1052|><|speech_1125|><|speech_1701|><|speech_1444|><|speech_1001|><|speech_1056|><|speech_533|><|speech_614|><|speech_1126|><|speech_1254|><|speech_2215|><|speech_2097|><|speech_1145|><|speech_2721|><|speech_71|><|speech_591|><|speech_1004|><|speech_497|><|speech_1325|><|speech_1646|><|speech_1061|><|speech_540|><|speech_539|><|speech_1052|><|speech_1053|><|speech_2589|><|speech_3611|><|speech_2578|><|speech_1068|><|speech_1787|><|speech_2291|><|speech_2042|><|speech_544|><|speech_17|><|speech_995|><|speech_986|><|speech_3786|><|speech_3612|><|speech_552
# (continuation of the commented-out token_str above) |><|speech_609|><|speech_3558|><|speech_2859|><|speech_2065|><|speech_1575|><|speech_1645|><|speech_824|><|speech_1144|><|speech_2800|><|speech_350|><|speech_2007|><|speech_3559|><|speech_3887|><|speech_3813|><|speech_2221|><|speech_1133|><|speech_549|><|speech_540|>"
# Show the exact token string that is handed to the decoder.
print(token_str)

# Destination of the decoded audio.
output_wav_path = "/home/work/AIDAS/emova_outputs/gt_test.wav"
# This directory differs from the one the script creates earlier and is not
# created anywhere in the visible code, so make sure it exists before the
# decoder is asked to write into it.
os.makedirs(os.path.dirname(output_wav_path), exist_ok=True)

# Condition string passed to the decoder (gender / emotion / speed / pitch).
condition = 'gender-female_emotion-neutral_speed-normal_pitch-normal'

# Decoding is inference only, so gradient tracking is switched off.
with torch.no_grad():
    vq_model.decode(
        token_str,
        condition=condition,
        output_wav_file=output_wav_path
    )