Spaces:

cuhgrel
/

nemo-tts-api

Running

App Files Community

cuhgrel committed on Sep 30, 2025

Commit

793ce85

1 Parent(s): 5ed11a5

updated the app.py

Browse files

Files changed (1) hide show

app.py +14 -56

app.py CHANGED Viewed

@@ -5,7 +5,8 @@ import logging
 from fastapi import FastAPI, HTTPException, status
 from pydantic import BaseModel
 from nemo.collections.tts.models import FastPitchModel, HifiGanModel
-from omegaconf import OmegaConf, open_dict
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -39,65 +40,22 @@ def load_models():
         models['en'].eval()
         logger.info("English model loaded successfully")
-        # Load the Bikol Spectrogram Generator with configuration override
         logger.info("Loading Bikol FastPitch model...")
-        try:
-            # First attempt: Try loading with strict=False
-            models['bikol'] = FastPitchModel.restore_from(
-                "models/fastpitch_bikol_fixed.nemo",
-                strict=False
-            ).to(device)
-            models['bikol'].eval()
-            logger.info("Bikol model loaded successfully")
-        except Exception as e:
-            logger.warning(f"First attempt failed: {e}")
-            logger.info("Attempting to load Bikol model with config override...")
-            # Second attempt: Override the text_tokenizer config to remove g2p parameter
-            try:
-                # Create a config override that removes the problematic g2p parameter
-                override_config = OmegaConf.create({
-                    'text_tokenizer': {
-                        '_target_': 'nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.BaseCharsTokenizer',
-                        'pad_with_space': True
-                    }
-                })
-                models['bikol'] = FastPitchModel.restore_from(
-                    "models/fastpitch_bikol_fixed.nemo",
-                    override_config_path=override_config,
-                    strict=False
-                ).to(device)
-                models['bikol'].eval()
-                logger.info("Bikol model loaded successfully with config override")
-            except Exception as e2:
-                logger.error(f"Failed to load Bikol model with override: {e2}")
-                # Third attempt: Try modifying the saved config
-                logger.info("Attempting alternative loading method...")
-                try:
-                    # Load model with map_location to avoid device issues
-                    models['bikol'] = FastPitchModel.restore_from(
-                        "models/fastpitch_bikol_fixed.nemo",
-                        map_location=device,
-                        strict=False
-                    )
-                    models['bikol'].eval()
-                    logger.info("Bikol model loaded with map_location")
-                except Exception as e3:
-                    logger.error(f"All attempts to load Bikol model failed: {e3}")
-                    logger.error("Bikol language will not be available")
-                    # Don't raise - allow app to start with just English
-        logger.info("Model loading complete. Available models: " + ", ".join(models.keys()))
     except Exception as e:
         logger.error(f"FATAL: Could not load models. Error: {e}")
         import traceback
         traceback.print_exc()
-        # Allow app to start even if models fail - better for debugging
 # --- 3. Define API Request and Response Models ---
 class TTSRequest(BaseModel):
@@ -110,7 +68,7 @@ def synthesize_speech(request: TTSRequest):
     """
     Generates speech from text using the selected language model.
     """
-    if not models:
         raise HTTPException(
             status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
             detail="Models are not loaded yet. Please try again in a moment."

 from fastapi import FastAPI, HTTPException, status
 from pydantic import BaseModel
 from nemo.collections.tts.models import FastPitchModel, HifiGanModel
+# OmegaConf is no longer needed here since we aren't creating overrides
+# from omegaconf import OmegaConf, open_dict
 # Configure logging
 logging.basicConfig(level=logging.INFO)
         models['en'].eval()
         logger.info("English model loaded successfully")
+        # Load the CORRECTED Bikol Spectrogram Generator
         logger.info("Loading Bikol FastPitch model...")
+        # This is the only line needed now. Replace the filename with your new .nemo file.
+        models['bikol'] = FastPitchModel.restore_from("models/fastpitch_bikol_corrected.nemo").to(device)
+        models['bikol'].eval()
+        logger.info("Bikol model loaded successfully")
     except Exception as e:
         logger.error(f"FATAL: Could not load models. Error: {e}")
         import traceback
         traceback.print_exc()
+        # You might want the app to fail completely if models don't load
+        # raise e
+    logger.info("Model loading complete. Available models: " + ", ".join(models.keys()))
 # --- 3. Define API Request and Response Models ---
 class TTSRequest(BaseModel):
     """
     Generates speech from text using the selected language model.
     """
+    if not models or 'hifigan' not in models:
         raise HTTPException(
             status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
             detail="Models are not loaded yet. Please try again in a moment."