SidML's picture
Initial Upload
5a65ad6 verified
"""
Configuration settings for the Speech Translation System
"""
import os
from pathlib import Path
# Project paths. Every location is derived from PROJECT_ROOT, which is the
# directory two levels above this file.
PROJECT_ROOT = Path(__file__).parent.parent
DATA_DIR = PROJECT_ROOT / "data"
MODELS_DIR = PROJECT_ROOT / "models"
VOICE_SAMPLES_DIR = DATA_DIR / "voice_samples"
SAMPLES_DIR = DATA_DIR / "samples"

# Ensure directories exist. NOTE: this runs as a side effect of importing the
# module. parents=True makes each call self-sufficient, so nested directories
# are created correctly regardless of where DATA_DIR sits in the sequence;
# exist_ok=True keeps repeated imports from raising.
for dir_path in (DATA_DIR, MODELS_DIR, VOICE_SAMPLES_DIR, SAMPLES_DIR):
    dir_path.mkdir(parents=True, exist_ok=True)
# Speech Recognition Settings
# Model size options: tiny, base, small, medium, large
# ("small" is the recommended choice for Hindi).
WHISPER_MODEL_SIZE = "small"
# Device options: auto, cpu, cuda.
WHISPER_DEVICE = "auto"

# Translation Settings
# Service options: google, local.
DEFAULT_TRANSLATION_SERVICE = "google"
# Maps each supported short language code to its English display name.
SUPPORTED_LANGUAGES = dict(
    en="English",
    es="Spanish",
    fr="French",
    de="German",
    it="Italian",
    pt="Portuguese",
    ru="Russian",
    ja="Japanese",
    ko="Korean",
    zh="Chinese",
    ar="Arabic",
    hi="Hindi",
)
# Voice Cloning Settings
TTS_MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
# Minimum number of voice samples needed.
VOICE_CLONE_SAMPLES_MIN = 3
# Minimum duration, in seconds.
VOICE_CLONE_DURATION_MIN = 10

# Audio Processing Settings
SAMPLE_RATE = 22050  # presumably Hz -- confirm against the audio pipeline
# Upper bound on audio length: 5 minutes, expressed in seconds.
MAX_AUDIO_DURATION = 5 * 60
# Listed audio file extensions (lower-case, with leading dot).
AUDIO_FORMATS = [
    ".wav",
    ".mp3",
    ".m4a",
    ".flac",
    ".ogg",
]
# API Settings
API_HOST = "localhost"
API_PORT = 8000
# 50MB limit, written out as 50 * 1024 * 1024.
MAX_FILE_SIZE = 50 * 1024**2

# Logging
LOG_LEVEL = "INFO"
# %-style template: timestamp - logger name - level name - message.
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"