Spaces:

jlazoflores
/

universal_translator

Sleeping

App Files Files Community

joelazo committed 11 days ago

Commit

3605935

1 Parent(s): a452a32

Initial commit. This is the code for the first version of the universal translator.

Browse files

Files changed (11) hide show

.gitignore +299 -0
README.md +305 -2
REFACTORING_NOTES.md +254 -0
config.py +215 -0
gradio_themes.md +345 -0
pyproject.toml +33 -0
requirements.txt +8 -0
translation_service.py +170 -0
translator.py +360 -0
uv.lock +0 -0
voice_handler.py +359 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,299 @@

+# Universal Translator - Git Ignore File
+# ============================================================================
+# Python
+# ============================================================================
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+Pipfile.lock
+# poetry
+poetry.lock
+# pdm
+.pdm.toml
+# PEP 582
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# ============================================================================
+# IDEs and Editors
+# ============================================================================
+# Visual Studio Code
+.vscode/
+*.code-workspace
+# PyCharm
+.idea/
+*.iml
+*.ipr
+*.iws
+# Sublime Text
+*.sublime-project
+*.sublime-workspace
+# Vim
+*.swp
+*.swo
+*~
+# Emacs
+*~
+\#*\#
+/.emacs.desktop
+/.emacs.desktop.lock
+*.elc
+# Eclipse
+.project
+.pydevproject
+.settings/
+# ============================================================================
+# Operating System
+# ============================================================================
+# macOS
+.DS_Store
+.AppleDouble
+.LSOverride
+._*
+.Spotlight-V100
+.Trashes
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+# Windows
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+*.stackdump
+[Dd]esktop.ini
+$RECYCLE.BIN/
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+*.lnk
+# Linux
+*~
+.fuse_hidden*
+.directory
+.Trash-*
+.nfs*
+# ============================================================================
+# Project Specific
+# ============================================================================
+# Environment variables and secrets
+.env
+.env.local
+.env.*.local
+*.key
+*.pem
+secrets.json
+credentials.json
+# Temporary audio files generated by TTS/STT
+*.mp3
+*.wav
+*.ogg
+*.flac
+/tmp/
+/temp/
+# Gradio temporary files
+flagged/
+gradio_cached_examples/
+# Hugging Face cache
+.cache/
+huggingface/
+# Model cache
+models/
+*.bin
+*.safetensors
+# UV lock file (optionally ignore if you want fresh resolves)
+# uv.lock
+# Build artifacts
+*.whl
+# Log files
+*.log
+logs/
+# Database files
+*.db
+*.sqlite
+*.sqlite3
+# Backup files
+*.bak
+*.backup
+*~
+# ============================================================================
+# Documentation
+# ============================================================================
+# Generated documentation
+docs/_build/
+docs/build/
+site/
+# ============================================================================
+# Miscellaneous
+# ============================================================================
+# Archives
+*.zip
+*.tar
+*.tar.gz
+*.rar
+*.7z
+# Large files
+*.iso
+*.dmg
+# Private notes
+NOTES.md
+TODO.md
+PRIVATE.md

README.md CHANGED Viewed

@@ -1,2 +1,305 @@
-# universal_translator
-Apertus based AI agent that can translate any language to any base language in realtime.

+# Universal Translator
+A real-time language translation application powered by **Apertus-70B** (supporting 1000+ languages) with voice input and output capabilities.
+## Features
+- **1000+ Languages Support** - Powered by swiss-ai/Apertus-70B-Instruct-2509
+- **Automatic Language Detection** - Automatically detects the source language
+- **Dual Input Modes**:
+  - Text input via text box
+  - Voice input via microphone (Speech-to-Text)
+- **Dual Output Modes**:
+  - Translated text display
+  - Audio output (Text-to-Speech)
+- **Multiple Provider Options**:
+  - STT: OpenAI Whisper API, Local Whisper (tiny/base)
+  - TTS: OpenAI TTS, Edge-TTS (free), gTTS (free)
+- **Customizable Translation**:
+  - Adjustable temperature for literal vs creative translations
+  - Configurable max tokens
+  - Multiple voice styles for audio output
+## Prerequisites
+- Python 3.10 or higher
+- [UV](https://docs.astral.sh/uv/) package manager
+- OpenAI API key (only needed for the OpenAI Whisper API and OpenAI TTS providers; Local Whisper, Edge-TTS, and gTTS work without one)
+- Hugging Face token (for Apertus model access)
+## Installation
+### 1. Install UV
+If you don't have UV installed, install it first:
+**macOS/Linux:**
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+**Windows:**
+```powershell
+powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
+```
+Or via pip:
+```bash
+pip install uv
+```
+### 2. Clone the Repository
+```bash
+git clone <your-repo-url>
+cd universal_translator
+```
+### 3. Install Dependencies with UV
+UV will automatically create a virtual environment and install all dependencies:
+```bash
+uv sync
+```
+This will:
+- Create a `.venv` directory with a Python virtual environment
+- Install all dependencies from `pyproject.toml`
+- Lock versions in `uv.lock`
+### 4. Set Up Environment Variables
+Create a `.env` file in the project root:
+```bash
+# .env
+OPENAI_API_KEY=your_openai_api_key_here
+HF_TOKEN=your_huggingface_token_here
+```
+**Getting API Keys:**
+- **OpenAI API Key**: Get it from [OpenAI Platform](https://platform.openai.com/api-keys)
+- **Hugging Face Token**: Get it from [Hugging Face Settings](https://huggingface.co/settings/tokens)
+## Usage
+### Run with UV
+```bash
+uv run python translator.py
+```
+Or activate the virtual environment first:
+```bash
+# Activate virtual environment
+source .venv/bin/activate  # macOS/Linux
+# or
+.venv\Scripts\activate     # Windows
+# Run the app
+python translator.py
+```
+### Using the Application
+1. **Open your browser** - The app will automatically open at `http://localhost:7860`
+2. **Select Target Language** - Choose the language you want to translate to from the dropdown (20+ popular languages available)
+3. **Choose Input Method**:
+   - **Text**: Type or paste text in any language in the input box and click "Translate"
+   - **Voice**: Click the microphone icon, speak, then click "Translate Voice"
+4. **View Results**:
+   - **Detected Source Language** - Shows which language was detected
+   - **Translated Text** - Displays the translation
+   - **Audio Output** - Plays the translation (if voice output is enabled)
+5. **Adjust Settings** (optional):
+   - **Temperature**: 0.0-1.0 (lower = more literal, higher = more creative)
+   - **Max Tokens**: Control translation length
+   - **STT Provider**: Choose speech-to-text provider
+   - **TTS Provider**: Choose text-to-speech provider and voice
+## Project Structure
+```
+universal_translator/
+├── translator.py             # Gradio UI layer (presentation)
+├── translation_service.py    # Core translation logic (business layer)
+├── config.py                 # Configuration constants and settings
+├── voice_handler.py          # STT/TTS provider implementations
+├── pyproject.toml            # Project configuration and dependencies
+├── uv.lock                   # Locked dependency versions
+├── requirements.txt          # Alternative pip requirements
+├── .env                      # Environment variables (create this)
+├── gradio_themes.md          # Gradio 6.0 theming guide
+└── README.md                 # This file
+```
+### Architecture
+The application follows a layered architecture for better maintainability:
+- **UI Layer** (`translator.py`) - Handles all Gradio interface components and user interactions
+- **Business Logic** (`translation_service.py`) - Core translation engine and language detection
+- **Configuration** (`config.py`) - Centralized settings, constants, and default values
+- **Voice Services** (`voice_handler.py`) - Speech-to-text and text-to-speech providers
+## Supported Languages
+The app includes 20+ popular languages in the dropdown, including:
+- Spanish, French, German, Italian, Portuguese
+- Chinese (Simplified), Japanese, Korean
+- Arabic, Russian, Hindi, Turkish
+- Dutch, Polish, Swedish, Greek
+- Hebrew, Thai, Vietnamese, Indonesian
+- English
+**Note**: While the dropdown shows popular languages, Apertus-70B supports 1000+ languages. You can translate to/from any language by typing the language name.
+## Customization
+All configuration settings are centralized in `config.py`. You can easily customize:
+### Modifying Settings
+Edit `config.py` to change:
+**Languages:**
+```python
+# Add more languages to the dropdown
+LanguageConfig.POPULAR_LANGUAGES["Portuguese (Brazil)"] = "pt-br"
+# Change default target language
+LanguageConfig.DEFAULT_TARGET_LANGUAGE = "French"
+```
+**Model Settings:**
+```python
+# Use a different translation model
+ModelConfig.MODEL_NAME = "your-model-name"
+# Adjust default parameters
+ModelConfig.DEFAULT_TEMPERATURE = 0.5
+ModelConfig.DEFAULT_MAX_TOKENS = 2048
+```
+**UI Appearance:**
+```python
+# Change theme colors
+UIConfig.THEME_PRIMARY_HUE = "purple"
+UIConfig.THEME_SECONDARY_HUE = "pink"
+# Add custom CSS
+UIConfig.CUSTOM_CSS = """
+    .gradio-container {
+        background: linear-gradient(to bottom, #f0f0f0, #ffffff);
+    }
+"""
+```
+**Voice Settings:**
+```python
+# Change default providers
+VoiceConfig.DEFAULT_TTS_PROVIDER = "Edge-TTS (Free)"
+VoiceConfig.DEFAULT_TTS_VOICE = "en-US-JennyNeural"
+```
+## Configuration
+### Translation Settings
+- **Temperature** (0.0-1.0):
+  - `0.0-0.3`: More literal, word-for-word translations
+  - `0.4-0.6`: Balanced
+  - `0.7-1.0`: More creative, natural-sounding translations
+- **Max Tokens** (128-2048):
+  - Controls maximum length of translation
+  - Higher values for longer texts
+### Voice Settings
+**STT Providers:**
+- **OpenAI Whisper API** (Recommended): Fast, accurate, requires API key
+- **Local Whisper (Tiny)**: Free, runs locally, lower accuracy
+- **Local Whisper (Base)**: Free, runs locally, better accuracy
+**TTS Providers:**
+- **OpenAI TTS** (Recommended): High quality, natural voices, requires API key
+- **Edge-TTS (Free)**: Good quality, free, no API key needed
+- **gTTS (Free)**: Basic quality, free, no API key needed
+**TTS Voices** (OpenAI):
+- `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`
+## Cost Considerations
+- **Apertus Model**: Free (hosted on Hugging Face)
+- **OpenAI Whisper API**: ~$0.006 per minute of audio
+- **OpenAI TTS**: ~$0.015 per 1,000 characters
+- **Free alternatives**: Local Whisper, Edge-TTS, gTTS
+## Troubleshooting
+### "Module not found" errors
+```bash
+uv sync
+```
+### API Key errors
+Ensure your `.env` file is in the project root and contains valid keys:
+```bash
+cat .env  # Check if keys are set
+```
+### Hugging Face model access
+Make sure you have access to the Apertus model and your HF token has read permissions.
+### Audio input not working
+- Check browser permissions for microphone access
+- Ensure microphone is properly connected
+## Development
+### Add new dependencies
+```bash
+uv add package-name
+```
+### Update dependencies
+```bash
+uv sync --upgrade
+```
+### Run with different Python version
+```bash
+uv run --python 3.11 python translator.py
+```
+## License
+[Add your license here]
+## Acknowledgments
+- **Apertus-70B** by swiss-ai for multilingual translation
+- **OpenAI** for Whisper and TTS APIs
+- **Gradio** for the web interface
+- **UV** by Astral for fast Python package management
+## Support
+For issues and questions, please open an issue in the repository.

REFACTORING_NOTES.md ADDED Viewed

	@@ -0,0 +1,254 @@

+# Refactoring Notes
+## Overview
+The Universal Translator codebase has been refactored to follow a clean, layered architecture that separates concerns and improves maintainability.
+## Changes Made
+### 1. Created `config.py` - Configuration Module
+**Purpose:** Centralize all configuration constants and settings
+**Contents:**
+- `ModelConfig` - Translation model settings
+- `LanguageConfig` - Supported languages and mappings
+- `VoiceConfig` - STT/TTS default settings
+- `UIConfig` - Gradio UI appearance and behavior
+- `PromptConfig` - Translation prompt templates
+- `ErrorMessages` - Standardized error messages
+- Helper functions for language lookups
+**Benefits:**
+- Single source of truth for all settings
+- Easy to modify without touching business logic
+- Better organization and discoverability
+- Type-safe constants
+### 2. Created `translation_service.py` - Business Logic Module
+**Purpose:** Extract core translation functionality from UI layer
+**Components:**
+#### `LanguageDetector`
+- Handles language detection
+- Returns language codes and names
+- Isolated error handling
+#### `TranslationEngine`
+- Manages HuggingFace InferenceClient
+- Handles translation requests
+- Formats prompts and processes responses
+#### `TranslationService`
+- High-level API for UI layer
+- Coordinates detection and translation
+- Returns formatted results
+**Benefits:**
+- Translation logic can be tested independently
+- Can be reused in different interfaces (CLI, API, etc.)
+- Clear separation of concerns
+- Easier to swap translation providers
+### 3. Refactored `translator.py` - UI Module
+**Purpose:** Focus purely on Gradio UI and user interaction
+**Remaining Responsibilities:**
+- Gradio component creation and layout
+- Event handler wiring
+- Voice processing (STT/TTS integration)
+- UI state management
+**Removed:**
+- Language detection logic → `translation_service.py`
+- Translation logic → `translation_service.py`
+- Configuration constants → `config.py`
+- Prompt templates → `config.py`
+**Benefits:**
+- Cleaner, more readable code
+- UI changes don't affect business logic
+- Easier to create alternative interfaces
+- Improved testability
+### 4. Updated `pyproject.toml`
+**Changes:**
+- Added new modules to `only-include` list
+- Package now includes all necessary files
+### 5. Updated `README.md`
+**Additions:**
+- New project structure diagram
+- Architecture explanation
+- Customization guide for `config.py`
+- Examples of common modifications
+## Architecture Diagram
+```
+┌─────────────────────────────────────────┐
+│         User Interface Layer            │
+│          (translator.py)                │
+│  - Gradio components                    │
+│  - Event handlers                       │
+│  - Voice I/O coordination               │
+└──────────────┬──────────────────────────┘
+               │
+               ↓
+┌─────────────────────────────────────────┐
+│       Business Logic Layer              │
+│     (translation_service.py)            │
+│  - LanguageDetector                     │
+│  - TranslationEngine                    │
+│  - TranslationService                   │
+└──────────────┬──────────────────────────┘
+               │
+               ↓
+┌─────────────────────────────────────────┐
+│      Configuration Layer                │
+│          (config.py)                    │
+│  - Model settings                       │
+│  - Language mappings                    │
+│  - UI configuration                     │
+│  - Voice settings                       │
+└─────────────────────────────────────────┘
+     ┌────────────────────────────────┐
+     │    Voice Services              │
+     │   (voice_handler.py)           │
+     │  - STT providers               │
+     │  - TTS providers               │
+     └────────────────────────────────┘
+```
+## Benefits of the Refactoring
+### 1. **Maintainability**
+- Changes to UI don't affect business logic
+- Configuration changes isolated to one file
+- Clear module boundaries
+### 2. **Testability**
+- Business logic can be unit tested separately
+- Mock dependencies easily
+- Test UI and logic independently
+### 3. **Extensibility**
+- Easy to add new translation providers
+- Can create CLI, API, or other interfaces
+- Simple to add new languages or settings
+### 4. **Readability**
+- Each module has a clear, single purpose
+- Reduced file sizes
+- Better code organization
+### 5. **Reusability**
+- `TranslationService` can be imported by other apps
+- `config.py` can be extended for new features
+- Voice handlers already modular
+## Migration Guide
+### Before Refactoring
+```python
+# Everything in translator.py
+POPULAR_LANGUAGES = {...}
+LANGUAGE_NAMES = {...}
+model_name = "..."
+def detect_language(text):
+    # detection logic
+    pass
+def translate_text(text, target):
+    # translation logic
+    pass
+# UI code mixed with business logic
+```
+### After Refactoring
+```python
+# config.py
+class LanguageConfig:
+    POPULAR_LANGUAGES = {...}
+    LANGUAGE_NAMES = {...}
+# translation_service.py
+class TranslationService:
+    def translate_text(self, text, target):
+        # isolated business logic
+        pass
+# translator.py
+from config import LanguageConfig
+from translation_service import TranslationService
+translation_service = TranslationService()
+# Pure UI code
+```
+## Future Improvements
+### Possible Enhancements
+1. **Add Tests**
+   - Unit tests for `TranslationService`
+   - Integration tests for UI
+   - Mock external APIs
+2. **Add Logging**
+   - Structured logging for debugging
+   - Performance monitoring
+   - Error tracking
+3. **Create CLI Interface**
+   - Reuse `TranslationService`
+   - Command-line tool for batch translation
+4. **Add REST API**
+   - FastAPI or Flask wrapper
+   - Reuse `TranslationService`
+   - Enable programmatic access
+5. **Configuration File Support**
+   - Load settings from YAML/JSON
+   - Environment-based configs
+   - User preferences
+6. **Add Caching**
+   - Cache translations
+   - Reduce API calls
+   - Improve performance
+## Testing the Refactored Code
+### Quick Test
+```bash
+# Verify imports and structure
+uv run python -c "from translator import create_ui; create_ui()"
+```
+### Full Test
+```bash
+# Run the application
+uv run python translator.py
+```
+### Expected Behavior
+- App should launch normally
+- All features should work as before
+- Performance should be similar or better
+- Configuration changes in `config.py` should take effect the next time the app is started
+## Notes
+- All functionality preserved
+- No breaking changes to user experience
+- Backward compatible (same entry point)
+- Ready for future enhancements

config.py ADDED Viewed

	@@ -0,0 +1,215 @@

+"""
+Configuration Module
+Contains all configuration constants for the Universal Translator application.
+"""
+# ============================================================================
+# Model Configuration
+# ============================================================================
+class ModelConfig:
+    """Translation model identity and sampling parameters."""
+    # Full model identifier and a shorter label for the same model
+    MODEL_NAME: str = "swiss-ai/Apertus-70B-Instruct-2509"
+    SHORT_MODEL_NAME: str = "Apertus Universal Translator"
+    # Temperature presets, from most literal to most creative
+    TEMP_LITERAL: float = 0.1  # word-for-word translations
+    TEMP_BALANCED: float = 0.3  # same value as DEFAULT_TEMPERATURE
+    TEMP_CREATIVE: float = 0.7  # freer, more natural-sounding translations
+    # Bounds of the temperature range
+    MIN_TEMPERATURE: float = 0.0
+    MAX_TEMPERATURE: float = 1.0
+    # Default generation parameters
+    DEFAULT_TEMPERATURE: float = 0.3
+    DEFAULT_MAX_TOKENS: int = 1024
+# ============================================================================
+# Language Configuration
+# ============================================================================
+class LanguageConfig:
+    """Languages offered in the dropdown and names shown for language codes."""
+    # Dropdown entries, display name -> language code
+    POPULAR_LANGUAGES: dict[str, str] = {
+        "Spanish": "es",
+        "French": "fr",
+        "German": "de",
+        "Italian": "it",
+        "Portuguese": "pt",
+        "Chinese (Simplified)": "zh-cn",
+        "Japanese": "ja",
+        "Korean": "ko",
+        "Arabic": "ar",
+        "Russian": "ru",
+        "Hindi": "hi",
+        "Turkish": "tr",
+        "Dutch": "nl",
+        "Polish": "pl",
+        "Swedish": "sv",
+        "Greek": "el",
+        "Hebrew": "he",
+        "Thai": "th",
+        "Vietnamese": "vi",
+        "Indonesian": "id",
+        "English": "en",
+    }
+    # Language code -> full name (for detection display). Built by inverting
+    # the dropdown table at class-definition time, so later changes to
+    # POPULAR_LANGUAGES are not reflected here.
+    LANGUAGE_NAMES: dict[str, str] = {
+        "en": "English",  # placed first so English leads the iteration order
+        **{code: name for name, code in POPULAR_LANGUAGES.items()},
+        "zh-cn": "Chinese",  # shown without the "(Simplified)" qualifier
+    }
+    # Language preselected as the translation target
+    DEFAULT_TARGET_LANGUAGE: str = "Spanish"
+# ============================================================================
+# Voice Configuration
+# ============================================================================
+class VoiceConfig:
+    """Defaults for speech-to-text (STT) and text-to-speech (TTS)."""
+    # Voice names available for OpenAI TTS
+    OPENAI_TTS_VOICES: list[str] = [
+        "alloy",
+        "echo",
+        "fable",
+        "onyx",
+        "nova",
+        "shimmer",
+    ]
+    # Providers and voice used unless another is chosen
+    DEFAULT_STT_PROVIDER: str = "OpenAI Whisper API"
+    DEFAULT_TTS_PROVIDER: str = "OpenAI TTS"
+    DEFAULT_TTS_VOICE: str = "nova"
+    # Voice output is enabled by default
+    DEFAULT_VOICE_OUTPUT_ENABLED: bool = True
+# ============================================================================
+# UI Configuration
+# ============================================================================
+class UIConfig:
+    """App metadata, theme, widget sizing, and launch options for the Gradio UI."""
+    # Launch settings
+    SHARE_LINK: bool = False
+    OPEN_IN_BROWSER: bool = True
+    # App metadata
+    APP_TITLE: str = "Universal Translator"
+    APP_DESCRIPTION: str = "Translate text or speech to any language with automatic source language detection"
+    # Theme hues and font stack
+    THEME_PRIMARY_HUE: str = "blue"
+    THEME_SECONDARY_HUE: str = "cyan"
+    THEME_NEUTRAL_HUE: str = "slate"
+    THEME_FONT: str = "Inter, system-ui, sans-serif"
+    # Custom CSS (the leading and trailing whitespace is part of the value)
+    CUSTOM_CSS: str = """
+    .gradio-container {
+        font-family: 'Inter', system-ui, sans-serif;
+    }
+    """
+    # Line counts for the input and output text areas
+    INPUT_TEXT_LINES: int = 4
+    OUTPUT_TEXT_LINES: int = 4
+    # Max-tokens slider: bounds and step
+    MAX_TOKENS_MIN: int = 128
+    MAX_TOKENS_MAX: int = 2048
+    MAX_TOKENS_STEP: int = 128
+    # Temperature slider: bounds and step
+    TEMPERATURE_MIN: float = 0.0
+    TEMPERATURE_MAX: float = 1.0
+    TEMPERATURE_STEP: float = 0.1
+# ============================================================================
+# Translation Prompts
+# ============================================================================
+class PromptConfig:
+    """Prompt templates sent to the translation model."""
+    # System prompt; fill the {target_language} placeholder with str.format().
+    # Written as adjacent literals so each line break is an explicit "\n".
+    SYSTEM_PROMPT_TEMPLATE: str = (
+        "You are a professional translator. Your task is to translate the given text to {target_language}.\n"
+        "Provide ONLY the translation, without any explanations, notes, or additional text.\n"
+        "Maintain the tone, style, and meaning of the original text."
+    )
+# ============================================================================
+# Error Messages
+# ============================================================================
+class ErrorMessages:
+    """Standard user-facing error strings."""
+    # Templates with an {error} placeholder to fill via str.format()
+    TRANSLATION_ERROR: str = "Translation Error: {error}"
+    TRANSCRIPTION_ERROR: str = "[Transcription Error: {error}]"
+    # Fixed messages
+    NO_INPUT: str = "No input provided"
+    NO_AUDIO_INPUT: str = "No audio input"
+    TRANSCRIPTION_FAILED: str = "Transcription failed"
+    LANGUAGE_DETECTION_FAILED: str = "Unknown"
+# ============================================================================
+# Helper Functions
+# ============================================================================
+def get_language_name(lang_code: str) -> str:
+    """
+    Get the full language name from a language code.
+    The lookup ignores letter case and surrounding whitespace, so 'EN' and
+    ' zh-CN ' resolve to the same entries as 'en' and 'zh-cn'.
+    Args:
+        lang_code: Language code (e.g., 'en', 'es')
+    Returns:
+        Full language name, or the code in uppercase if it is not a key of
+        LanguageConfig.LANGUAGE_NAMES
+    """
+    # Normalize only the lookup key; the fallback still uppercases the
+    # caller's original value, so unknown codes behave as before.
+    lookup_key = lang_code.strip().lower()
+    return LanguageConfig.LANGUAGE_NAMES.get(lookup_key, lang_code.upper())
+def get_popular_languages_list() -> list[str]:
+    """
+    Return the display names of the popular languages, for use in dropdowns.
+    Returns:
+        A new list of the keys of LanguageConfig.POPULAR_LANGUAGES, in the
+        order they appear in that mapping
+    """
+    # Iterating a dict yields its keys.
+    display_names = [*LanguageConfig.POPULAR_LANGUAGES]
+    return display_names
+def get_language_code(language_name: str) -> str:
+    """
+    Get the language code from a display name.
+    An exact match against LanguageConfig.POPULAR_LANGUAGES is tried first;
+    otherwise the name is compared ignoring letter case and surrounding
+    whitespace, so 'spanish' or ' French ' resolve to 'es' and 'fr'.
+    Args:
+        language_name: Display name (e.g., 'Spanish', 'French')
+    Returns:
+        Language code, or the original name in lowercase if not found
+    """
+    # Read the table on every call so entries added at runtime are seen.
+    languages = LanguageConfig.POPULAR_LANGUAGES
+    exact = languages.get(language_name)
+    if exact is not None:
+        return exact
+    wanted = language_name.strip().casefold()
+    return next(
+        (code for name, code in languages.items() if name.casefold() == wanted),
+        language_name.lower(),  # unchanged fallback for unknown names
+    )

gradio_themes.md ADDED Viewed

	@@ -0,0 +1,345 @@

+# Gradio 6.0 Theming Guide
+## Key Change in Gradio 6.0
+In Gradio 6.0, the `theme`, `css`, and `css_paths` parameters have **moved from the `Blocks()` constructor to the `launch()` method**.
+### Old Way (Gradio 5.x)
+```python
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # components
+    pass
+demo.launch()
+```
+### New Way (Gradio 6.0+)
+```python
+with gr.Blocks() as demo:
+    # components
+    pass
+demo.launch(theme=gr.themes.Soft())
+```
+---
+## 1. Built-in Themes
+Gradio 6.0 includes 5 prebuilt themes:
+```python
+import gradio as gr
+with gr.Blocks() as demo:
+    # your components
+    pass
+# Choose one of these themes
+demo.launch(theme=gr.themes.Base())
+demo.launch(theme=gr.themes.Default())
+demo.launch(theme=gr.themes.Glass())
+demo.launch(theme=gr.themes.Monochrome())
+demo.launch(theme=gr.themes.Soft())
+```
+---
+## 2. Customizing Built-in Themes
+### Basic Customization
+Customize colors, fonts, and sizing:
+```python
+theme = gr.themes.Soft(
+    primary_hue="blue",       # Main accent color (buttons, links)
+    secondary_hue="cyan",     # Secondary elements
+    neutral_hue="slate",      # Text and backgrounds
+    font="Inter, system-ui, sans-serif",
+    font_mono="Fira Code, monospace"
+)
+demo.launch(theme=theme)
+```
+### Available Color Names
+`slate`, `gray`, `zinc`, `neutral`, `stone`, `red`, `orange`, `amber`, `yellow`, `lime`, `green`, `emerald`, `teal`, `cyan`, `sky`, `blue`, `indigo`, `violet`, `purple`, `fuchsia`, `pink`, `rose`
+### Sizing Options
+```python
+theme = gr.themes.Soft(
+    spacing_size=gr.themes.sizes.spacing_lg,    # Spacing/padding
+    radius_size=gr.themes.sizes.radius_md,      # Corner roundness
+    text_size=gr.themes.sizes.text_md           # Font size
+)
+```
+Available sizes:
+- **Spacing:** `spacing_sm`, `spacing_md`, `spacing_lg`
+- **Radius:** `radius_none`, `radius_sm`, `radius_md`, `radius_lg`
+- **Text:** `text_sm`, `text_md`, `text_lg`
+---
+## 3. Advanced Theme Customization
+### Using `.set()` for CSS Variables
+```python
+theme = gr.themes.Soft(
+    primary_hue="indigo",
+    secondary_hue="cyan"
+).set(
+    # Button styling
+    button_primary_background_fill="*primary_500",
+    button_primary_background_fill_hover="*primary_600",
+    button_primary_text_color="white",
+    # Slider colors
+    slider_color="*secondary_500",
+    # Loader/spinner color
+    loader_color="*primary_400",
+    # Background gradients
+    body_background_fill="linear-gradient(to bottom, #f0f0f0, #ffffff)",
+    # Borders
+    border_color_primary="*primary_300",
+)
+demo.launch(theme=theme)
+```
+---
+## 4. Custom CSS
+### Inline CSS String
+```python
+custom_css = """
+.gradio-container {
+    font-family: 'Inter', system-ui, sans-serif;
+    max-width: 1200px;
+    margin: 0 auto;
+}
+.my-custom-button {
+    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
+    border: none !important;
+    color: white !important;
+}
+/* Override default styles with !important if needed */
+.gr-button {
+    border-radius: 8px !important;
+}
+"""
+demo.launch(
+    theme=gr.themes.Soft(),
+    css=custom_css
+)
+```
+### External CSS Files
+```python
+from pathlib import Path
+demo.launch(
+    theme=gr.themes.Soft(),
+    css_paths=[
+        Path("styles/main.css"),
+        Path("styles/custom.css")
+    ]
+)
+```
+---
+## 5. Complete Example
+### Full Implementation with Theme and Custom CSS
+```python
+import gradio as gr
+# Build your interface
+with gr.Blocks(title="My App") as demo:
+    gr.Markdown("# My Custom Themed App")
+    with gr.Row():
+        input_text = gr.Textbox(label="Input", elem_classes="custom-input")
+        output_text = gr.Textbox(label="Output")
+    submit_btn = gr.Button("Submit", variant="primary", elem_classes="custom-button")
+def main():
+    # Create custom theme
+    theme = gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="cyan",
+        neutral_hue="slate",
+        spacing_size=gr.themes.sizes.spacing_md,
+        radius_size=gr.themes.sizes.radius_lg,
+        font="Inter, system-ui, sans-serif"
+    ).set(
+        button_primary_background_fill="*primary_600",
+        button_primary_background_fill_hover="*primary_700",
+        slider_color="*secondary_500"
+    )
+    # Custom CSS
+    custom_css = """
+    .gradio-container {
+        max-width: 1400px;
+        margin: 0 auto;
+    }
+    .custom-button {
+        font-weight: 600 !important;
+        transition: all 0.3s ease !important;
+    }
+    .custom-input textarea {
+        border: 2px solid #e5e7eb !important;
+        border-radius: 8px !important;
+    }
+    """
+    # Launch with theme and CSS
+    demo.launch(
+        share=False,
+        inbrowser=True,
+        theme=theme,
+        css=custom_css
+    )
+if __name__ == "__main__":
+    main()
+```
+---
+## 6. Theme Constructor Parameters
+### All Available Parameters
+```python
+gr.themes.Soft(
+    # Colors (use color names or gr.themes.colors objects)
+    primary_hue="blue",           # Main accent color
+    secondary_hue="cyan",         # Secondary elements
+    neutral_hue="slate",          # Text and backgrounds
+    # Sizing
+    spacing_size=gr.themes.sizes.spacing_md,
+    radius_size=gr.themes.sizes.radius_md,
+    text_size=gr.themes.sizes.text_md,
+    # Fonts
+    font="system-ui, sans-serif",
+    font_mono="monospace"
+)
+```
+---
+## 7. Tips and Best Practices
+### General Tips
+1. **Use `!important` in custom CSS** - Gradio's default styles may need to be overridden
+2. **Base class is `.gradio-container`** - Target this for app-wide styling
+3. **Query selectors are not guaranteed** - Gradio's internal structure may change between versions
+4. **Test your theme** - Colors may appear differently on different displays
+### Debugging CSS
+```python
+# Add borders to see element boundaries
+custom_css = """
+* {
+    border: 1px solid red !important;
+}
+"""
+```
+### Performance
+- Keep CSS minimal for faster loading
+- Use CSS files for large stylesheets instead of inline strings
+- Avoid complex selectors and deep nesting
+---
+## 8. Common Styling Use Cases
+### Dark Mode
+```python
+theme = gr.themes.Monochrome(
+    neutral_hue="slate"
+).set(
+    body_background_fill="#1a1a1a",
+    body_text_color="#ffffff",
+    input_background_fill="#2d2d2d",
+    button_primary_background_fill="#4a9eff"
+)
+```
+### Compact Layout
+```python
+theme = gr.themes.Soft(
+    spacing_size=gr.themes.sizes.spacing_sm,
+    radius_size=gr.themes.sizes.radius_sm,
+    text_size=gr.themes.sizes.text_sm
+)
+```
+### Colorful/Vibrant
+```python
+theme = gr.themes.Soft(
+    primary_hue="pink",
+    secondary_hue="purple"
+).set(
+    button_primary_background_fill="linear-gradient(90deg, #667eea 0%, #764ba2 100%)"
+)
+```
+### Professional/Corporate
+```python
+theme = gr.themes.Default(
+    primary_hue="blue",
+    secondary_hue="slate",
+    neutral_hue="gray"
+).set(
+    body_background_fill="#ffffff",
+    button_primary_background_fill="#0066cc",
+    border_color_primary="#d1d5db"
+)
+```
+---
+## Resources
+- [Gradio Custom CSS and JS Guide](https://www.gradio.app/guides/custom-CSS-and-JS)
+- [Gradio 6 Migration Guide](https://www.gradio.app/main/guides/gradio-6-migration-guide)
+- [Gradio Theming Guide](https://www.gradio.app/guides/theming-guide)
+- [Gradio Themes Documentation](https://www.gradio.app/docs/gradio/themes)
+- [Gradio Blocks Documentation](https://www.gradio.app/docs/gradio/blocks)
+---
+## Version Information
+- This guide is for **Gradio 6.0+**
+- For Gradio 5.x and earlier, themes are set in the `Blocks()` constructor instead of `launch()`
+- Check your Gradio version: `python -c "import gradio; print(gradio.__version__)"`

pyproject.toml ADDED Viewed

	@@ -0,0 +1,33 @@

+[project]
+name = "universal-translator"
+version = "0.1.0"
+description = "Universal language translator powered by Apertus-70B with voice input/output support"
+authors = [
+    {name = "Your Name", email = "your.email@example.com"}
+]
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "edge-tts>=7.2.3",
+    "gradio>=5.0.0",
+    "gtts>=2.5.4",
+    "huggingface-hub>=1.1.4",
+    "langdetect>=1.0.9",
+    "openai>=2.8.0",
+    "openai-whisper>=20250625",
+    "python-dotenv>=1.2.1",
+]
+[project.scripts]
+translator = "translator:main"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["."]
+only-include = ["translator.py", "translation_service.py", "voice_handler.py", "config.py"]
+[dependency-groups]
+dev = []

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+huggingface_hub
+python-dotenv
+openai
+edge-tts
+openai-whisper
+gtts
+langdetect

translation_service.py ADDED Viewed

	@@ -0,0 +1,170 @@

+"""
+Translation Service Module
+Contains the core translation logic separated from the UI.
+"""
+from typing import Tuple, Optional
+from huggingface_hub import InferenceClient
+from langdetect import detect, LangDetectException
+from config import (
+    ModelConfig,
+    LanguageConfig,
+    PromptConfig,
+    ErrorMessages,
+    get_language_name
+)
+class LanguageDetector:
+    """Handles language detection for input text."""
+    @staticmethod
+    def detect_language(text: str) -> Tuple[str, str]:
+        """
+        Detect the language of the input text.
+        Args:
+            text: Input text to detect language
+        Returns:
+            Tuple of (language_code, language_name)
+        """
+        try:
+            lang_code = detect(text)
+            lang_name = get_language_name(lang_code)
+            return lang_code, lang_name
+        except LangDetectException:
+            return "unknown", ErrorMessages.LANGUAGE_DETECTION_FAILED
+class TranslationEngine:
+    """Handles translation using the Apertus model."""
+    def __init__(self, model_name: Optional[str] = None):
+        """
+        Initialize the translation engine.
+        Args:
+            model_name: Optional model name override
+        """
+        self.model_name = model_name or ModelConfig.MODEL_NAME
+        self.client = InferenceClient(model=self.model_name)
+        self.language_detector = LanguageDetector()
+    def translate(
+        self,
+        text: str,
+        target_language: str,
+        max_tokens: int = None,
+        temperature: float = None
+    ) -> Tuple[str, str, str]:
+        """
+        Translate text to target language.
+        Args:
+            text: Text to translate
+            target_language: Target language name (e.g., 'Spanish', 'French')
+            max_tokens: Maximum tokens for response (defaults to config value)
+            temperature: Model temperature (defaults to config value)
+        Returns:
+            Tuple of (translated_text, source_lang_code, source_lang_name)
+        """
+        if not text.strip():
+            return "", "unknown", ErrorMessages.LANGUAGE_DETECTION_FAILED
+        # Use defaults from config if not provided
+        max_tokens = max_tokens or ModelConfig.DEFAULT_MAX_TOKENS
+        temperature = temperature or ModelConfig.DEFAULT_TEMPERATURE
+        # Detect source language
+        source_lang_code, source_lang_name = self.language_detector.detect_language(text)
+        # Create translation prompt
+        system_prompt = PromptConfig.SYSTEM_PROMPT_TEMPLATE.format(
+            target_language=target_language
+        )
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": text}
+        ]
+        try:
+            # Call the translation model
+            response = self.client.chat_completion(
+                messages=messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                stream=False
+            )
+            translated_text = response.choices[0].message.content.strip()
+            return translated_text, source_lang_code, source_lang_name
+        except Exception as e:
+            error_message = ErrorMessages.TRANSLATION_ERROR.format(error=str(e))
+            return error_message, source_lang_code, source_lang_name
+class TranslationService:
+    """
+    High-level translation service that orchestrates translation and voice processing.
+    """
+    def __init__(self, model_name: Optional[str] = None):
+        """
+        Initialize the translation service.
+        Args:
+            model_name: Optional model name override
+        """
+        self.engine = TranslationEngine(model_name)
+    def translate_text(
+        self,
+        text: str,
+        target_language: str,
+        max_tokens: Optional[int] = None,
+        temperature: Optional[float] = None
+    ) -> Tuple[str, str]:
+        """
+        Translate text and return formatted results.
+        Args:
+            text: Text to translate
+            target_language: Target language name
+            max_tokens: Maximum tokens for response
+            temperature: Model temperature
+        Returns:
+            Tuple of (translated_text, detected_language_info)
+        """
+        if not text.strip():
+            return "", ErrorMessages.NO_INPUT
+        translated_text, source_code, source_name = self.engine.translate(
+            text, target_language, max_tokens, temperature
+        )
+        # Format detected language info
+        detected_info = f"Detected: {source_name} ({source_code})"
+        return translated_text, detected_info
+    def detect_language_only(self, text: str) -> str:
+        """
+        Detect language without translating.
+        Args:
+            text: Text to detect language
+        Returns:
+            Formatted language detection string
+        """
+        if not text.strip():
+            return ErrorMessages.NO_INPUT
+        source_code, source_name = self.engine.language_detector.detect_language(text)
+        return f"Detected: {source_name} ({source_code})"

translator.py ADDED Viewed

	@@ -0,0 +1,360 @@

+"""
+Universal Translator - Main UI Module
+Gradio-based user interface for the translation application.
+"""
+import gradio as gr
+from dotenv import load_dotenv
+from config import (
+    ModelConfig,
+    LanguageConfig,
+    VoiceConfig,
+    UIConfig,
+    get_popular_languages_list
+)
+from translation_service import TranslationService
+from voice_handler import (
+    create_stt_provider,
+    create_tts_provider,
+    get_available_stt_providers,
+    get_available_tts_providers
+)
+# Load environment variables from a .env file; override=True lets values from
+# .env replace variables that are already set in the process environment
+load_dotenv(override=True)
+# Create the translation service once at import time; process_translation
+# below reuses this single instance for every request
+translation_service = TranslationService()
+# ============================================================================
+# Voice Processing Functions
+# ============================================================================
+def transcribe_audio(audio_path, stt_provider_name):
+    """
+    Transcribe audio to text using selected STT provider.
+    Args:
+        audio_path: Path to audio file
+        stt_provider_name: Name of STT provider
+    Returns:
+        Transcribed text or error message
+    """
+    if audio_path is None:
+        return ""
+    try:
+        stt_provider = create_stt_provider(stt_provider_name)
+        text = stt_provider.transcribe(audio_path)
+        return text
+    except Exception as e:
+        return f"[Transcription Error: {str(e)}]"
+def synthesize_speech(text, tts_provider_name, tts_voice):
+    """
+    Synthesize text to speech using selected TTS provider.
+    Args:
+        text: Text to synthesize
+        tts_provider_name: Name of TTS provider
+        tts_voice: Voice to use
+    Returns:
+        Path to generated audio file or None if failed
+    """
+    if not text or not text.strip():
+        return None
+    try:
+        tts_provider = create_tts_provider(tts_provider_name, voice=tts_voice)
+        audio_path = tts_provider.synthesize(text)
+        return audio_path
+    except Exception as e:
+        print(f"TTS Error: {str(e)}")
+        return None
+# ============================================================================
+# Translation Handler Functions
+# ============================================================================
+def process_translation(
+    input_text,
+    target_language,
+    max_tokens,
+    temperature,
+    enable_tts,
+    tts_provider_name,
+    tts_voice
+):
+    """
+    Process translation request with text input.
+    Args:
+        input_text: Text to translate
+        target_language: Target language for translation
+        max_tokens: Maximum tokens for translation
+        temperature: Model temperature
+        enable_tts: Whether to generate speech output
+        tts_provider_name: TTS provider name
+        tts_voice: TTS voice
+    Returns:
+        Tuple of (translated_text, detected_language_info, audio_output)
+    """
+    if not input_text.strip():
+        return "", "No input provided", None
+    # Translate the text using the service
+    translated_text, detected_info = translation_service.translate_text(
+        input_text,
+        target_language,
+        max_tokens,
+        temperature
+    )
+    # Generate speech if enabled
+    audio_output = None
+    if enable_tts and translated_text and not translated_text.startswith("Translation Error"):
+        audio_output = synthesize_speech(translated_text, tts_provider_name, tts_voice)
+    return translated_text, detected_info, audio_output
+def process_voice_translation(
+    audio,
+    target_language,
+    stt_provider_name,
+    max_tokens,
+    temperature,
+    enable_tts,
+    tts_provider_name,
+    tts_voice
+):
+    """
+    Process translation request with voice input.
+    Args:
+        audio: Audio file from microphone
+        target_language: Target language for translation
+        stt_provider_name: STT provider name
+        max_tokens: Maximum tokens for translation
+        temperature: Model temperature
+        enable_tts: Whether to generate speech output
+        tts_provider_name: TTS provider name
+        tts_voice: TTS voice
+    Returns:
+        Tuple of (input_text, translated_text, detected_language_info, audio_output)
+    """
+    if audio is None:
+        return "", "", "No audio input", None
+    # Transcribe audio
+    input_text = transcribe_audio(audio, stt_provider_name)
+    if not input_text or input_text.startswith("[Transcription Error"):
+        return input_text, "", "Transcription failed", None
+    # Translate the transcribed text
+    translated_text, detected_info, audio_output = process_translation(
+        input_text,
+        target_language,
+        max_tokens,
+        temperature,
+        enable_tts,
+        tts_provider_name,
+        tts_voice
+    )
+    return input_text, translated_text, detected_info, audio_output
+# ============================================================================
+# Gradio UI
+# ============================================================================
+def create_ui():
+    """
+    Create and configure the Gradio UI.
+    Builds a two-column layout (main content on the left, settings on the
+    right) and wires the three buttons to their handlers. Nothing is
+    launched here.
+    Returns:
+        The configured gr.Blocks instance
+    """
+    with gr.Blocks(title=UIConfig.APP_TITLE) as demo:
+        # Header
+        gr.Markdown(f"# {ModelConfig.SHORT_MODEL_NAME}")
+        gr.Markdown(f"Powered by **{ModelConfig.MODEL_NAME}** - Supporting 1000+ Languages 🌍")
+        gr.Markdown(UIConfig.APP_DESCRIPTION)
+        with gr.Row():
+            # Main content column (scale 3 of 4)
+            with gr.Column(scale=3):
+                # Target language selector
+                target_language = gr.Dropdown(
+                    choices=get_popular_languages_list(),
+                    value=LanguageConfig.DEFAULT_TARGET_LANGUAGE,
+                    label="Target Language",
+                    info="Select the language to translate to"
+                )
+                # Text input section
+                gr.Markdown("### Text Input")
+                with gr.Row():
+                    input_text = gr.Textbox(
+                        label="Enter text to translate",
+                        placeholder="Type or paste text in any language...",
+                        lines=UIConfig.INPUT_TEXT_LINES
+                    )
+                with gr.Row():
+                    translate_btn = gr.Button("Translate", variant="primary", scale=2)
+                    clear_btn = gr.Button("Clear", scale=1)
+                # Voice input section
+                gr.Markdown("### Voice Input")
+                with gr.Row():
+                    with gr.Column(scale=3):
+                        # type="filepath" hands the handler a path string,
+                        # which is what transcribe_audio expects
+                        voice_input = gr.Audio(
+                            sources=["microphone"],
+                            type="filepath",
+                            label="Record Audio (Click to speak)"
+                        )
+                    with gr.Column(scale=1):
+                        voice_translate_btn = gr.Button("Translate Voice", variant="secondary")
+                # Output section
+                gr.Markdown("### Translation Output")
+                detected_language = gr.Textbox(
+                    label="Detected Source Language",
+                    interactive=False
+                )
+                translated_text = gr.Textbox(
+                    label="Translated Text",
+                    lines=UIConfig.OUTPUT_TEXT_LINES,
+                    interactive=False
+                )
+                # Voice output section
+                voice_output = gr.Audio(
+                    label="Translated Audio",
+                    autoplay=True,
+                    visible=True
+                )
+            # Settings panel (scale 1 of 4)
+            with gr.Column(scale=1):
+                gr.Markdown("### Translation Settings")
+                max_tokens = gr.Slider(
+                    minimum=UIConfig.MAX_TOKENS_MIN,
+                    maximum=UIConfig.MAX_TOKENS_MAX,
+                    value=ModelConfig.DEFAULT_MAX_TOKENS,
+                    step=UIConfig.MAX_TOKENS_STEP,
+                    label="Max Tokens",
+                    info="Maximum length of translation"
+                )
+                temperature = gr.Slider(
+                    minimum=UIConfig.TEMPERATURE_MIN,
+                    maximum=UIConfig.TEMPERATURE_MAX,
+                    value=ModelConfig.DEFAULT_TEMPERATURE,
+                    step=UIConfig.TEMPERATURE_STEP,
+                    label="Temperature",
+                    info="Lower = more literal, Higher = more creative"
+                )
+                gr.Markdown("### Voice Settings")
+                stt_provider = gr.Dropdown(
+                    choices=get_available_stt_providers(),
+                    value=VoiceConfig.DEFAULT_STT_PROVIDER,
+                    label="Speech-to-Text Provider",
+                    info="For voice input"
+                )
+                enable_voice_output = gr.Checkbox(
+                    label="Enable Voice Output",
+                    value=VoiceConfig.DEFAULT_VOICE_OUTPUT_ENABLED,
+                    info="Generate audio for translations"
+                )
+                tts_provider = gr.Dropdown(
+                    choices=get_available_tts_providers(),
+                    value=VoiceConfig.DEFAULT_TTS_PROVIDER,
+                    label="Text-to-Speech Provider",
+                    info="For audio output"
+                )
+                # NOTE(review): the voice choices come from
+                # VoiceConfig.OPENAI_TTS_VOICES whatever tts_provider is
+                # selected, and no handler refreshes them when the provider
+                # changes - presumably these are OpenAI-only voice names, so
+                # confirm they are valid for the other providers
+                tts_voice = gr.Dropdown(
+                    choices=VoiceConfig.OPENAI_TTS_VOICES,
+                    value=VoiceConfig.DEFAULT_TTS_VOICE,
+                    label="TTS Voice",
+                    info="Select voice style"
+                )
+        # Event handlers
+        # The inputs list order must match process_translation's parameter
+        # order, and outputs its (translated_text, detected_info, audio) tuple
+        translate_btn.click(
+            process_translation,
+            inputs=[
+                input_text,
+                target_language,
+                max_tokens,
+                temperature,
+                enable_voice_output,
+                tts_provider,
+                tts_voice
+            ],
+            outputs=[translated_text, detected_language, voice_output]
+        )
+        # The voice flow also writes the transcript back into input_text
+        voice_translate_btn.click(
+            process_voice_translation,
+            inputs=[
+                voice_input,
+                target_language,
+                stt_provider,
+                max_tokens,
+                temperature,
+                enable_voice_output,
+                tts_provider,
+                tts_voice
+            ],
+            outputs=[input_text, translated_text, detected_language, voice_output]
+        )
+        # Reset the three text boxes to "" and both audio components to None
+        clear_btn.click(
+            lambda: ("", "", "", None, None),
+            outputs=[input_text, translated_text, detected_language, voice_output, voice_input]
+        )
+    return demo
+# ============================================================================
+# Main Entry Point
+# ============================================================================
+def main():
+    """Main entry point for the translator app."""
+    # Create custom theme
+    theme = gr.themes.Soft(
+        primary_hue=UIConfig.THEME_PRIMARY_HUE,
+        secondary_hue=UIConfig.THEME_SECONDARY_HUE,
+        neutral_hue=UIConfig.THEME_NEUTRAL_HUE,
+        font=UIConfig.THEME_FONT
+    )
+    # Create and launch the UI
+    demo = create_ui()
+    demo.launch(
+        share=UIConfig.SHARE_LINK,
+        inbrowser=UIConfig.OPEN_IN_BROWSER,
+        theme=theme,
+        css=UIConfig.CUSTOM_CSS
+    )
+if __name__ == "__main__":
+    main()

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

voice_handler.py ADDED Viewed

	@@ -0,0 +1,359 @@

+"""
+Voice Handler Module
+Provides Speech-to-Text (STT) and Text-to-Speech (TTS) capabilities
+with multiple provider options for different cost/quality tiers.
+"""
+import os
+import tempfile
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Optional, List, Dict
+import asyncio
+# Import voice processing libraries
+from openai import OpenAI
+import whisper
+import edge_tts
+from gtts import gTTS
+# ============================================================================
+# Configuration and Cost Tiers
+# ============================================================================
+class VoiceConfig:
+    """
+    Configuration for voice providers and their characteristics.
+    The dictionary keys are the provider names that the factory functions in
+    this module accept; each entry's "id" selects the implementation class.
+    """
+    # STT Provider definitions
+    # Insertion order is the order returned by get_available_stt_providers()
+    # cost_per_minute: presumably USD per minute of audio - TODO confirm
+    STT_PROVIDERS = {
+        "OpenAI Whisper API": {
+            "id": "openai_whisper",
+            "cost_tier": "medium",
+            "cost_per_minute": 0.006,
+            "requires_api_key": True,
+        },
+        "Local Whisper (Tiny)": {
+            "id": "local_whisper_tiny",
+            "cost_tier": "free",
+            "cost_per_minute": 0.0,
+            "requires_api_key": False,
+        },
+        "Local Whisper (Base)": {
+            "id": "local_whisper_base",
+            "cost_tier": "free",
+            "cost_per_minute": 0.0,
+            "requires_api_key": False,
+        },
+    }
+    # TTS Provider definitions
+    # Insertion order is the order returned by get_available_tts_providers()
+    # The first entry of each "voices" list is the voice create_tts_provider
+    # falls back to when no voice is given
+    # cost_per_1k_chars: presumably USD per 1,000 characters - TODO confirm
+    TTS_PROVIDERS = {
+        "Edge-TTS (Free)": {
+            "id": "edge_tts",
+            "cost_tier": "free",
+            "cost_per_1k_chars": 0.0,
+            "requires_api_key": False,
+            "voices": [
+                "en-US-AriaNeural",
+                "en-US-GuyNeural",
+                "en-US-JennyNeural",
+                "en-GB-SoniaNeural",
+                "en-GB-RyanNeural",
+            ]
+        },
+        "OpenAI TTS": {
+            "id": "openai_tts",
+            "cost_tier": "medium",
+            "cost_per_1k_chars": 0.015,
+            "requires_api_key": True,
+            "voices": ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
+        },
+        "gTTS (Free)": {
+            "id": "gtts",
+            "cost_tier": "free",
+            "cost_per_1k_chars": 0.0,
+            "requires_api_key": False,
+            "voices": ["default"]
+        },
+    }
+    # Default selections (each value is a key or voice listed above)
+    DEFAULT_STT = "OpenAI Whisper API"
+    DEFAULT_TTS = "Edge-TTS (Free)"
+    DEFAULT_TTS_VOICE = "en-US-JennyNeural"
+# ============================================================================
+# Abstract Base Classes
+# ============================================================================
+class STTProvider(ABC):
+    """Abstract base class for Speech-to-Text providers."""
+    @abstractmethod
+    def transcribe(self, audio_path: str) -> str:
+        """
+        Transcribe audio file to text.
+        Args:
+            audio_path: Path to audio file
+        Returns:
+            Transcribed text
+        """
+        pass
+class TTSProvider(ABC):
+    """Abstract base class for Text-to-Speech providers."""
+    @abstractmethod
+    def synthesize(self, text: str, output_path: Optional[str] = None) -> str:
+        """
+        Synthesize text to speech.
+        Args:
+            text: Text to convert to speech
+            output_path: Optional path to save audio file
+        Returns:
+            Path to generated audio file
+        """
+        pass
+    @abstractmethod
+    def get_available_voices(self) -> List[str]:
+        """Get list of available voices for this provider."""
+        pass
+# ============================================================================
+# STT Provider Implementations
+# ============================================================================
+class OpenAIWhisperSTT(STTProvider):
+    """OpenAI Whisper API implementation."""
+    def __init__(self, api_key: Optional[str] = None):
+        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+        if not self.api_key:
+            raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")
+        self.client = OpenAI(api_key=self.api_key)
+    def transcribe(self, audio_path: str) -> str:
+        """Transcribe audio using OpenAI Whisper API."""
+        try:
+            with open(audio_path, "rb") as audio_file:
+                transcript = self.client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio_file
+                )
+            return transcript.text
+        except Exception as e:
+            raise Exception(f"OpenAI Whisper transcription failed: {str(e)}")
+class LocalWhisperSTT(STTProvider):
+    """Local Whisper model implementation."""
+    def __init__(self, model_size: str = "base"):
+        """
+        Initialize local Whisper model.
+        Args:
+            model_size: Model size (tiny, base, small, medium, large)
+        """
+        self.model_size = model_size
+        self.model = None
+    def _load_model(self):
+        """Lazy load the model."""
+        if self.model is None:
+            self.model = whisper.load_model(self.model_size)
+    def transcribe(self, audio_path: str) -> str:
+        """Transcribe audio using local Whisper model."""
+        self._load_model()
+        try:
+            result = self.model.transcribe(audio_path)
+            return result["text"]
+        except Exception as e:
+            raise Exception(f"Local Whisper transcription failed: {str(e)}")
+# ============================================================================
+# TTS Provider Implementations
+# ============================================================================
+class EdgeTTSProvider(TTSProvider):
+    """Microsoft Edge TTS implementation (free)."""
+    def __init__(self, voice: str = "en-US-JennyNeural"):
+        self.voice = voice
+    def synthesize(self, text: str, output_path: Optional[str] = None) -> str:
+        """Synthesize speech using Edge TTS."""
+        if output_path is None:
+            output_path = os.path.join(tempfile.gettempdir(), f"tts_{os.getpid()}.mp3")
+        try:
+            # Edge TTS requires async
+            async def _synthesize():
+                communicate = edge_tts.Communicate(text, self.voice)
+                await communicate.save(output_path)
+            asyncio.run(_synthesize())
+            return output_path
+        except Exception as e:
+            raise Exception(f"Edge TTS synthesis failed: {str(e)}")
+    def get_available_voices(self) -> List[str]:
+        """Get available Edge TTS voices."""
+        return VoiceConfig.TTS_PROVIDERS["Edge-TTS (Free)"]["voices"]
+class OpenAITTSProvider(TTSProvider):
+    """OpenAI TTS implementation."""
+    def __init__(self, voice: str = "nova", api_key: Optional[str] = None):
+        self.voice = voice
+        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+        if not self.api_key:
+            raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")
+        self.client = OpenAI(api_key=self.api_key)
+    def synthesize(self, text: str, output_path: Optional[str] = None) -> str:
+        """Synthesize speech using OpenAI TTS."""
+        if output_path is None:
+            output_path = os.path.join(tempfile.gettempdir(), f"tts_{os.getpid()}.mp3")
+        try:
+            response = self.client.audio.speech.create(
+                model="tts-1",
+                voice=self.voice,
+                input=text
+            )
+            response.stream_to_file(output_path)
+            return output_path
+        except Exception as e:
+            raise Exception(f"OpenAI TTS synthesis failed: {str(e)}")
+    def get_available_voices(self) -> List[str]:
+        """Get available OpenAI TTS voices."""
+        return VoiceConfig.TTS_PROVIDERS["OpenAI TTS"]["voices"]
+class GTTSProvider(TTSProvider):
+    """Google TTS implementation (free, basic quality)."""
+    def __init__(self, voice: str = "default"):
+        self.voice = voice
+    def synthesize(self, text: str, output_path: Optional[str] = None) -> str:
+        """Synthesize speech using gTTS."""
+        if output_path is None:
+            output_path = os.path.join(tempfile.gettempdir(), f"tts_{os.getpid()}.mp3")
+        try:
+            tts = gTTS(text=text, lang='en')
+            tts.save(output_path)
+            return output_path
+        except Exception as e:
+            raise Exception(f"gTTS synthesis failed: {str(e)}")
+    def get_available_voices(self) -> List[str]:
+        """Get available gTTS voices."""
+        return VoiceConfig.TTS_PROVIDERS["gTTS (Free)"]["voices"]
+# ============================================================================
+# Factory Functions
+# ============================================================================
+def create_stt_provider(provider_name: str) -> STTProvider:
+    """
+    Create an STT provider instance.
+    Args:
+        provider_name: Name of the provider (from VoiceConfig.STT_PROVIDERS)
+    Returns:
+        STTProvider instance
+    """
+    provider_id = VoiceConfig.STT_PROVIDERS[provider_name]["id"]
+    if provider_id == "openai_whisper":
+        return OpenAIWhisperSTT()
+    elif provider_id == "local_whisper_tiny":
+        return LocalWhisperSTT(model_size="tiny")
+    elif provider_id == "local_whisper_base":
+        return LocalWhisperSTT(model_size="base")
+    else:
+        raise ValueError(f"Unknown STT provider: {provider_name}")
+def create_tts_provider(provider_name: str, voice: Optional[str] = None) -> TTSProvider:
+    """
+    Create a TTS provider instance.
+    Args:
+        provider_name: Name of the provider (from VoiceConfig.TTS_PROVIDERS)
+        voice: Optional voice name
+    Returns:
+        TTSProvider instance
+    """
+    provider_id = VoiceConfig.TTS_PROVIDERS[provider_name]["id"]
+    provider_info = VoiceConfig.TTS_PROVIDERS[provider_name]
+    # Use default voice if not specified
+    if voice is None:
+        voice = provider_info["voices"][0]
+    if provider_id == "edge_tts":
+        return EdgeTTSProvider(voice=voice)
+    elif provider_id == "openai_tts":
+        return OpenAITTSProvider(voice=voice)
+    elif provider_id == "gtts":
+        return GTTSProvider(voice=voice)
+    else:
+        raise ValueError(f"Unknown TTS provider: {provider_name}")
+def get_available_stt_providers() -> List[str]:
+    """Get list of available STT provider names."""
+    return list(VoiceConfig.STT_PROVIDERS.keys())
+def get_available_tts_providers() -> List[str]:
+    """Get list of available TTS provider names."""
+    return list(VoiceConfig.TTS_PROVIDERS.keys())
+def get_voices_for_provider(provider_name: str) -> List[str]:
+    """Get available voices for a TTS provider."""
+    if provider_name not in VoiceConfig.TTS_PROVIDERS:
+        return []
+    return VoiceConfig.TTS_PROVIDERS[provider_name]["voices"]
+def get_provider_info(provider_name: str, provider_type: str = "tts") -> Dict:
+    """
+    Get information about a provider.
+    Args:
+        provider_name: Name of the provider
+        provider_type: "stt" or "tts"
+    Returns:
+        Provider information dictionary
+    """
+    if provider_type == "tts":
+        return VoiceConfig.TTS_PROVIDERS.get(provider_name, {})
+    else:
+        return VoiceConfig.STT_PROVIDERS.get(provider_name, {})