joelazo committed on
Commit
3605935
·
1 Parent(s): a452a32

Initial commit. This is the code for the first version of the universal translator.

Browse files
Files changed (11) hide show
  1. .gitignore +299 -0
  2. README.md +305 -2
  3. REFACTORING_NOTES.md +254 -0
  4. config.py +215 -0
  5. gradio_themes.md +345 -0
  6. pyproject.toml +33 -0
  7. requirements.txt +8 -0
  8. translation_service.py +170 -0
  9. translator.py +360 -0
  10. uv.lock +0 -0
  11. voice_handler.py +359 -0
.gitignore ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Universal Translator - Git Ignore File
2
+
3
+ # ============================================================================
4
+ # Python
5
+ # ============================================================================
6
+
7
+ # Byte-compiled / optimized / DLL files
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+
12
+ # C extensions
13
+ *.so
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+ cover/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ .pybuilder/
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # pipenv
93
+ Pipfile.lock
94
+
95
+ # poetry
96
+ poetry.lock
97
+
98
+ # pdm
99
+ .pdm.toml
100
+
101
+ # PEP 582
102
+ __pypackages__/
103
+
104
+ # Celery stuff
105
+ celerybeat-schedule
106
+ celerybeat.pid
107
+
108
+ # SageMath parsed files
109
+ *.sage.py
110
+
111
+ # Environments
112
+ .env
113
+ .venv
114
+ env/
115
+ venv/
116
+ ENV/
117
+ env.bak/
118
+ venv.bak/
119
+
120
+ # Spyder project settings
121
+ .spyderproject
122
+ .spyproject
123
+
124
+ # Rope project settings
125
+ .ropeproject
126
+
127
+ # mkdocs documentation
128
+ /site
129
+
130
+ # mypy
131
+ .mypy_cache/
132
+ .dmypy.json
133
+ dmypy.json
134
+
135
+ # Pyre type checker
136
+ .pyre/
137
+
138
+ # pytype static type analyzer
139
+ .pytype/
140
+
141
+ # Cython debug symbols
142
+ cython_debug/
143
+
144
+ # ============================================================================
145
+ # IDEs and Editors
146
+ # ============================================================================
147
+
148
+ # Visual Studio Code
149
+ .vscode/
150
+ *.code-workspace
151
+
152
+ # PyCharm
153
+ .idea/
154
+ *.iml
155
+ *.ipr
156
+ *.iws
157
+
158
+ # Sublime Text
159
+ *.sublime-project
160
+ *.sublime-workspace
161
+
162
+ # Vim
163
+ *.swp
164
+ *.swo
165
+ *~
166
+
167
+ # Emacs
168
+ *~
169
+ \#*\#
170
+ /.emacs.desktop
171
+ /.emacs.desktop.lock
172
+ *.elc
173
+
174
+ # Eclipse
175
+ .project
176
+ .pydevproject
177
+ .settings/
178
+
179
+ # ============================================================================
180
+ # Operating System
181
+ # ============================================================================
182
+
183
+ # macOS
184
+ .DS_Store
185
+ .AppleDouble
186
+ .LSOverride
187
+ ._*
188
+ .Spotlight-V100
189
+ .Trashes
190
+ .AppleDB
191
+ .AppleDesktop
192
+ Network Trash Folder
193
+ Temporary Items
194
+ .apdisk
195
+
196
+ # Windows
197
+ Thumbs.db
198
+ Thumbs.db:encryptable
199
+ ehthumbs.db
200
+ ehthumbs_vista.db
201
+ *.stackdump
202
+ [Dd]esktop.ini
203
+ $RECYCLE.BIN/
204
+ *.cab
205
+ *.msi
206
+ *.msix
207
+ *.msm
208
+ *.msp
209
+ *.lnk
210
+
211
+ # Linux
212
+ *~
213
+ .fuse_hidden*
214
+ .directory
215
+ .Trash-*
216
+ .nfs*
217
+
218
+ # ============================================================================
219
+ # Project Specific
220
+ # ============================================================================
221
+
222
+ # Environment variables and secrets
223
+ .env
224
+ .env.local
225
+ .env.*.local
226
+ *.key
227
+ *.pem
228
+ secrets.json
229
+ credentials.json
230
+
231
+ # Temporary audio files generated by TTS/STT
232
+ *.mp3
233
+ *.wav
234
+ *.ogg
235
+ *.flac
236
+ /tmp/
237
+ /temp/
238
+
239
+ # Gradio temporary files
240
+ flagged/
241
+ gradio_cached_examples/
242
+
243
+ # Hugging Face cache
244
+ .cache/
245
+ huggingface/
246
+
247
+ # Model cache
248
+ models/
249
+ *.bin
250
+ *.safetensors
251
+
252
+ # UV lock file (optionally ignore if you want fresh resolves)
253
+ # uv.lock
254
+
255
+ # Build artifacts
256
+ *.whl
257
+
258
+ # Log files
259
+ *.log
260
+ logs/
261
+
262
+ # Database files
263
+ *.db
264
+ *.sqlite
265
+ *.sqlite3
266
+
267
+ # Backup files
268
+ *.bak
269
+ *.backup
270
+ *~
271
+
272
+ # ============================================================================
273
+ # Documentation
274
+ # ============================================================================
275
+
276
+ # Generated documentation
277
+ docs/_build/
278
+ docs/build/
279
+ site/
280
+
281
+ # ============================================================================
282
+ # Miscellaneous
283
+ # ============================================================================
284
+
285
+ # Archives
286
+ *.zip
287
+ *.tar
288
+ *.tar.gz
289
+ *.rar
290
+ *.7z
291
+
292
+ # Large files
293
+ *.iso
294
+ *.dmg
295
+
296
+ # Private notes
297
+ NOTES.md
298
+ TODO.md
299
+ PRIVATE.md
README.md CHANGED
@@ -1,2 +1,305 @@
1
- # universal_translator
2
- Apertus based AI agent that can translate any language to any base language in realtime.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Universal Translator
2
+
3
+ A real-time language translation application powered by **Apertus-70B** (supporting 1000+ languages) with voice input and output capabilities.
4
+
5
+ ## Features
6
+
7
+ - **1000+ Languages Support** - Powered by swiss-ai/Apertus-70B-Instruct-2509
8
+ - **Automatic Language Detection** - Automatically detects the source language
9
+ - **Dual Input Modes**:
10
+ - Text input via text box
11
+ - Voice input via microphone (Speech-to-Text)
12
+ - **Dual Output Modes**:
13
+ - Translated text display
14
+ - Audio output (Text-to-Speech)
15
+ - **Multiple Provider Options**:
16
+ - STT: OpenAI Whisper API, Local Whisper (tiny/base)
17
+ - TTS: OpenAI TTS, Edge-TTS (free), gTTS (free)
18
+ - **Customizable Translation**:
19
+ - Adjustable temperature for literal vs creative translations
20
+ - Configurable max tokens
21
+ - Multiple voice styles for audio output
22
+
23
+ ## Prerequisites
24
+
25
+ - Python 3.10 or higher
26
+ - [UV](https://docs.astral.sh/uv/) package manager
27
+ - OpenAI API key (for Whisper STT and TTS)
28
+ - Hugging Face token (for Apertus model access)
29
+
30
+ ## Installation
31
+
32
+ ### 1. Install UV
33
+
34
+ If you don't have UV installed, install it first:
35
+
36
+ **macOS/Linux:**
37
+ ```bash
38
+ curl -LsSf https://astral.sh/uv/install.sh | sh
39
+ ```
40
+
41
+ **Windows:**
42
+ ```powershell
43
+ powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
44
+ ```
45
+
46
+ Or via pip:
47
+ ```bash
48
+ pip install uv
49
+ ```
50
+
51
+ ### 2. Clone the Repository
52
+
53
+ ```bash
54
+ git clone <your-repo-url>
55
+ cd universal_translator
56
+ ```
57
+
58
+ ### 3. Install Dependencies with UV
59
+
60
+ UV will automatically create a virtual environment and install all dependencies:
61
+
62
+ ```bash
63
+ uv sync
64
+ ```
65
+
66
+ This will:
67
+ - Create a `.venv` directory with a Python virtual environment
68
+ - Install all dependencies from `pyproject.toml`
69
+ - Lock versions in `uv.lock`
70
+
71
+ ### 4. Set Up Environment Variables
72
+
73
+ Create a `.env` file in the project root:
74
+
75
+ ```bash
76
+ # .env
77
+ OPENAI_API_KEY=your_openai_api_key_here
78
+ HF_TOKEN=your_huggingface_token_here
79
+ ```
80
+
81
+ **Getting API Keys:**
82
+
83
+ - **OpenAI API Key**: Get it from [OpenAI Platform](https://platform.openai.com/api-keys)
84
+ - **Hugging Face Token**: Get it from [Hugging Face Settings](https://huggingface.co/settings/tokens)
85
+
86
+ ## Usage
87
+
88
+ ### Run with UV
89
+
90
+ ```bash
91
+ uv run python translator.py
92
+ ```
93
+
94
+ Or activate the virtual environment first:
95
+
96
+ ```bash
97
+ # Activate virtual environment
98
+ source .venv/bin/activate # macOS/Linux
99
+ # or
100
+ .venv\Scripts\activate # Windows
101
+
102
+ # Run the app
103
+ python translator.py
104
+ ```
105
+
106
+ ### Using the Application
107
+
108
+ 1. **Open your browser** - The app will automatically open at `http://localhost:7860`
109
+
110
+ 2. **Select Target Language** - Choose the language you want to translate to from the dropdown (20+ popular languages available)
111
+
112
+ 3. **Choose Input Method**:
113
+ - **Text**: Type or paste text in any language in the input box and click "Translate"
114
+ - **Voice**: Click the microphone icon, speak, then click "Translate Voice"
115
+
116
+ 4. **View Results**:
117
+ - **Detected Source Language** - Shows which language was detected
118
+ - **Translated Text** - Displays the translation
119
+ - **Audio Output** - Plays the translation (if voice output is enabled)
120
+
121
+ 5. **Adjust Settings** (optional):
122
+ - **Temperature**: 0.0-1.0 (lower = more literal, higher = more creative)
123
+ - **Max Tokens**: Control translation length
124
+ - **STT Provider**: Choose speech-to-text provider
125
+ - **TTS Provider**: Choose text-to-speech provider and voice
126
+
127
+ ## Project Structure
128
+
129
+ ```
130
+ universal_translator/
131
+ ├── translator.py # Gradio UI layer (presentation)
132
+ ├── translation_service.py # Core translation logic (business layer)
133
+ ├── config.py # Configuration constants and settings
134
+ ├── voice_handler.py # STT/TTS provider implementations
135
+ ├── pyproject.toml # Project configuration and dependencies
136
+ ├── uv.lock # Locked dependency versions
137
+ ├── requirements.txt # Alternative pip requirements
138
+ ├── .env # Environment variables (create this)
139
+ ├── gradio_themes.md # Gradio 6.0 theming guide
140
+ └── README.md # This file
141
+ ```
142
+
143
+ ### Architecture
144
+
145
+ The application follows a layered architecture for better maintainability:
146
+
147
+ - **UI Layer** (`translator.py`) - Handles all Gradio interface components and user interactions
148
+ - **Business Logic** (`translation_service.py`) - Core translation engine and language detection
149
+ - **Configuration** (`config.py`) - Centralized settings, constants, and default values
150
+ - **Voice Services** (`voice_handler.py`) - Speech-to-text and text-to-speech providers
151
+
152
+ ## Supported Languages
153
+
154
+ The app includes 20+ popular languages in the dropdown, including:
155
+
156
+ - Spanish, French, German, Italian, Portuguese
157
+ - Chinese (Simplified), Japanese, Korean
158
+ - Arabic, Russian, Hindi, Turkish
159
+ - Dutch, Polish, Swedish, Greek
160
+ - Hebrew, Thai, Vietnamese, Indonesian
161
+ - English
162
+
163
+ **Note**: While the dropdown shows popular languages, Apertus-70B supports 1000+ languages. You can translate to/from any language by typing the language name.
164
+
165
+ ## Customization
166
+
167
+ All configuration settings are centralized in `config.py`. You can easily customize:
168
+
169
+ ### Modifying Settings
170
+
171
+ Edit `config.py` to change:
172
+
173
+ **Languages:**
174
+ ```python
175
+ # Add more languages to the dropdown
176
+ LanguageConfig.POPULAR_LANGUAGES["Portuguese (Brazil)"] = "pt-br"
177
+
178
+ # Change default target language
179
+ LanguageConfig.DEFAULT_TARGET_LANGUAGE = "French"
180
+ ```
181
+
182
+ **Model Settings:**
183
+ ```python
184
+ # Use a different translation model
185
+ ModelConfig.MODEL_NAME = "your-model-name"
186
+
187
+ # Adjust default parameters
188
+ ModelConfig.DEFAULT_TEMPERATURE = 0.5
189
+ ModelConfig.DEFAULT_MAX_TOKENS = 2048
190
+ ```
191
+
192
+ **UI Appearance:**
193
+ ```python
194
+ # Change theme colors
195
+ UIConfig.THEME_PRIMARY_HUE = "purple"
196
+ UIConfig.THEME_SECONDARY_HUE = "pink"
197
+
198
+ # Add custom CSS
199
+ UIConfig.CUSTOM_CSS = """
200
+ .gradio-container {
201
+ background: linear-gradient(to bottom, #f0f0f0, #ffffff);
202
+ }
203
+ """
204
+ ```
205
+
206
+ **Voice Settings:**
207
+ ```python
208
+ # Change default providers
209
+ VoiceConfig.DEFAULT_TTS_PROVIDER = "Edge-TTS (Free)"
210
+ VoiceConfig.DEFAULT_TTS_VOICE = "en-US-JennyNeural"
211
+ ```
212
+
213
+ ## Configuration
214
+
215
+ ### Translation Settings
216
+
217
+ - **Temperature** (0.0-1.0):
218
+ - `0.0-0.3`: More literal, word-for-word translations
219
+ - `0.4-0.6`: Balanced
220
+ - `0.7-1.0`: More creative, natural-sounding translations
221
+
222
+ - **Max Tokens** (128-2048):
223
+ - Controls maximum length of translation
224
+ - Higher values for longer texts
225
+
226
+ ### Voice Settings
227
+
228
+ **STT Providers:**
229
+ - **OpenAI Whisper API** (Recommended): Fast, accurate, requires API key
230
+ - **Local Whisper (Tiny)**: Free, runs locally, lower accuracy
231
+ - **Local Whisper (Base)**: Free, runs locally, better accuracy
232
+
233
+ **TTS Providers:**
234
+ - **OpenAI TTS** (Recommended): High quality, natural voices, requires API key
235
+ - **Edge-TTS (Free)**: Good quality, free, no API key needed
236
+ - **gTTS (Free)**: Basic quality, free, no API key needed
237
+
238
+ **TTS Voices** (OpenAI):
239
+ - `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`
240
+
241
+ ## Cost Considerations
242
+
243
+ - **Apertus Model**: Free (hosted on Hugging Face)
244
+ - **OpenAI Whisper API**: ~$0.006 per minute of audio
245
+ - **OpenAI TTS**: ~$0.015 per 1,000 characters
246
+ - **Free alternatives**: Local Whisper, Edge-TTS, gTTS
247
+
248
+ ## Troubleshooting
249
+
250
+ ### "Module not found" errors
251
+
252
+ ```bash
253
+ uv sync
254
+ ```
255
+
256
+ ### API Key errors
257
+
258
+ Ensure your `.env` file is in the project root and contains valid keys:
259
+ ```bash
260
+ cat .env # Check if keys are set
261
+ ```
262
+
263
+ ### Hugging Face model access
264
+
265
+ Make sure you have access to the Apertus model and your HF token has read permissions.
266
+
267
+ ### Audio input not working
268
+
269
+ - Check browser permissions for microphone access
270
+ - Ensure microphone is properly connected
271
+
272
+ ## Development
273
+
274
+ ### Add new dependencies
275
+
276
+ ```bash
277
+ uv add package-name
278
+ ```
279
+
280
+ ### Update dependencies
281
+
282
+ ```bash
283
+ uv sync --upgrade
284
+ ```
285
+
286
+ ### Run with different Python version
287
+
288
+ ```bash
289
+ uv run --python 3.11 python translator.py
290
+ ```
291
+
292
+ ## License
293
+
294
+ [Add your license here]
295
+
296
+ ## Acknowledgments
297
+
298
+ - **Apertus-70B** by swiss-ai for multilingual translation
299
+ - **OpenAI** for Whisper and TTS APIs
300
+ - **Gradio** for the web interface
301
+ - **UV** by Astral for fast Python package management
302
+
303
+ ## Support
304
+
305
+ For issues and questions, please open an issue in the repository.
REFACTORING_NOTES.md ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Refactoring Notes
2
+
3
+ ## Overview
4
+
5
+ The Universal Translator codebase has been refactored to follow a clean, layered architecture that separates concerns and improves maintainability.
6
+
7
+ ## Changes Made
8
+
9
+ ### 1. Created `config.py` - Configuration Module
10
+
11
+ **Purpose:** Centralize all configuration constants and settings
12
+
13
+ **Contents:**
14
+ - `ModelConfig` - Translation model settings
15
+ - `LanguageConfig` - Supported languages and mappings
16
+ - `VoiceConfig` - STT/TTS default settings
17
+ - `UIConfig` - Gradio UI appearance and behavior
18
+ - `PromptConfig` - Translation prompt templates
19
+ - `ErrorMessages` - Standardized error messages
20
+ - Helper functions for language lookups
21
+
22
+ **Benefits:**
23
+ - Single source of truth for all settings
24
+ - Easy to modify without touching business logic
25
+ - Better organization and discoverability
26
+ - Named constants grouped into classes instead of scattered literals
27
+
28
+ ### 2. Created `translation_service.py` - Business Logic Module
29
+
30
+ **Purpose:** Extract core translation functionality from UI layer
31
+
32
+ **Components:**
33
+
34
+ #### `LanguageDetector`
35
+ - Handles language detection
36
+ - Returns language codes and names
37
+ - Isolated error handling
38
+
39
+ #### `TranslationEngine`
40
+ - Manages HuggingFace InferenceClient
41
+ - Handles translation requests
42
+ - Formats prompts and processes responses
43
+
44
+ #### `TranslationService`
45
+ - High-level API for UI layer
46
+ - Coordinates detection and translation
47
+ - Returns formatted results
48
+
49
+ **Benefits:**
50
+ - Translation logic can be tested independently
51
+ - Can be reused in different interfaces (CLI, API, etc.)
52
+ - Clear separation of concerns
53
+ - Easier to swap translation providers
54
+
55
+ ### 3. Refactored `translator.py` - UI Module
56
+
57
+ **Purpose:** Focus purely on Gradio UI and user interaction
58
+
59
+ **Remaining Responsibilities:**
60
+ - Gradio component creation and layout
61
+ - Event handler wiring
62
+ - Voice processing (STT/TTS integration)
63
+ - UI state management
64
+
65
+ **Removed:**
66
+ - Language detection logic → `translation_service.py`
67
+ - Translation logic → `translation_service.py`
68
+ - Configuration constants → `config.py`
69
+ - Prompt templates → `config.py`
70
+
71
+ **Benefits:**
72
+ - Cleaner, more readable code
73
+ - UI changes don't affect business logic
74
+ - Easier to create alternative interfaces
75
+ - Improved testability
76
+
77
+ ### 4. Updated `pyproject.toml`
78
+
79
+ **Changes:**
80
+ - Added new modules to `only-include` list
81
+ - Package now includes all necessary files
82
+
83
+ ### 5. Updated `README.md`
84
+
85
+ **Additions:**
86
+ - New project structure diagram
87
+ - Architecture explanation
88
+ - Customization guide for `config.py`
89
+ - Examples of common modifications
90
+
91
+ ## Architecture Diagram
92
+
93
+ ```
94
+ ┌─────────────────────────────────────────┐
95
+ │ User Interface Layer │
96
+ │ (translator.py) │
97
+ │ - Gradio components │
98
+ │ - Event handlers │
99
+ │ - Voice I/O coordination │
100
+ └──────────────┬──────────────────────────┘
101
+
102
+
103
+ ┌─────────────────────────────────────────┐
104
+ │ Business Logic Layer │
105
+ │ (translation_service.py) │
106
+ │ - LanguageDetector │
107
+ │ - TranslationEngine │
108
+ │ - TranslationService │
109
+ └──────────────┬──────────────────────────┘
110
+
111
+
112
+ ┌─────────────────────────────────────────┐
113
+ │ Configuration Layer │
114
+ │ (config.py) │
115
+ │ - Model settings │
116
+ │ - Language mappings │
117
+ │ - UI configuration │
118
+ │ - Voice settings │
119
+ └─────────────────────────────────────────┘
120
+
121
+ ┌────────────────────────────────┐
122
+ │ Voice Services │
123
+ │ (voice_handler.py) │
124
+ │ - STT providers │
125
+ │ - TTS providers │
126
+ └────────────────────────────────┘
127
+ ```
128
+
129
+ ## Benefits of the Refactoring
130
+
131
+ ### 1. **Maintainability**
132
+ - Changes to UI don't affect business logic
133
+ - Configuration changes isolated to one file
134
+ - Clear module boundaries
135
+
136
+ ### 2. **Testability**
137
+ - Business logic can be unit tested separately
138
+ - Mock dependencies easily
139
+ - Test UI and logic independently
140
+
141
+ ### 3. **Extensibility**
142
+ - Easy to add new translation providers
143
+ - Can create CLI, API, or other interfaces
144
+ - Simple to add new languages or settings
145
+
146
+ ### 4. **Readability**
147
+ - Each module has a clear, single purpose
148
+ - Reduced file sizes
149
+ - Better code organization
150
+
151
+ ### 5. **Reusability**
152
+ - `TranslationService` can be imported by other apps
153
+ - `config.py` can be extended for new features
154
+ - Voice handlers are already modular
155
+
156
+ ## Migration Guide
157
+
158
+ ### Before Refactoring
159
+ ```python
160
+ # Everything in translator.py
161
+ POPULAR_LANGUAGES = {...}
162
+ LANGUAGE_NAMES = {...}
163
+ model_name = "..."
164
+
165
+ def detect_language(text):
166
+ # detection logic
167
+ pass
168
+
169
+ def translate_text(text, target):
170
+ # translation logic
171
+ pass
172
+
173
+ # UI code mixed with business logic
174
+ ```
175
+
176
+ ### After Refactoring
177
+ ```python
178
+ # config.py
179
+ class LanguageConfig:
180
+ POPULAR_LANGUAGES = {...}
181
+ LANGUAGE_NAMES = {...}
182
+
183
+ # translation_service.py
184
+ class TranslationService:
185
+ def translate_text(self, text, target):
186
+ # isolated business logic
187
+ pass
188
+
189
+ # translator.py
190
+ from config import LanguageConfig
191
+ from translation_service import TranslationService
192
+
193
+ translation_service = TranslationService()
194
+ # Pure UI code
195
+ ```
196
+
197
+ ## Future Improvements
198
+
199
+ ### Possible Enhancements
200
+ 1. **Add Tests**
201
+ - Unit tests for `TranslationService`
202
+ - Integration tests for UI
203
+ - Mock external APIs
204
+
205
+ 2. **Add Logging**
206
+ - Structured logging for debugging
207
+ - Performance monitoring
208
+ - Error tracking
209
+
210
+ 3. **Create CLI Interface**
211
+ - Reuse `TranslationService`
212
+ - Command-line tool for batch translation
213
+
214
+ 4. **Add REST API**
215
+ - FastAPI or Flask wrapper
216
+ - Reuse `TranslationService`
217
+ - Enable programmatic access
218
+
219
+ 5. **Configuration File Support**
220
+ - Load settings from YAML/JSON
221
+ - Environment-based configs
222
+ - User preferences
223
+
224
+ 6. **Add Caching**
225
+ - Cache translations
226
+ - Reduce API calls
227
+ - Improve performance
228
+
229
+ ## Testing the Refactored Code
230
+
231
+ ### Quick Test
232
+ ```bash
233
+ # Verify imports and structure
234
+ uv run python -c "from translator import create_ui; create_ui()"
235
+ ```
236
+
237
+ ### Full Test
238
+ ```bash
239
+ # Run the application
240
+ uv run python translator.py
241
+ ```
242
+
243
+ ### Expected Behavior
244
+ - App should launch normally
245
+ - All features should work as before
246
+ - Performance should be similar or better
247
+ - Configuration changes in `config.py` should take effect the next time the app is started
248
+
249
+ ## Notes
250
+
251
+ - All functionality preserved
252
+ - No breaking changes to user experience
253
+ - Backward compatible (same entry point)
254
+ - Ready for future enhancements
config.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration Module
3
+ Contains all configuration constants for the Universal Translator application.
4
+ """
5
+
6
+ # ============================================================================
7
+ # Model Configuration
8
+ # ============================================================================
9
+
10
+ class ModelConfig:
11
+ """Configuration for translation models."""
12
+
13
+ # Primary translation model
14
+ MODEL_NAME = "swiss-ai/Apertus-70B-Instruct-2509"
15
+ SHORT_MODEL_NAME = "Apertus Universal Translator"
16
+
17
+ # Model parameters
18
+ DEFAULT_MAX_TOKENS = 1024
19
+ DEFAULT_TEMPERATURE = 0.3
20
+ MIN_TEMPERATURE = 0.0
21
+ MAX_TEMPERATURE = 1.0
22
+
23
+ # Temperature recommendations
24
+ TEMP_LITERAL = 0.1 # More literal, word-for-word translations
25
+ TEMP_BALANCED = 0.3 # Balanced (default)
26
+ TEMP_CREATIVE = 0.7 # More creative, natural-sounding translations
27
+
28
+
29
+ # ============================================================================
30
+ # Language Configuration
31
+ # ============================================================================
32
+
33
+ class LanguageConfig:
34
+ """Configuration for supported languages."""
35
+
36
+ # Popular languages for the dropdown (display_name: language_code)
37
+ POPULAR_LANGUAGES = {
38
+ "Spanish": "es",
39
+ "French": "fr",
40
+ "German": "de",
41
+ "Italian": "it",
42
+ "Portuguese": "pt",
43
+ "Chinese (Simplified)": "zh-cn",
44
+ "Japanese": "ja",
45
+ "Korean": "ko",
46
+ "Arabic": "ar",
47
+ "Russian": "ru",
48
+ "Hindi": "hi",
49
+ "Turkish": "tr",
50
+ "Dutch": "nl",
51
+ "Polish": "pl",
52
+ "Swedish": "sv",
53
+ "Greek": "el",
54
+ "Hebrew": "he",
55
+ "Thai": "th",
56
+ "Vietnamese": "vi",
57
+ "Indonesian": "id",
58
+ "English": "en"
59
+ }
60
+
61
+ # Language code to full name mapping (for detection display)
62
+ LANGUAGE_NAMES = {
63
+ "en": "English",
64
+ "es": "Spanish",
65
+ "fr": "French",
66
+ "de": "German",
67
+ "it": "Italian",
68
+ "pt": "Portuguese",
69
+ "zh-cn": "Chinese",
70
+ "ja": "Japanese",
71
+ "ko": "Korean",
72
+ "ar": "Arabic",
73
+ "ru": "Russian",
74
+ "hi": "Hindi",
75
+ "tr": "Turkish",
76
+ "nl": "Dutch",
77
+ "pl": "Polish",
78
+ "sv": "Swedish",
79
+ "el": "Greek",
80
+ "he": "Hebrew",
81
+ "th": "Thai",
82
+ "vi": "Vietnamese",
83
+ "id": "Indonesian"
84
+ }
85
+
86
+ # Default target language
87
+ DEFAULT_TARGET_LANGUAGE = "Spanish"
88
+
89
+
90
+ # ============================================================================
91
+ # Voice Configuration
92
+ # ============================================================================
93
+
94
+ class VoiceConfig:
95
+ """Configuration for speech-to-text and text-to-speech."""
96
+
97
+ # Default providers
98
+ DEFAULT_STT_PROVIDER = "OpenAI Whisper API"
99
+ DEFAULT_TTS_PROVIDER = "OpenAI TTS"
100
+ DEFAULT_TTS_VOICE = "nova"
101
+
102
+ # Voice output enabled by default
103
+ DEFAULT_VOICE_OUTPUT_ENABLED = True
104
+
105
+ # OpenAI TTS voices
106
+ OPENAI_TTS_VOICES = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
107
+
108
+
109
+ # ============================================================================
110
+ # UI Configuration
111
+ # ============================================================================
112
+
113
+ class UIConfig:
114
+ """Configuration for the Gradio UI."""
115
+
116
+ # App metadata
117
+ APP_TITLE = "Universal Translator"
118
+ APP_DESCRIPTION = "Translate text or speech to any language with automatic source language detection"
119
+
120
+ # Theme settings
121
+ THEME_PRIMARY_HUE = "blue"
122
+ THEME_SECONDARY_HUE = "cyan"
123
+ THEME_NEUTRAL_HUE = "slate"
124
+ THEME_FONT = "Inter, system-ui, sans-serif"
125
+
126
+ # Custom CSS
127
+ CUSTOM_CSS = """
128
+ .gradio-container {
129
+ font-family: 'Inter', system-ui, sans-serif;
130
+ }
131
+ """
132
+
133
+ # Input/Output dimensions
134
+ INPUT_TEXT_LINES = 4
135
+ OUTPUT_TEXT_LINES = 4
136
+
137
+ # Slider configurations
138
+ MAX_TOKENS_MIN = 128
139
+ MAX_TOKENS_MAX = 2048
140
+ MAX_TOKENS_STEP = 128
141
+
142
+ TEMPERATURE_MIN = 0.0
143
+ TEMPERATURE_MAX = 1.0
144
+ TEMPERATURE_STEP = 0.1
145
+
146
+ # Launch settings
147
+ SHARE_LINK = False
148
+ OPEN_IN_BROWSER = True
149
+
150
+
151
+ # ============================================================================
152
+ # Translation Prompts
153
+ # ============================================================================
154
+
155
+ class PromptConfig:
156
+ """Configuration for translation prompts."""
157
+
158
+ SYSTEM_PROMPT_TEMPLATE = """You are a professional translator. Your task is to translate the given text to {target_language}.
159
+ Provide ONLY the translation, without any explanations, notes, or additional text.
160
+ Maintain the tone, style, and meaning of the original text."""
161
+
162
+
163
+ # ============================================================================
164
+ # Error Messages
165
+ # ============================================================================
166
+
167
+ class ErrorMessages:
168
+ """Standard error messages."""
169
+
170
+ NO_INPUT = "No input provided"
171
+ NO_AUDIO_INPUT = "No audio input"
172
+ TRANSCRIPTION_FAILED = "Transcription failed"
173
+ TRANSLATION_ERROR = "Translation Error: {error}"
174
+ TRANSCRIPTION_ERROR = "[Transcription Error: {error}]"
175
+ LANGUAGE_DETECTION_FAILED = "Unknown"
176
+
177
+
178
+ # ============================================================================
179
+ # Helper Functions
180
+ # ============================================================================
181
+
182
+ def get_language_name(lang_code: str) -> str:
183
+ """
184
+ Get the full language name from a language code.
185
+
186
+ Args:
187
+ lang_code: Language code (e.g., 'en', 'es')
188
+
189
+ Returns:
190
+ Full language name or uppercase code if not found
191
+ """
192
+ return LanguageConfig.LANGUAGE_NAMES.get(lang_code, lang_code.upper())
193
+
194
+
195
+ def get_popular_languages_list() -> list[str]:
196
+ """
197
+ Get a list of popular language names for dropdowns.
198
+
199
+ Returns:
200
+ List of language display names
201
+ """
202
+ return list(LanguageConfig.POPULAR_LANGUAGES.keys())
203
+
204
+
205
+ def get_language_code(language_name: str) -> str:
206
+ """
207
+ Get the language code from a display name.
208
+
209
+ Args:
210
+ language_name: Display name (e.g., 'Spanish', 'French')
211
+
212
+ Returns:
213
+ Language code, or the lowercased name if not found
214
+ """
215
+ return LanguageConfig.POPULAR_LANGUAGES.get(language_name, language_name.lower())
gradio_themes.md ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gradio 6.0 Theming Guide
2
+
3
+ ## Key Change in Gradio 6.0
4
+
5
+ In Gradio 6.0, the `theme`, `css`, and `css_paths` parameters have **moved from the `Blocks()` constructor to the `launch()` method**.
6
+
7
+ ### Old Way (Gradio 5.x)
8
+ ```python
9
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
10
+ # components
11
+ pass
12
+ demo.launch()
13
+ ```
14
+
15
+ ### New Way (Gradio 6.0+)
16
+ ```python
17
+ with gr.Blocks() as demo:
18
+ # components
19
+ pass
20
+ demo.launch(theme=gr.themes.Soft())
21
+ ```
22
+
23
+ ---
24
+
25
+ ## 1. Built-in Themes
26
+
27
+ Gradio ships with several prebuilt themes under `gr.themes`, including the five shown here:
28
+
29
+ ```python
30
+ import gradio as gr
31
+
32
+ with gr.Blocks() as demo:
33
+ # your components
34
+ pass
35
+
36
+ # Choose ONE of these themes (call launch() only once)
37
+ demo.launch(theme=gr.themes.Base())
38
+ demo.launch(theme=gr.themes.Default())
39
+ demo.launch(theme=gr.themes.Glass())
40
+ demo.launch(theme=gr.themes.Monochrome())
41
+ demo.launch(theme=gr.themes.Soft())
42
+ ```
43
+
44
+ ---
45
+
46
+ ## 2. Customizing Built-in Themes
47
+
48
+ ### Basic Customization
49
+
50
+ Customize colors, fonts, and sizing:
51
+
52
+ ```python
53
+ theme = gr.themes.Soft(
54
+ primary_hue="blue", # Main accent color (buttons, links)
55
+ secondary_hue="cyan", # Secondary elements
56
+ neutral_hue="slate", # Text and backgrounds
57
+ font="Inter, system-ui, sans-serif",
58
+ font_mono="Fira Code, monospace"
59
+ )
60
+
61
+ demo.launch(theme=theme)
62
+ ```
63
+
64
+ ### Available Color Names
65
+
66
+ `slate`, `gray`, `zinc`, `neutral`, `stone`, `red`, `orange`, `amber`, `yellow`, `lime`, `green`, `emerald`, `teal`, `cyan`, `sky`, `blue`, `indigo`, `violet`, `purple`, `fuchsia`, `pink`, `rose`
67
+
68
+ ### Sizing Options
69
+
70
+ ```python
71
+ theme = gr.themes.Soft(
72
+ spacing_size=gr.themes.sizes.spacing_lg, # Spacing/padding
73
+ radius_size=gr.themes.sizes.radius_md, # Corner roundness
74
+ text_size=gr.themes.sizes.text_md # Font size
75
+ )
76
+ ```
77
+
78
+ Available sizes:
79
+ - **Spacing:** `spacing_sm`, `spacing_md`, `spacing_lg`
80
+ - **Radius:** `radius_none`, `radius_sm`, `radius_md`, `radius_lg`
81
+ - **Text:** `text_sm`, `text_md`, `text_lg`
82
+
83
+ ---
84
+
85
+ ## 3. Advanced Theme Customization
86
+
87
+ ### Using `.set()` for CSS Variables
88
+
89
+ ```python
90
+ theme = gr.themes.Soft(
91
+ primary_hue="indigo",
92
+ secondary_hue="cyan"
93
+ ).set(
94
+ # Button styling
95
+ button_primary_background_fill="*primary_500",
96
+ button_primary_background_fill_hover="*primary_600",
97
+ button_primary_text_color="white",
98
+
99
+ # Slider colors
100
+ slider_color="*secondary_500",
101
+
102
+ # Loader/spinner color
103
+ loader_color="*primary_400",
104
+
105
+ # Background gradients
106
+ body_background_fill="linear-gradient(to bottom, #f0f0f0, #ffffff)",
107
+
108
+ # Borders
109
+ border_color_primary="*primary_300",
110
+ )
111
+
112
+ demo.launch(theme=theme)
113
+ ```
114
+
115
+ ---
116
+
117
+ ## 4. Custom CSS
118
+
119
+ ### Inline CSS String
120
+
121
+ ```python
122
+ custom_css = """
123
+ .gradio-container {
124
+ font-family: 'Inter', system-ui, sans-serif;
125
+ max-width: 1200px;
126
+ margin: 0 auto;
127
+ }
128
+
129
+ .my-custom-button {
130
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
131
+ border: none !important;
132
+ color: white !important;
133
+ }
134
+
135
+ /* Override default styles with !important if needed */
136
+ .gr-button {
137
+ border-radius: 8px !important;
138
+ }
139
+ """
140
+
141
+ demo.launch(
142
+ theme=gr.themes.Soft(),
143
+ css=custom_css
144
+ )
145
+ ```
146
+
147
+ ### External CSS Files
148
+
149
+ ```python
150
+ from pathlib import Path
151
+
152
+ demo.launch(
153
+ theme=gr.themes.Soft(),
154
+ css_paths=[
155
+ Path("styles/main.css"),
156
+ Path("styles/custom.css")
157
+ ]
158
+ )
159
+ ```
160
+
161
+ ---
162
+
163
+ ## 5. Complete Example
164
+
165
+ ### Full Implementation with Theme and Custom CSS
166
+
167
+ ```python
168
+ import gradio as gr
169
+
170
+ # Build your interface
171
+ with gr.Blocks(title="My App") as demo:
172
+ gr.Markdown("# My Custom Themed App")
173
+
174
+ with gr.Row():
175
+ input_text = gr.Textbox(label="Input", elem_classes="custom-input")
176
+ output_text = gr.Textbox(label="Output")
177
+
178
+ submit_btn = gr.Button("Submit", variant="primary", elem_classes="custom-button")
179
+
180
+ def main():
181
+ # Create custom theme
182
+ theme = gr.themes.Soft(
183
+ primary_hue="blue",
184
+ secondary_hue="cyan",
185
+ neutral_hue="slate",
186
+ spacing_size=gr.themes.sizes.spacing_md,
187
+ radius_size=gr.themes.sizes.radius_lg,
188
+ font="Inter, system-ui, sans-serif"
189
+ ).set(
190
+ button_primary_background_fill="*primary_600",
191
+ button_primary_background_fill_hover="*primary_700",
192
+ slider_color="*secondary_500"
193
+ )
194
+
195
+ # Custom CSS
196
+ custom_css = """
197
+ .gradio-container {
198
+ max-width: 1400px;
199
+ margin: 0 auto;
200
+ }
201
+
202
+ .custom-button {
203
+ font-weight: 600 !important;
204
+ transition: all 0.3s ease !important;
205
+ }
206
+
207
+ .custom-input textarea {
208
+ border: 2px solid #e5e7eb !important;
209
+ border-radius: 8px !important;
210
+ }
211
+ """
212
+
213
+ # Launch with theme and CSS
214
+ demo.launch(
215
+ share=False,
216
+ inbrowser=True,
217
+ theme=theme,
218
+ css=custom_css
219
+ )
220
+
221
+ if __name__ == "__main__":
222
+ main()
223
+ ```
224
+
225
+ ---
226
+
227
+ ## 6. Theme Constructor Parameters
228
+
229
+ ### All Available Parameters
230
+
231
+ ```python
232
+ gr.themes.Soft(
233
+ # Colors (use color names or gr.themes.colors objects)
234
+ primary_hue="blue", # Main accent color
235
+ secondary_hue="cyan", # Secondary elements
236
+ neutral_hue="slate", # Text and backgrounds
237
+
238
+ # Sizing
239
+ spacing_size=gr.themes.sizes.spacing_md,
240
+ radius_size=gr.themes.sizes.radius_md,
241
+ text_size=gr.themes.sizes.text_md,
242
+
243
+ # Fonts
244
+ font="system-ui, sans-serif",
245
+ font_mono="monospace"
246
+ )
247
+ ```
248
+
249
+ ---
250
+
251
+ ## 7. Tips and Best Practices
252
+
253
+ ### General Tips
254
+
255
+ 1. **Use `!important` in custom CSS** - Gradio's default styles may need to be overridden
256
+ 2. **Base class is `.gradio-container`** - Target this for app-wide styling
257
+ 3. **CSS selectors are not guaranteed to be stable** - Selectors that target Gradio's internal elements may break because its internal structure can change between versions
258
+ 4. **Test your theme** - Colors may appear differently on different displays
259
+
260
+ ### Debugging CSS
261
+
262
+ ```python
263
+ # Add borders to see element boundaries
264
+ custom_css = """
265
+ * {
266
+ border: 1px solid red !important;
267
+ }
268
+ """
269
+ ```
270
+
271
+ ### Performance
272
+
273
+ - Keep CSS minimal for faster loading
274
+ - Use CSS files for large stylesheets instead of inline strings
275
+ - Avoid complex selectors and deep nesting
276
+
277
+ ---
278
+
279
+ ## 8. Common Styling Use Cases
280
+
281
+ ### Dark Mode
282
+
283
+ ```python
284
+ theme = gr.themes.Monochrome(
285
+ neutral_hue="slate"
286
+ ).set(
287
+ body_background_fill="#1a1a1a",
288
+ body_text_color="#ffffff",
289
+ input_background_fill="#2d2d2d",
290
+ button_primary_background_fill="#4a9eff"
291
+ )
292
+ ```
293
+
294
+ ### Compact Layout
295
+
296
+ ```python
297
+ theme = gr.themes.Soft(
298
+ spacing_size=gr.themes.sizes.spacing_sm,
299
+ radius_size=gr.themes.sizes.radius_sm,
300
+ text_size=gr.themes.sizes.text_sm
301
+ )
302
+ ```
303
+
304
+ ### Colorful/Vibrant
305
+
306
+ ```python
307
+ theme = gr.themes.Soft(
308
+ primary_hue="pink",
309
+ secondary_hue="purple"
310
+ ).set(
311
+ button_primary_background_fill="linear-gradient(90deg, #667eea 0%, #764ba2 100%)"
312
+ )
313
+ ```
314
+
315
+ ### Professional/Corporate
316
+
317
+ ```python
318
+ theme = gr.themes.Default(
319
+ primary_hue="blue",
320
+ secondary_hue="slate",
321
+ neutral_hue="gray"
322
+ ).set(
323
+ body_background_fill="#ffffff",
324
+ button_primary_background_fill="#0066cc",
325
+ border_color_primary="#d1d5db"
326
+ )
327
+ ```
328
+
329
+ ---
330
+
331
+ ## Resources
332
+
333
+ - [Gradio Custom CSS and JS Guide](https://www.gradio.app/guides/custom-CSS-and-JS)
334
+ - [Gradio 6 Migration Guide](https://www.gradio.app/main/guides/gradio-6-migration-guide)
335
+ - [Gradio Theming Guide](https://www.gradio.app/guides/theming-guide)
336
+ - [Gradio Themes Documentation](https://www.gradio.app/docs/gradio/themes)
337
+ - [Gradio Blocks Documentation](https://www.gradio.app/docs/gradio/blocks)
338
+
339
+ ---
340
+
341
+ ## Version Information
342
+
343
+ - This guide is for **Gradio 6.0+**
344
+ - For Gradio 5.x and earlier, themes are set in the `Blocks()` constructor instead of `launch()`
345
+ - Check your Gradio version: `python -c "import gradio; print(gradio.__version__)"`
pyproject.toml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "universal-translator"
3
+ version = "0.1.0"
4
+ description = "Universal language translator powered by Apertus-70B with voice input/output support"
5
+ authors = [
6
+ {name = "Your Name", email = "your.email@example.com"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.10"
10
+ dependencies = [
11
+ "edge-tts>=7.2.3",
12
+ "gradio>=6.0.0",
13
+ "gtts>=2.5.4",
14
+ "huggingface-hub>=1.1.4",
15
+ "langdetect>=1.0.9",
16
+ "openai>=2.8.0",
17
+ "openai-whisper>=20250625",
18
+ "python-dotenv>=1.2.1",
19
+ ]
20
+
21
+ [project.scripts]
22
+ translator = "translator:main"
23
+
24
+ [build-system]
25
+ requires = ["hatchling"]
26
+ build-backend = "hatchling.build"
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["."]
30
+ only-include = ["translator.py", "translation_service.py", "voice_handler.py", "config.py"]
31
+
32
+ [dependency-groups]
33
+ dev = []
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ huggingface_hub
3
+ python-dotenv
4
+ openai
5
+ edge-tts
6
+ openai-whisper
7
+ gtts
8
+ langdetect
translation_service.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Translation Service Module
3
+ Contains the core translation logic separated from the UI.
4
+ """
5
+
6
+ from typing import Tuple, Optional
7
+ from huggingface_hub import InferenceClient
8
+ from langdetect import detect, LangDetectException
9
+
10
+ from config import (
11
+ ModelConfig,
12
+ LanguageConfig,
13
+ PromptConfig,
14
+ ErrorMessages,
15
+ get_language_name
16
+ )
17
+
18
+
19
+ class LanguageDetector:
20
+ """Handles language detection for input text."""
21
+
22
+ @staticmethod
23
+ def detect_language(text: str) -> Tuple[str, str]:
24
+ """
25
+ Detect the language of the input text.
26
+
27
+ Args:
28
+ text: Input text to detect language
29
+
30
+ Returns:
31
+ Tuple of (language_code, language_name)
32
+ """
33
+ try:
34
+ lang_code = detect(text)
35
+ lang_name = get_language_name(lang_code)
36
+ return lang_code, lang_name
37
+ except LangDetectException:
38
+ return "unknown", ErrorMessages.LANGUAGE_DETECTION_FAILED
39
+
40
+
41
+ class TranslationEngine:
42
+ """Handles translation using the Apertus model."""
43
+
44
+ def __init__(self, model_name: Optional[str] = None):
45
+ """
46
+ Initialize the translation engine.
47
+
48
+ Args:
49
+ model_name: Optional model name override
50
+ """
51
+ self.model_name = model_name or ModelConfig.MODEL_NAME
52
+ self.client = InferenceClient(model=self.model_name)
53
+ self.language_detector = LanguageDetector()
54
+
55
+ def translate(
56
+ self,
57
+ text: str,
58
+ target_language: str,
59
+ max_tokens: int = None,
60
+ temperature: float = None
61
+ ) -> Tuple[str, str, str]:
62
+ """
63
+ Translate text to target language.
64
+
65
+ Args:
66
+ text: Text to translate
67
+ target_language: Target language name (e.g., 'Spanish', 'French')
68
+ max_tokens: Maximum tokens for response (defaults to config value)
69
+ temperature: Model temperature (defaults to config value)
70
+
71
+ Returns:
72
+ Tuple of (translated_text, source_lang_code, source_lang_name)
73
+ """
74
+ if not text.strip():
75
+ return "", "unknown", ErrorMessages.LANGUAGE_DETECTION_FAILED
76
+
77
+ # Use defaults from config if not provided
78
+ max_tokens = max_tokens or ModelConfig.DEFAULT_MAX_TOKENS
79
+ temperature = ModelConfig.DEFAULT_TEMPERATURE if temperature is None else temperature
80
+
81
+ # Detect source language
82
+ source_lang_code, source_lang_name = self.language_detector.detect_language(text)
83
+
84
+ # Create translation prompt
85
+ system_prompt = PromptConfig.SYSTEM_PROMPT_TEMPLATE.format(
86
+ target_language=target_language
87
+ )
88
+
89
+ messages = [
90
+ {"role": "system", "content": system_prompt},
91
+ {"role": "user", "content": text}
92
+ ]
93
+
94
+ try:
95
+ # Call the translation model
96
+ response = self.client.chat_completion(
97
+ messages=messages,
98
+ max_tokens=max_tokens,
99
+ temperature=temperature,
100
+ stream=False
101
+ )
102
+
103
+ translated_text = response.choices[0].message.content.strip()
104
+ return translated_text, source_lang_code, source_lang_name
105
+
106
+ except Exception as e:
107
+ error_message = ErrorMessages.TRANSLATION_ERROR.format(error=str(e))
108
+ return error_message, source_lang_code, source_lang_name
109
+
110
+
111
+ class TranslationService:
112
+ """
113
+ High-level translation service that wraps the translation engine and formats results for the UI.
114
+ """
115
+
116
+ def __init__(self, model_name: Optional[str] = None):
117
+ """
118
+ Initialize the translation service.
119
+
120
+ Args:
121
+ model_name: Optional model name override
122
+ """
123
+ self.engine = TranslationEngine(model_name)
124
+
125
+ def translate_text(
126
+ self,
127
+ text: str,
128
+ target_language: str,
129
+ max_tokens: Optional[int] = None,
130
+ temperature: Optional[float] = None
131
+ ) -> Tuple[str, str]:
132
+ """
133
+ Translate text and return formatted results.
134
+
135
+ Args:
136
+ text: Text to translate
137
+ target_language: Target language name
138
+ max_tokens: Maximum tokens for response
139
+ temperature: Model temperature
140
+
141
+ Returns:
142
+ Tuple of (translated_text, detected_language_info)
143
+ """
144
+ if not text.strip():
145
+ return "", ErrorMessages.NO_INPUT
146
+
147
+ translated_text, source_code, source_name = self.engine.translate(
148
+ text, target_language, max_tokens, temperature
149
+ )
150
+
151
+ # Format detected language info
152
+ detected_info = f"Detected: {source_name} ({source_code})"
153
+
154
+ return translated_text, detected_info
155
+
156
+ def detect_language_only(self, text: str) -> str:
157
+ """
158
+ Detect language without translating.
159
+
160
+ Args:
161
+ text: Text to detect language
162
+
163
+ Returns:
164
+ Formatted language detection string
165
+ """
166
+ if not text.strip():
167
+ return ErrorMessages.NO_INPUT
168
+
169
+ source_code, source_name = self.engine.language_detector.detect_language(text)
170
+ return f"Detected: {source_name} ({source_code})"
translator.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Universal Translator - Main UI Module
3
+ Gradio-based user interface for the translation application.
4
+ """
5
+
6
+ import gradio as gr
7
+ from dotenv import load_dotenv
8
+
9
+ from config import (
10
+ ModelConfig,
11
+ LanguageConfig,
12
+ VoiceConfig,
13
+ UIConfig,
14
+ get_popular_languages_list
15
+ )
16
+ from translation_service import TranslationService
17
+ from voice_handler import (
18
+ create_stt_provider,
19
+ create_tts_provider,
20
+ get_available_stt_providers,
21
+ get_available_tts_providers
22
+ )
23
+
24
+ # Load environment variables
25
+ load_dotenv(override=True)
26
+
27
+ # Initialize translation service
28
+ translation_service = TranslationService()
29
+
30
+
31
+ # ============================================================================
32
+ # Voice Processing Functions
33
+ # ============================================================================
34
+
35
+ def transcribe_audio(audio_path, stt_provider_name):
36
+ """
37
+ Transcribe audio to text using selected STT provider.
38
+
39
+ Args:
40
+ audio_path: Path to audio file
41
+ stt_provider_name: Name of STT provider
42
+
43
+ Returns:
44
+ Transcribed text or error message
45
+ """
46
+ if audio_path is None:
47
+ return ""
48
+
49
+ try:
50
+ stt_provider = create_stt_provider(stt_provider_name)
51
+ text = stt_provider.transcribe(audio_path)
52
+ return text
53
+ except Exception as e:
54
+ return f"[Transcription Error: {str(e)}]"
55
+
56
+
57
+ def synthesize_speech(text, tts_provider_name, tts_voice):
58
+ """
59
+ Synthesize text to speech using selected TTS provider.
60
+
61
+ Args:
62
+ text: Text to synthesize
63
+ tts_provider_name: Name of TTS provider
64
+ tts_voice: Voice to use
65
+
66
+ Returns:
67
+ Path to generated audio file or None if failed
68
+ """
69
+ if not text or not text.strip():
70
+ return None
71
+
72
+ try:
73
+ tts_provider = create_tts_provider(tts_provider_name, voice=tts_voice)
74
+ audio_path = tts_provider.synthesize(text)
75
+ return audio_path
76
+ except Exception as e:
77
+ print(f"TTS Error: {str(e)}")
78
+ return None
79
+
80
+
81
+ # ============================================================================
82
+ # Translation Handler Functions
83
+ # ============================================================================
84
+
85
+ def process_translation(
86
+ input_text,
87
+ target_language,
88
+ max_tokens,
89
+ temperature,
90
+ enable_tts,
91
+ tts_provider_name,
92
+ tts_voice
93
+ ):
94
+ """
95
+ Process translation request with text input.
96
+
97
+ Args:
98
+ input_text: Text to translate
99
+ target_language: Target language for translation
100
+ max_tokens: Maximum tokens for translation
101
+ temperature: Model temperature
102
+ enable_tts: Whether to generate speech output
103
+ tts_provider_name: TTS provider name
104
+ tts_voice: TTS voice
105
+
106
+ Returns:
107
+ Tuple of (translated_text, detected_language_info, audio_output)
108
+ """
109
+ if not input_text.strip():
110
+ return "", "No input provided", None
111
+
112
+ # Translate the text using the service
113
+ translated_text, detected_info = translation_service.translate_text(
114
+ input_text,
115
+ target_language,
116
+ max_tokens,
117
+ temperature
118
+ )
119
+
120
+ # Generate speech if enabled
121
+ audio_output = None
122
+ if enable_tts and translated_text and not translated_text.startswith("Translation Error"):
123
+ audio_output = synthesize_speech(translated_text, tts_provider_name, tts_voice)
124
+
125
+ return translated_text, detected_info, audio_output
126
+
127
+
128
+ def process_voice_translation(
129
+ audio,
130
+ target_language,
131
+ stt_provider_name,
132
+ max_tokens,
133
+ temperature,
134
+ enable_tts,
135
+ tts_provider_name,
136
+ tts_voice
137
+ ):
138
+ """
139
+ Process translation request with voice input.
140
+
141
+ Args:
142
+ audio: Audio file from microphone
143
+ target_language: Target language for translation
144
+ stt_provider_name: STT provider name
145
+ max_tokens: Maximum tokens for translation
146
+ temperature: Model temperature
147
+ enable_tts: Whether to generate speech output
148
+ tts_provider_name: TTS provider name
149
+ tts_voice: TTS voice
150
+
151
+ Returns:
152
+ Tuple of (input_text, translated_text, detected_language_info, audio_output)
153
+ """
154
+ if audio is None:
155
+ return "", "", "No audio input", None
156
+
157
+ # Transcribe audio
158
+ input_text = transcribe_audio(audio, stt_provider_name)
159
+
160
+ if not input_text or input_text.startswith("[Transcription Error"):
161
+ return input_text, "", "Transcription failed", None
162
+
163
+ # Translate the transcribed text
164
+ translated_text, detected_info, audio_output = process_translation(
165
+ input_text,
166
+ target_language,
167
+ max_tokens,
168
+ temperature,
169
+ enable_tts,
170
+ tts_provider_name,
171
+ tts_voice
172
+ )
173
+
174
+ return input_text, translated_text, detected_info, audio_output
175
+
176
+
177
+ # ============================================================================
178
+ # Gradio UI
179
+ # ============================================================================
180
+
181
+ def create_ui():
182
+ """Create and configure the Gradio UI."""
183
+
184
+ with gr.Blocks(title=UIConfig.APP_TITLE) as demo:
185
+ # Header
186
+ gr.Markdown(f"# {ModelConfig.SHORT_MODEL_NAME}")
187
+ gr.Markdown(f"Powered by **{ModelConfig.MODEL_NAME}** - Supporting 1000+ Languages 🌍")
188
+ gr.Markdown(UIConfig.APP_DESCRIPTION)
189
+
190
+ with gr.Row():
191
+ # Main content column
192
+ with gr.Column(scale=3):
193
+ # Target language selector
194
+ target_language = gr.Dropdown(
195
+ choices=get_popular_languages_list(),
196
+ value=LanguageConfig.DEFAULT_TARGET_LANGUAGE,
197
+ label="Target Language",
198
+ info="Select the language to translate to"
199
+ )
200
+
201
+ # Text input section
202
+ gr.Markdown("### Text Input")
203
+ with gr.Row():
204
+ input_text = gr.Textbox(
205
+ label="Enter text to translate",
206
+ placeholder="Type or paste text in any language...",
207
+ lines=UIConfig.INPUT_TEXT_LINES
208
+ )
209
+
210
+ with gr.Row():
211
+ translate_btn = gr.Button("Translate", variant="primary", scale=2)
212
+ clear_btn = gr.Button("Clear", scale=1)
213
+
214
+ # Voice input section
215
+ gr.Markdown("### Voice Input")
216
+ with gr.Row():
217
+ with gr.Column(scale=3):
218
+ voice_input = gr.Audio(
219
+ sources=["microphone"],
220
+ type="filepath",
221
+ label="Record Audio (Click to speak)"
222
+ )
223
+ with gr.Column(scale=1):
224
+ voice_translate_btn = gr.Button("Translate Voice", variant="secondary")
225
+
226
+ # Output section
227
+ gr.Markdown("### Translation Output")
228
+ detected_language = gr.Textbox(
229
+ label="Detected Source Language",
230
+ interactive=False
231
+ )
232
+
233
+ translated_text = gr.Textbox(
234
+ label="Translated Text",
235
+ lines=UIConfig.OUTPUT_TEXT_LINES,
236
+ interactive=False
237
+ )
238
+
239
+ # Voice output section
240
+ voice_output = gr.Audio(
241
+ label="Translated Audio",
242
+ autoplay=True,
243
+ visible=True
244
+ )
245
+
246
+ # Settings panel
247
+ with gr.Column(scale=1):
248
+ gr.Markdown("### Translation Settings")
249
+
250
+ max_tokens = gr.Slider(
251
+ minimum=UIConfig.MAX_TOKENS_MIN,
252
+ maximum=UIConfig.MAX_TOKENS_MAX,
253
+ value=ModelConfig.DEFAULT_MAX_TOKENS,
254
+ step=UIConfig.MAX_TOKENS_STEP,
255
+ label="Max Tokens",
256
+ info="Maximum length of translation"
257
+ )
258
+
259
+ temperature = gr.Slider(
260
+ minimum=UIConfig.TEMPERATURE_MIN,
261
+ maximum=UIConfig.TEMPERATURE_MAX,
262
+ value=ModelConfig.DEFAULT_TEMPERATURE,
263
+ step=UIConfig.TEMPERATURE_STEP,
264
+ label="Temperature",
265
+ info="Lower = more literal, Higher = more creative"
266
+ )
267
+
268
+ gr.Markdown("### Voice Settings")
269
+
270
+ stt_provider = gr.Dropdown(
271
+ choices=get_available_stt_providers(),
272
+ value=VoiceConfig.DEFAULT_STT_PROVIDER,
273
+ label="Speech-to-Text Provider",
274
+ info="For voice input"
275
+ )
276
+
277
+ enable_voice_output = gr.Checkbox(
278
+ label="Enable Voice Output",
279
+ value=VoiceConfig.DEFAULT_VOICE_OUTPUT_ENABLED,
280
+ info="Generate audio for translations"
281
+ )
282
+
283
+ tts_provider = gr.Dropdown(
284
+ choices=get_available_tts_providers(),
285
+ value=VoiceConfig.DEFAULT_TTS_PROVIDER,
286
+ label="Text-to-Speech Provider",
287
+ info="For audio output"
288
+ )
289
+
290
+ tts_voice = gr.Dropdown(
291
+ choices=VoiceConfig.OPENAI_TTS_VOICES,
292
+ value=VoiceConfig.DEFAULT_TTS_VOICE,
293
+ label="TTS Voice",
294
+ info="Select voice style"
295
+ )
296
+
297
+ # Event handlers
298
+ translate_btn.click(
299
+ process_translation,
300
+ inputs=[
301
+ input_text,
302
+ target_language,
303
+ max_tokens,
304
+ temperature,
305
+ enable_voice_output,
306
+ tts_provider,
307
+ tts_voice
308
+ ],
309
+ outputs=[translated_text, detected_language, voice_output]
310
+ )
311
+
312
+ voice_translate_btn.click(
313
+ process_voice_translation,
314
+ inputs=[
315
+ voice_input,
316
+ target_language,
317
+ stt_provider,
318
+ max_tokens,
319
+ temperature,
320
+ enable_voice_output,
321
+ tts_provider,
322
+ tts_voice
323
+ ],
324
+ outputs=[input_text, translated_text, detected_language, voice_output]
325
+ )
326
+
327
+ clear_btn.click(
328
+ lambda: ("", "", "", None, None),
329
+ outputs=[input_text, translated_text, detected_language, voice_output, voice_input]
330
+ )
331
+
332
+ return demo
333
+
334
+
335
+ # ============================================================================
336
+ # Main Entry Point
337
+ # ============================================================================
338
+
339
+ def main():
340
+ """Main entry point for the translator app."""
341
+ # Create custom theme
342
+ theme = gr.themes.Soft(
343
+ primary_hue=UIConfig.THEME_PRIMARY_HUE,
344
+ secondary_hue=UIConfig.THEME_SECONDARY_HUE,
345
+ neutral_hue=UIConfig.THEME_NEUTRAL_HUE,
346
+ font=UIConfig.THEME_FONT
347
+ )
348
+
349
+ # Create and launch the UI
350
+ demo = create_ui()
351
+ demo.launch(
352
+ share=UIConfig.SHARE_LINK,
353
+ inbrowser=UIConfig.OPEN_IN_BROWSER,
354
+ theme=theme,
355
+ css=UIConfig.CUSTOM_CSS
356
+ )
357
+
358
+
359
+ if __name__ == "__main__":
360
+ main()
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
voice_handler.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Voice Handler Module
3
+ Provides Speech-to-Text (STT) and Text-to-Speech (TTS) capabilities
4
+ with multiple provider options for different cost/quality tiers.
5
+ """
6
+
7
+ import os
8
+ import tempfile
9
+ from abc import ABC, abstractmethod
10
+ from pathlib import Path
11
+ from typing import Optional, List, Dict
12
+ import asyncio
13
+
14
+ # Import voice processing libraries
15
+ from openai import OpenAI
16
+ import whisper
17
+ import edge_tts
18
+ from gtts import gTTS
19
+
20
+
21
+ # ============================================================================
22
+ # Configuration and Cost Tiers
23
+ # ============================================================================
24
+
25
class VoiceConfig:
    """Static registry of the voice providers and their characteristics.

    ``STT_PROVIDERS`` and ``TTS_PROVIDERS`` map a provider display name to a
    metadata dict holding its internal ``id``, cost tier, unit cost, whether
    an API key is required and (TTS only) the list of voices. The
    ``DEFAULT_*`` constants name the default selections.
    """

    # Speech-to-Text providers, keyed by display name.
    STT_PROVIDERS = {
        "OpenAI Whisper API": dict(
            id="openai_whisper",
            cost_tier="medium",
            cost_per_minute=0.006,
            requires_api_key=True,
        ),
        "Local Whisper (Tiny)": dict(
            id="local_whisper_tiny",
            cost_tier="free",
            cost_per_minute=0.0,
            requires_api_key=False,
        ),
        "Local Whisper (Base)": dict(
            id="local_whisper_base",
            cost_tier="free",
            cost_per_minute=0.0,
            requires_api_key=False,
        ),
    }

    # Text-to-Speech providers, keyed by display name.
    TTS_PROVIDERS = {
        "Edge-TTS (Free)": dict(
            id="edge_tts",
            cost_tier="free",
            cost_per_1k_chars=0.0,
            requires_api_key=False,
            voices=[
                "en-US-AriaNeural",
                "en-US-GuyNeural",
                "en-US-JennyNeural",
                "en-GB-SoniaNeural",
                "en-GB-RyanNeural",
            ],
        ),
        "OpenAI TTS": dict(
            id="openai_tts",
            cost_tier="medium",
            cost_per_1k_chars=0.015,
            requires_api_key=True,
            voices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
        ),
        "gTTS (Free)": dict(
            id="gtts",
            cost_tier="free",
            cost_per_1k_chars=0.0,
            requires_api_key=False,
            voices=["default"],
        ),
    }

    # Default selections (keys of the registries above, plus one Edge voice).
    DEFAULT_STT = "OpenAI Whisper API"
    DEFAULT_TTS = "Edge-TTS (Free)"
    DEFAULT_TTS_VOICE = "en-US-JennyNeural"
85
+
86
+
87
+ # ============================================================================
88
+ # Abstract Base Classes
89
+ # ============================================================================
90
+
91
class STTProvider(ABC):
    """Interface that every Speech-to-Text provider implements."""

    @abstractmethod
    def transcribe(self, audio_path: str) -> str:
        """
        Turn the audio file at ``audio_path`` into text.

        Args:
            audio_path: Path to audio file

        Returns:
            Transcribed text
        """
        ...
106
+
107
+
108
class TTSProvider(ABC):
    """Interface that every Text-to-Speech provider implements."""

    @abstractmethod
    def synthesize(self, text: str, output_path: Optional[str] = None) -> str:
        """
        Render ``text`` as speech and write it to an audio file.

        Args:
            text: Text to convert to speech
            output_path: Optional path to save audio file

        Returns:
            Path to generated audio file
        """
        ...

    @abstractmethod
    def get_available_voices(self) -> List[str]:
        """Return the voice names this provider offers."""
        ...
129
+
130
+
131
+ # ============================================================================
132
+ # STT Provider Implementations
133
+ # ============================================================================
134
+
135
class OpenAIWhisperSTT(STTProvider):
    """Speech-to-Text provider that calls the OpenAI transcription API."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Store the API key and build the OpenAI client.

        Args:
            api_key: Key to use; when falsy, the OPENAI_API_KEY environment
                variable is read instead.

        Raises:
            ValueError: If no key is available from either source.
        """
        resolved_key = api_key if api_key else os.getenv("OPENAI_API_KEY")
        self.api_key = resolved_key
        if not resolved_key:
            raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")

        self.client = OpenAI(api_key=resolved_key)

    def transcribe(self, audio_path: str) -> str:
        """
        Transcribe the file at ``audio_path`` with the "whisper-1" model.

        Any failure (opening the file or calling the API) is re-raised as a
        plain ``Exception`` with a descriptive message.
        """
        try:
            with open(audio_path, "rb") as handle:
                response = self.client.audio.transcriptions.create(
                    model="whisper-1",
                    file=handle,
                )
            text = response.text
        except Exception as err:
            raise Exception(f"OpenAI Whisper transcription failed: {str(err)}")
        return text
156
+
157
+
158
class LocalWhisperSTT(STTProvider):
    """Speech-to-Text provider that runs a Whisper model in this process."""

    def __init__(self, model_size: str = "base"):
        """
        Remember which model to use; nothing is loaded yet.

        Args:
            model_size: Model size (tiny, base, small, medium, large)
        """
        self.model = None
        self.model_size = model_size

    def _load_model(self):
        """Load the Whisper model on first use and keep it on the instance."""
        if self.model is not None:
            return
        self.model = whisper.load_model(self.model_size)

    def transcribe(self, audio_path: str) -> str:
        """
        Transcribe the file at ``audio_path`` with the local model.

        The model is loaded outside the ``try`` block, so load errors
        propagate unchanged; transcription errors are re-raised as a plain
        ``Exception`` with a descriptive message.
        """
        self._load_model()
        try:
            outcome = self.model.transcribe(audio_path)
            text = outcome["text"]
        except Exception as err:
            raise Exception(f"Local Whisper transcription failed: {str(err)}")
        return text
184
+
185
+
186
+ # ============================================================================
187
+ # TTS Provider Implementations
188
+ # ============================================================================
189
+
190
class EdgeTTSProvider(TTSProvider):
    """Microsoft Edge TTS implementation (free)."""

    def __init__(self, voice: str = "en-US-JennyNeural"):
        """
        Args:
            voice: Edge TTS voice name passed to ``edge_tts.Communicate``.
        """
        self.voice = voice

    def synthesize(self, text: str, output_path: Optional[str] = None) -> str:
        """
        Synthesize speech using Edge TTS.

        Args:
            text: Text to convert to speech
            output_path: Optional path to save audio file. When omitted, a
                fresh, uniquely named ``.mp3`` file is created in the system
                temp directory for this call.

        Returns:
            Path to generated audio file

        Raises:
            Exception: If synthesis fails; the original error is chained.
                NOTE: ``asyncio.run`` cannot be used from a thread that
                already has a running event loop, so calling this from async
                code ends up here as well.
        """
        created_temp = output_path is None
        if created_temp:
            # A pid-based name is identical for every call in this process,
            # so concurrent requests would overwrite each other's audio.
            fd, output_path = tempfile.mkstemp(prefix="tts_", suffix=".mp3")
            os.close(fd)

        # Edge TTS requires async
        async def _synthesize():
            communicate = edge_tts.Communicate(text, self.voice)
            await communicate.save(output_path)

        try:
            asyncio.run(_synthesize())
        except Exception as e:
            if created_temp:
                # Do not leave an empty placeholder file behind on failure.
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            raise Exception(f"Edge TTS synthesis failed: {str(e)}") from e
        return output_path

    def get_available_voices(self) -> List[str]:
        """Get available Edge TTS voices."""
        return VoiceConfig.TTS_PROVIDERS["Edge-TTS (Free)"]["voices"]
216
+
217
+
218
class OpenAITTSProvider(TTSProvider):
    """OpenAI TTS implementation."""

    def __init__(self, voice: str = "nova", api_key: Optional[str] = None):
        """
        Args:
            voice: OpenAI voice name sent with each synthesis request.
            api_key: Key to use; when falsy, the OPENAI_API_KEY environment
                variable is read instead.

        Raises:
            ValueError: If no key is available from either source.
        """
        self.voice = voice
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")

        self.client = OpenAI(api_key=self.api_key)

    def synthesize(self, text: str, output_path: Optional[str] = None) -> str:
        """
        Synthesize speech using OpenAI TTS (model "tts-1").

        Args:
            text: Text to convert to speech
            output_path: Optional path to save audio file. When omitted, a
                fresh, uniquely named ``.mp3`` file is created in the system
                temp directory for this call.

        Returns:
            Path to generated audio file

        Raises:
            Exception: If the request or the file write fails; the original
                error is chained.
        """
        created_temp = output_path is None
        if created_temp:
            # A pid-based name is identical for every call in this process,
            # so concurrent requests would overwrite each other's audio.
            fd, output_path = tempfile.mkstemp(prefix="tts_", suffix=".mp3")
            os.close(fd)

        try:
            response = self.client.audio.speech.create(
                model="tts-1",
                voice=self.voice,
                input=text
            )
            response.stream_to_file(output_path)
        except Exception as e:
            if created_temp:
                # Do not leave an empty placeholder file behind on failure.
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            raise Exception(f"OpenAI TTS synthesis failed: {str(e)}") from e
        return output_path

    def get_available_voices(self) -> List[str]:
        """Get available OpenAI TTS voices."""
        return VoiceConfig.TTS_PROVIDERS["OpenAI TTS"]["voices"]
248
+
249
+
250
class GTTSProvider(TTSProvider):
    """Google TTS implementation (free, basic quality)."""

    def __init__(self, voice: str = "default", lang: str = "en"):
        """
        Args:
            voice: Stored on the instance; ``synthesize`` does not use it.
            lang: Language code handed to gTTS. Defaults to "en", which is
                the value that was previously hard-coded, so existing callers
                are unaffected; pass the target language code to have
                translated text spoken in that language.
        """
        self.voice = voice
        self.lang = lang

    def synthesize(self, text: str, output_path: Optional[str] = None) -> str:
        """
        Synthesize speech using gTTS.

        Args:
            text: Text to convert to speech
            output_path: Optional path to save audio file. When omitted, a
                fresh, uniquely named ``.mp3`` file is created in the system
                temp directory for this call.

        Returns:
            Path to generated audio file

        Raises:
            Exception: If synthesis fails; the original error is chained.
        """
        created_temp = output_path is None
        if created_temp:
            # A pid-based name is identical for every call in this process,
            # so concurrent requests would overwrite each other's audio.
            fd, output_path = tempfile.mkstemp(prefix="tts_", suffix=".mp3")
            os.close(fd)

        try:
            tts = gTTS(text=text, lang=self.lang)
            tts.save(output_path)
        except Exception as e:
            if created_temp:
                # Do not leave an empty placeholder file behind on failure.
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            raise Exception(f"gTTS synthesis failed: {str(e)}") from e
        return output_path

    def get_available_voices(self) -> List[str]:
        """Get available gTTS voices."""
        return VoiceConfig.TTS_PROVIDERS["gTTS (Free)"]["voices"]
272
+
273
+
274
+ # ============================================================================
275
+ # Factory Functions
276
+ # ============================================================================
277
+
278
def create_stt_provider(provider_name: str) -> STTProvider:
    """
    Create an STT provider instance.

    Args:
        provider_name: Name of the provider (from VoiceConfig.STT_PROVIDERS)

    Returns:
        STTProvider instance

    Raises:
        ValueError: If ``provider_name`` is not a key of
            VoiceConfig.STT_PROVIDERS, or its id has no implementation here.
    """
    # Look the name up with .get so an unknown name produces the documented
    # ValueError instead of a bare KeyError from the dict subscript.
    provider_info = VoiceConfig.STT_PROVIDERS.get(provider_name)
    if provider_info is None:
        raise ValueError(f"Unknown STT provider: {provider_name}")

    provider_id = provider_info["id"]

    if provider_id == "openai_whisper":
        return OpenAIWhisperSTT()
    elif provider_id == "local_whisper_tiny":
        return LocalWhisperSTT(model_size="tiny")
    elif provider_id == "local_whisper_base":
        return LocalWhisperSTT(model_size="base")
    else:
        # Registered in VoiceConfig but not wired to a class in this factory.
        raise ValueError(f"Unknown STT provider: {provider_name}")
298
+
299
+
300
def create_tts_provider(provider_name: str, voice: Optional[str] = None) -> TTSProvider:
    """
    Create a TTS provider instance.

    Args:
        provider_name: Name of the provider (from VoiceConfig.TTS_PROVIDERS)
        voice: Optional voice name. When missing or empty, the first voice
            listed for the provider is used.

    Returns:
        TTSProvider instance

    Raises:
        ValueError: If ``provider_name`` is not a key of
            VoiceConfig.TTS_PROVIDERS, or its id has no implementation here.
    """
    # Single lookup with .get so an unknown name produces the documented
    # ValueError instead of a bare KeyError from the dict subscript.
    provider_info = VoiceConfig.TTS_PROVIDERS.get(provider_name)
    if provider_info is None:
        raise ValueError(f"Unknown TTS provider: {provider_name}")

    provider_id = provider_info["id"]

    # Use default voice if not specified (None or an empty string)
    if not voice:
        voice = provider_info["voices"][0]

    if provider_id == "edge_tts":
        return EdgeTTSProvider(voice=voice)
    elif provider_id == "openai_tts":
        return OpenAITTSProvider(voice=voice)
    elif provider_id == "gtts":
        return GTTSProvider(voice=voice)
    else:
        # Registered in VoiceConfig but not wired to a class in this factory.
        raise ValueError(f"Unknown TTS provider: {provider_name}")
326
+
327
+
328
def get_available_stt_providers() -> List[str]:
    """Return the display names of all registered STT providers."""
    return [*VoiceConfig.STT_PROVIDERS]
331
+
332
+
333
def get_available_tts_providers() -> List[str]:
    """Return the display names of all registered TTS providers."""
    return [*VoiceConfig.TTS_PROVIDERS]
336
+
337
+
338
def get_voices_for_provider(provider_name: str) -> List[str]:
    """Return the voices registered for a TTS provider, or [] if unknown."""
    entry = VoiceConfig.TTS_PROVIDERS.get(provider_name)
    return [] if entry is None else entry["voices"]
343
+
344
+
345
def get_provider_info(provider_name: str, provider_type: str = "tts") -> Dict:
    """
    Look up the metadata dict registered for a provider.

    Args:
        provider_name: Name of the provider
        provider_type: "tts" selects the TTS registry; any other value
            selects the STT registry

    Returns:
        Provider information dictionary, or an empty dict when the name is
        not registered
    """
    registry = (
        VoiceConfig.TTS_PROVIDERS
        if provider_type == "tts"
        else VoiceConfig.STT_PROVIDERS
    )
    return registry.get(provider_name, {})