Spaces:
Sleeping
Sleeping
| """ | |
| Command Line Interface for Speech Translation System | |
| This module provides a user-friendly CLI for the speech translation system. | |
| """ | |
| import click | |
| import logging | |
| import sys | |
| from pathlib import Path | |
| from typing import Optional, List | |
| import json | |
| from rich.console import Console | |
| from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn | |
| from rich.table import Table | |
| from rich.panel import Panel | |
| from rich import print as rprint | |
| from ..pipeline.main_pipeline import create_speech_translator, SpeechTranslator | |
| from ..config import SUPPORTED_LANGUAGES, WHISPER_MODEL_SIZE, DEFAULT_TRANSLATION_SERVICE, TTS_MODEL | |
# Initialize rich console
# Module-level Console instance; every function in this file prints through it.
console = Console()
def setup_logging(verbose: bool = False):
    """Configure logging to go to both a log file and a stream handler.

    Args:
        verbose: When true the level is DEBUG, otherwise INFO.
    """
    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO

    # The file handler uses a relative path, so the log file lands in
    # whatever directory the process is started from.
    log_handlers = [
        logging.FileHandler('speech_translation.log'),
        logging.StreamHandler(),
    ]

    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=log_handlers,
        level=log_level,
    )
def cli(ctx, verbose):
    """Speech Translation System with Voice Cloning"""
    # NOTE(review): no decorators are visible on this function, yet main()
    # calls cli() with no arguments -- presumably click decorators exist in
    # the real file and were lost from this view; confirm before relying on it.
    # The docstring is kept verbatim because click would show it as help text.

    # Make sure the shared context object is a dict, then record the verbosity
    # flag there so other commands can read it via ctx.obj['verbose'].
    ctx.ensure_object(dict)
    ctx.obj.update({'verbose': verbose})

    setup_logging(verbose)
def translate(ctx, input_audio, voice_sample, source_lang, target_lang, output,
              speech_model, translation_engine, tts_model, device):
    """Translate audio file with voice cloning."""
    # Docstring kept verbatim: click (imported by this file) presumably shows
    # it as the command help text -- confirm.
    #
    # Parameters, as used below:
    #   ctx            -- context whose .obj dict may carry the 'verbose' flag.
    #   input_audio    -- path of the audio to translate.
    #   voice_sample   -- forwarded unchanged to translator.translate_audio.
    #   source_lang    -- key of SUPPORTED_LANGUAGES, or falsy (shown as 'auto').
    #   target_lang    -- key of SUPPORTED_LANGUAGES.
    #   output         -- destination path; when falsy it defaults to
    #                     "<input stem>_translated_<target_lang>.wav" next to
    #                     the input file.
    #   speech_model, translation_engine, tts_model, device
    #                  -- forwarded to create_speech_translator.
    #
    # The process exits with status 1 on an unsupported language, a failed
    # translation result, or any unexpected exception.
    #
    # NOTE(review): the symbol prefixes in the messages look like mis-decoded
    # emoji; they are left untouched here -- confirm the file's encoding.
    try:
        # Validate language codes
        if target_lang not in SUPPORTED_LANGUAGES:
            console.print(f"[red]Error: Unsupported target language '{target_lang}'[/red]")
            console.print("Supported languages:", list(SUPPORTED_LANGUAGES.keys()))
            sys.exit(1)
        if source_lang and source_lang not in SUPPORTED_LANGUAGES:
            console.print(f"[red]Error: Unsupported source language '{source_lang}'[/red]")
            # Same hint as for the target language, so both errors are actionable.
            console.print("Supported languages:", list(SUPPORTED_LANGUAGES.keys()))
            sys.exit(1)

        # Generate output path if not provided
        if not output:
            input_path = Path(input_audio)
            output = input_path.parent / f"{input_path.stem}_translated_{target_lang}.wav"

        console.print(Panel.fit("ποΈ Speech Translation System", style="bold blue"))
        console.print(f"π Input: {input_audio}")
        console.print(f"π― Voice Sample: {voice_sample}")
        console.print(f"π Translation: {source_lang or 'auto'} β {target_lang}")
        console.print(f"πΎ Output: {output}")

        # Progress tracking: echo every message the translator reports.
        def progress_callback(message):
            console.print(f"β³ {message}")

        # Initialize translator. Construction is deferred (initialize=False)
        # so the callback is attached before initialize() runs.
        console.print("\nπ Initializing translation system...")
        translator = create_speech_translator(
            speech_model=speech_model,
            translation_engine=translation_engine,
            tts_model=tts_model,
            device=device,
            initialize=False
        )
        translator.progress_callback = progress_callback
        translator.initialize()

        # Perform translation
        console.print("\nπ Starting translation process...")
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TimeRemainingColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Translating...", total=100)
            result = translator.translate_audio(
                input_audio=input_audio,
                source_lang=source_lang,
                target_lang=target_lang,
                voice_sample=voice_sample,
                output_path=output,
                return_intermediate=True
            )
            # Nothing advances the bar while translate_audio runs, so fill it
            # once the call has returned successfully.
            if result['success']:
                progress.update(task, completed=100)

        # Display results
        if result['success']:
            console.print("\nβ [green]Translation completed successfully![/green]")

            # Create results table
            table = Table(title="Translation Results")
            table.add_column("Property", style="cyan")
            table.add_column("Value", style="white")
            table.add_row("Original Text", _truncate_text(result['original_text']))
            table.add_row("Translated Text", _truncate_text(result['translated_text']))
            table.add_row("Source Language", result['source_language'])
            table.add_row("Target Language", result['target_language'])
            table.add_row("Processing Time", f"{result['processing_time']:.2f} seconds")
            table.add_row("Audio Duration", f"{result['audio_duration']:.2f} seconds")
            table.add_row("Output File", str(result['output_audio']))
            console.print(table)
        else:
            console.print(f"\nβ [red]Translation failed: {result['error']}[/red]")
            sys.exit(1)
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass, so
        # the deliberate exits above are not intercepted here.
        console.print(f"\nπ₯ [red]Unexpected error: {e}[/red]")
        # Tolerate a missing context dict so error reporting cannot itself fail.
        if (ctx.obj or {}).get('verbose'):
            console.print_exception()
        sys.exit(1)


def _truncate_text(text, limit=100):
    """Return text unchanged when it fits in limit characters, else its first limit characters plus '...'."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text
def text_to_speech(text, voice_sample, source_lang, target_lang, output, tts_model, device):
    """Translate text and generate speech with voice cloning."""
    # Docstring kept verbatim: click presumably shows it as help text -- confirm.
    #
    # Parameters, as used below:
    #   text          -- text passed to translator.translate_text_with_voice.
    #   voice_sample  -- forwarded unchanged to the translator.
    #   source_lang, target_lang -- forwarded unchanged; not validated here.
    #   output        -- destination path; when falsy it defaults to
    #                    "translated_speech_<target_lang>.wav".
    #   tts_model, device -- forwarded to create_speech_translator.
    #
    # Exits with status 1 when the result reports failure or an exception occurs.
    try:
        # Default the output path when none was given
        if not output:
            output = f"translated_speech_{target_lang}.wav"

        console.print(Panel.fit("π Text to Speech Translation", style="bold green"))
        console.print(f"π Text: {text}")
        console.print(f"π― Voice Sample: {voice_sample}")
        console.print(f"π Translation: {source_lang} β {target_lang}")

        # Initialize translator
        translator = create_speech_translator(tts_model=tts_model, device=device)

        # Perform translation and speech generation
        result = translator.translate_text_with_voice(
            text=text,
            source_lang=source_lang,
            target_lang=target_lang,
            voice_sample=voice_sample,
            output_path=output
        )

        if result['success']:
            console.print("\nβ [green]Text translation completed![/green]")
            console.print(f"π΅ Audio saved to: {result['output_audio']}")
        else:
            console.print(f"\nβ [red]Translation failed: {result['error']}[/red]")
            # Signal the failure to the calling shell, as translate() does.
            sys.exit(1)
    except Exception as e:
        # SystemExit from the branch above is not an Exception, so it passes through.
        console.print(f"\nπ₯ [red]Error: {e}[/red]")
        sys.exit(1)
def batch(audio_files, voice_sample, target_lang, output_dir, speech_model, device):
    """Batch translate multiple audio files."""
    # Docstring kept verbatim: click presumably shows it as help text -- confirm.
    #
    # Parameters, as used below:
    #   audio_files  -- sized iterable of input paths (len() and list() are applied).
    #   voice_sample, target_lang -- forwarded to translator.batch_translate_audio.
    #   output_dir   -- directory for results; when falsy it defaults to
    #                   "<cwd>/translated_batch". Created if missing.
    #   speech_model, device -- forwarded to create_speech_translator.
    #
    # Exits with status 1 only when an exception is raised; individual failed
    # files are listed but do not change the exit status.
    try:
        output_dir = Path(output_dir) if output_dir else Path.cwd() / "translated_batch"
        # parents=True so a nested output directory can be created in one go.
        output_dir.mkdir(parents=True, exist_ok=True)

        console.print(Panel.fit("π¦ Batch Translation", style="bold yellow"))
        console.print(f"π Files: {len(audio_files)} audio files")
        console.print(f"π― Voice Sample: {voice_sample}")
        console.print(f"π Target Language: {target_lang}")
        console.print(f"πΎ Output Directory: {output_dir}")

        # Initialize translator
        translator = create_speech_translator(speech_model=speech_model, device=device)

        # Perform batch translation
        with Progress(console=console) as progress:
            task = progress.add_task("Processing batch...", total=len(audio_files))
            result = translator.batch_translate_audio(
                audio_files=list(audio_files),
                target_lang=target_lang,
                voice_sample=voice_sample,
                output_dir=output_dir
            )
            # The whole batch is one call, so the bar jumps straight to done.
            progress.update(task, completed=len(audio_files))

        # Display results
        console.print("\nπ Batch processing completed!")
        console.print(f"β Successful: {result['successful']}")
        console.print(f"β Failed: {result['failed']}")
        if result['failed_files']:
            console.print("\nπ¨ Failed files:")
            for failed in result['failed_files']:
                console.print(f" - {failed['file']}: {failed['error']}")
    except Exception as e:
        console.print(f"\nπ₯ [red]Error: {e}[/red]")
        sys.exit(1)
def register_speaker(speaker_name, voice_samples, session_dir):
    """Register a speaker voice for reuse."""
    # Docstring kept verbatim: click presumably shows it as help text -- confirm.
    #
    # Parameters, as used below:
    #   speaker_name  -- name passed to cloner.register_voice.
    #   voice_samples -- iterable of samples; converted to a list before use.
    #   session_dir   -- when truthy, speaker data is saved to this path.
    #
    # Exits with status 1 if any step raises.
    try:
        console.print(Panel.fit(f"π€ Registering Speaker: {speaker_name}", style="bold purple"))

        # Initialize voice cloner. The import is local, so the voice_cloning
        # package is only loaded when this command actually runs.
        from ..voice_cloning.voice_cloner import create_voice_cloner
        cloner = create_voice_cloner()

        # Register speaker
        result = cloner.register_voice(speaker_name, list(voice_samples))
        console.print("\nβ [green]Speaker registered successfully![/green]")
        console.print(f"π€ Speaker: {result['speaker_name']}")
        console.print(f"π΅ Samples: {result['num_samples']}")
        console.print(f"β±οΈ Duration: {result['total_duration']:.1f} seconds")

        # Save to session if specified
        if session_dir:
            session_path = Path(session_dir)
            cloner.save_speaker_data(session_path)
            console.print(f"πΎ Saved to session: {session_path}")
    except Exception as e:
        console.print(f"\nπ₯ [red]Error: {e}[/red]")
        sys.exit(1)
def languages():
    """List supported languages."""
    # Docstring kept verbatim: click presumably shows it as help text -- confirm.
    console.print(Panel.fit("π Supported Languages", style="bold blue"))

    # Two-column table: language code on the left, language name on the right.
    language_table = Table()
    for heading, colour in (("Code", "cyan"), ("Language", "white")):
        language_table.add_column(heading, style=colour)

    for lang_code in SUPPORTED_LANGUAGES:
        language_table.add_row(lang_code, SUPPORTED_LANGUAGES[lang_code])

    console.print(language_table)
def info(speech_model, translation_engine, tts_model, device):
    """Show system information and status."""
    # Docstring kept verbatim: click presumably shows it as help text -- confirm.
    #
    # All four parameters are forwarded to create_speech_translator. The
    # translator is created with initialize=False and then asked for
    # get_system_info(), which is expected to return a dict with the keys
    # 'configuration', 'components_loaded' and 'statistics' (each a dict).
    #
    # Exits with status 1 if anything raises, matching the other commands.
    try:
        console.print(Panel.fit("βΉοΈ System Information", style="bold cyan"))

        # Create translator to get system info
        translator = create_speech_translator(
            speech_model=speech_model,
            translation_engine=translation_engine,
            tts_model=tts_model,
            device=device,
            initialize=False
        )
        info_data = translator.get_system_info()

        # Configuration table
        console.print(_two_column_table(
            "Configuration",
            ("Component", "Setting"),
            ((_humanize_key(key), str(value))
             for key, value in info_data['configuration'].items()),
        ))

        # Component status
        console.print(_two_column_table(
            "Component Status",
            ("Component", "Status"),
            ((_humanize_key(component), "β Loaded" if loaded else "β Not Loaded")
             for component, loaded in info_data['components_loaded'].items()),
        ))

        # Statistics are shown only when at least one value is truthy.
        if any(info_data['statistics'].values()):
            console.print(_two_column_table(
                "Usage Statistics",
                ("Metric", "Value"),
                ((_humanize_key(key), str(value))
                 for key, value in info_data['statistics'].items()),
            ))
    except Exception as e:
        console.print(f"\nπ₯ [red]Error getting system info: {e}[/red]")
        sys.exit(1)


def _humanize_key(key):
    """Turn a snake_case key into a Title Case label."""
    return key.replace('_', ' ').title()


def _two_column_table(title, headers, rows):
    """Build a titled rich Table with a cyan first column and a white second column."""
    table = Table(title=title)
    table.add_column(headers[0], style="cyan")
    table.add_column(headers[1], style="white")
    for left, right in rows:
        table.add_row(left, right)
    return table
def save_session(session_path):
    """Save current session including registered speakers."""
    # Docstring kept verbatim: click presumably shows it as help text -- confirm.
    #
    # session_path is passed straight to translator.save_session.
    # Exits with status 1 if saving raises, matching the other commands.
    #
    # NOTE(review): the translator is freshly created here with
    # initialize=False, so whatever it saves is the state of a brand-new
    # instance -- confirm this is what "current session" is meant to be.
    try:
        # Create a basic translator and save session
        translator = create_speech_translator(initialize=False)
        translator.save_session(session_path)
        console.print(f"πΎ Session saved to: {session_path}")
    except Exception as e:
        console.print(f"π₯ [red]Error saving session: {e}[/red]")
        sys.exit(1)
def load_session(session_path):
    """Load previous session."""
    # Docstring kept verbatim: click presumably shows it as help text -- confirm.
    #
    # session_path is passed straight to translator.load_session; afterwards
    # the registered speaker names are listed when there are any.
    # Exits with status 1 if loading raises, matching the other commands.
    try:
        translator = create_speech_translator(initialize=False)
        translator.load_session(session_path)
        console.print(f"π Session loaded from: {session_path}")

        # Show loaded speakers
        speakers = translator.get_registered_speakers()
        if speakers:
            console.print(f"π₯ Registered speakers: {', '.join(speakers)}")
    except Exception as e:
        console.print(f"π₯ [red]Error loading session: {e}[/red]")
        sys.exit(1)
def main():
    """Main CLI entry point.

    Runs ``cli`` and turns a keyboard interrupt or any other uncaught
    exception into a printed message followed by exit status 1.
    """
    try:
        cli()
    except KeyboardInterrupt:
        console.print("\nπ Operation cancelled by user")
        sys.exit(1)
    except Exception as e:
        # SystemExit raised by the commands is not an Exception, so their own
        # exit statuses pass through untouched.
        console.print(f"\nπ₯ [red]Unexpected error: {e}[/red]")
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
# NOTE(review): the file uses relative imports (from ..pipeline / ..config),
# so direct execution presumably needs `python -m <package>.<module>` -- confirm.
if __name__ == '__main__':
    main()