"""
Production-Ready FastAPI Backend for DeBERTa Emotion Detection
Optimized for 710MB model with efficient tokenization and inference
"""

import gc
import json
import logging
import os
import time
import warnings
from contextlib import asynccontextmanager
from typing import Dict, List, Optional

import httpx
import psutil
import torch
import torch.nn as nn
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, field_validator
from transformers import DebertaV2Model, DebertaV2Tokenizer

# Silence FutureWarning messages attributed to the huggingface_hub module
warnings.filterwarnings('ignore', category=FutureWarning, module='huggingface_hub')

# ==================== LOGGING SETUP ====================
# Configure the root logger: INFO level, with timestamp, logger name and
# level prepended to every message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# ==================== ENVIRONMENT SETUP ====================
load_dotenv()  # Load variables from a .env file into the environment
# Defaults to an empty string when unset; fetch_movies_by_genres() answers
# with a 500 error in that case instead of calling TMDB.
TMDB_API_KEY = os.getenv('TMDB_API_KEY', '')
TMDB_BASE_URL = "https://api.themoviedb.org/3"
# NOTE(review): not referenced in the visible part of this file — presumably
# meant for building poster URLs from MovieItem.poster_path; confirm or remove.
TMDB_IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"

# ==================== CONFIGURATION ====================
class Config:
    """Static settings for model loading, inference and emotion-to-genre mapping."""
    # Directory holding config.json and the weight files read by
    # ModelManager.load_model(). Relative, so it is resolved against the
    # process working directory, not against this file.
    MODEL_PATH = "./../../models/"  # Path to your saved model
    MAX_LENGTH = 128  # Token length every input is padded/truncated to in predict()
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    # NOTE(review): not referenced in the visible part of this file.
    BATCH_SIZE = 8  # For batch predictions
    MAX_TEXT_LENGTH = 5000  # Character limit for input
    
    # predict() indexes this list with the model's argmax, so the order
    # presumably has to match the label ids used in training — TODO confirm.
    EMOTION_CLASSES = ['anger', 'fear', 'joy', 'love', 'neutral', 'sadness', 'surprise']
    
    # Emotion to Genre Mapping (4 genres per emotion, except 'joy' which has 5).
    # fetch_movies_by_genres() matches these names case-insensitively against
    # TMDB's genre list and skips (with a warning) any name TMDB does not return.
    EMOTION_GENRE_MAP = {
        'anger': ['Action', 'Crime', 'Thriller', 'Revenge-Drama'],
        'fear': ['Horror', 'Thriller', 'Mystery', 'Supernatural'],
        'joy': ['Comedy', 'Adventure', 'Family', 'Animation', 'Musical'],
        'love': ['Romance', 'Rom-Com', 'Emotional Drama', 'Fantasy'],
        'neutral': ['Documentary', 'Drama', 'Biography', 'Slice-of-Life'],
        'sadness': ['Drama', 'Romance', 'Indie', 'Healing-Stories'],
        'surprise': ['Mystery', 'Sci-Fi', 'Fantasy', 'Twist-Thriller']
    }
    
    # Performance settings
    TORCH_THREADS = 4  # Passed to torch.set_num_threads() at import time
    USE_HALF_PRECISION = False  # FP16 for GPU

# Module-wide settings object; every attribute read goes through this instance.
config = Config()

# Set torch threads for CPU inference (runs once, at import time)
torch.set_num_threads(config.TORCH_THREADS)

# ==================== MODEL DEFINITION ====================
class DeBERTaEmotionClassifier(nn.Module):
    """Emotion classifier: DeBERTa-v2 encoder, dropout, then a linear head.

    The head reads the encoder's hidden state at sequence position 0 and
    produces one logit per label.
    """

    def __init__(self, config_dict: Dict, num_labels: int):
        """Build the encoder from ``config_dict`` and a ``num_labels``-way head."""
        super().__init__()
        from transformers import DebertaV2Config

        encoder_config = DebertaV2Config(**config_dict)
        # Attribute names define the state-dict key prefixes
        # ("deberta.", "classifier."), so they must stay as they are.
        self.deberta = DebertaV2Model(encoder_config)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(encoder_config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return the raw logits for the given token ids and attention mask."""
        encoded = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        # Hidden state of the first token of every sequence in the batch.
        first_token = encoded.last_hidden_state[:, 0, :]
        return self.classifier(self.dropout(first_token))

# ==================== MODEL MANAGER ====================
class ModelManager:
    """Process-wide singleton that owns the tokenizer/model pair and runs inference.

    ``ModelManager()`` always returns the same instance. ``model`` and
    ``tokenizer`` are ``None`` until :meth:`load_model` succeeds and are reset
    to ``None`` by :meth:`cleanup`.
    """
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance.initialized = False
        return cls._instance

    def __init__(self):
        # __init__ runs on every ModelManager() call; only the first one may
        # reset the attributes, otherwise a loaded model would be dropped.
        if not self.initialized:
            self.model = None
            self.tokenizer = None
            self.initialized = True

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _fp16_enabled() -> bool:
        """Return True when half precision is requested and the device is CUDA."""
        return bool(config.USE_HALF_PRECISION) and config.DEVICE == "cuda"

    @staticmethod
    def _normalize_state_dict_keys(
        state_dict: Dict[str, torch.Tensor],
        num_labels: int = 7,
        hidden_size: int = 768,
    ) -> Dict[str, torch.Tensor]:
        """Rename state-dict keys so they match DeBERTaEmotionClassifier.

        - Bare ``weight`` / ``bias`` entries shaped like the classifier head
          (``[num_labels, hidden_size]`` / ``[num_labels]``) get the
          ``classifier.`` prefix.
        - Encoder/embedding keys saved without a prefix get ``deberta.``.
        - Everything else (including keys already starting with ``deberta.``)
          is passed through unchanged.

        Args:
            state_dict: Mapping of parameter name to tensor.
            num_labels: Expected number of classifier outputs.
            hidden_size: Expected classifier input width.

        Returns:
            A new dict with normalized keys (the input itself when it is empty).
        """
        if not state_dict:
            return state_dict

        encoder_prefixes = (
            "embeddings.", "encoder.", "rel_embeddings", "LayerNorm", "word_embeddings",
        )
        mapped = {}
        for key, tensor in state_dict.items():
            shape = tuple(tensor.shape)
            if key == "weight" and shape == (num_labels, hidden_size):
                mapped["classifier.weight"] = tensor
            elif key == "bias" and shape == (num_labels,):
                mapped["classifier.bias"] = tensor
            elif key.startswith(encoder_prefixes):
                mapped[f"deberta.{key}"] = tensor
            else:
                mapped[key] = tensor
        return mapped

    def _normalize_for_model(self, state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Normalize keys using the dimensions of the instantiated classifier head."""
        head = self.model.classifier
        return self._normalize_state_dict_keys(
            state_dict, num_labels=head.out_features, hidden_size=head.in_features
        )

    def _apply_state_dict(self, state_dict: Dict[str, torch.Tensor], source: str) -> None:
        """Load ``state_dict`` non-strictly into the model and log the key report."""
        load_res = self.model.load_state_dict(self._normalize_for_model(state_dict), strict=False)
        logger.info(
            "Loaded %s (missing: %s, unexpected: %s)",
            source,
            getattr(load_res, 'missing_keys', []),
            getattr(load_res, 'unexpected_keys', []),
        )

    def _load_tokenizer(self):
        """Return a tokenizer from the local vocab file or from HuggingFace."""
        logger.info("Loading tokenizer...")
        vocab_path = os.path.join(config.MODEL_PATH, 'vocab.json')
        if os.path.exists(vocab_path):
            # NOTE(review): DebertaV2Tokenizer is SentencePiece-based and
            # normally ships an `spm.model` file; confirm that a `vocab.json`
            # is really what this branch should be given.
            logger.info("Loading tokenizer from local files...")
            return DebertaV2Tokenizer(vocab_file=vocab_path)
        logger.info("Downloading tokenizer from HuggingFace...")
        return DebertaV2Tokenizer.from_pretrained("microsoft/deberta-v3-base")

    def _load_safetensors_weights(self, path: str) -> bool:
        """Best-effort load of ``model.safetensors``; return True when applied.

        Any failure is logged and swallowed so the torch checkpoint can still
        be tried afterwards.
        """
        if not os.path.exists(path):
            return False
        try:
            logger.info("Loading full trained model from safetensors...")
            try:
                from safetensors.torch import load_file
            except Exception as ie:
                logger.error("safetensors package not available: %s", ie)
                raise
            self._apply_state_dict(load_file(path), "safetensors model")
            return True
        except Exception as e:
            logger.error("Error loading safetensors model: %s", e, exc_info=True)
            return False

    def _load_head_only_checkpoint(self, checkpoint: Dict) -> None:
        """Load an old-style checkpoint that stores only classifier/dropout state."""
        if 'classifier_state_dict' in checkpoint:
            head_state = checkpoint['classifier_state_dict']
            try:
                self.model.classifier.load_state_dict(head_state)
            except Exception:
                # Keys did not match the bare Linear layer; retry against the
                # whole model after key normalization.
                self.model.load_state_dict(self._normalize_for_model(head_state), strict=False)
        if 'dropout_state_dict' in checkpoint:
            try:
                self.model.dropout.load_state_dict(checkpoint['dropout_state_dict'])
            except Exception:
                logger.warning('Could not load dropout state dict')

    def _load_checkpoint_weights(self, path: str) -> None:
        """Load ``classifier.pt`` into the model; errors are logged and re-raised."""
        try:
            logger.info("Loading trained checkpoint (torch) ...")
            # SECURITY: weights_only=False lets torch.load unpickle arbitrary
            # objects, which can execute code. Only point MODEL_PATH at
            # checkpoints from a trusted source.
            checkpoint = torch.load(path, map_location=config.DEVICE, weights_only=False)

            if not isinstance(checkpoint, dict):
                logger.warning("Checkpoint loaded but is not a dict, skipping")
                return

            # The layout depends on how the training script saved the file.
            if 'model_state_dict' in checkpoint:
                state = checkpoint['model_state_dict']
            elif 'state_dict' in checkpoint:
                state = checkpoint['state_dict']
            elif 'classifier_state_dict' in checkpoint or 'dropout_state_dict' in checkpoint:
                self._load_head_only_checkpoint(checkpoint)
                state = None
            else:
                state = checkpoint

            if state:
                self._apply_state_dict(state, "checkpoint")
        except Exception as e:
            logger.error("Error loading checkpoint: %s", e, exc_info=True)
            raise

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def load_model(self):
        """Load the tokenizer, build the classifier and restore trained weights.

        Weight files are looked up under ``config.MODEL_PATH`` and applied in
        this order, each one only if it exists:

        1. ``model.safetensors`` - failures are logged and ignored.
        2. ``classifier.pt`` - failures abort loading.

        Returns:
            True when loading succeeded.

        Raises:
            HTTPException: status 500 when ``config.json`` is missing or any
                other step fails. ``model`` and ``tokenizer`` are reset to
                ``None`` in that case.
        """
        try:
            logger.info(f"Loading model on device: {config.DEVICE}")
            start_time = time.time()

            logger.info("Loading model config...")
            config_path = os.path.join(config.MODEL_PATH, "config.json")
            if not os.path.exists(config_path):
                raise FileNotFoundError(f"Model config not found at {config_path}")
            with open(config_path, 'r', encoding='utf-8') as f:
                model_config = json.load(f)

            self.tokenizer = self._load_tokenizer()

            logger.info("Loading model architecture from config...")
            self.model = DeBERTaEmotionClassifier(
                config_dict=model_config, num_labels=len(config.EMOTION_CLASSES)
            )

            safetensors_path = os.path.join(config.MODEL_PATH, "model.safetensors")
            classifier_path = os.path.join(config.MODEL_PATH, "classifier.pt")

            safetensors_loaded = self._load_safetensors_weights(safetensors_path)
            if os.path.exists(classifier_path):
                self._load_checkpoint_weights(classifier_path)
            elif safetensors_loaded:
                logger.info(f"No checkpoint at {classifier_path}; using safetensors weights only")
            else:
                # The architecture is built from config only, so without any
                # weight file the parameters are freshly initialised.
                logger.warning(
                    f"No trained model found at {safetensors_path} or {classifier_path}, "
                    "using randomly initialised weights"
                )

            self.model.to(config.DEVICE)
            self.model.eval()

            if config.USE_HALF_PRECISION:
                if self._fp16_enabled():
                    self.model.half()
                    logger.info("Applied FP16 precision")
                else:
                    logger.warning("USE_HALF_PRECISION ignored: FP16 is only applied on CUDA devices")

            if config.DEVICE == "cuda":
                torch.backends.cudnn.benchmark = True

            load_time = time.time() - start_time
            logger.info(f"Model loaded successfully in {load_time:.2f}s")
            logger.info(f"Memory usage: {psutil.Process().memory_info().rss / 1024 ** 2:.2f} MB")

            return True

        except FileNotFoundError as e:
            self.model = None
            self.tokenizer = None
            logger.error(f"Model files not found: {e}")
            raise HTTPException(status_code=500, detail=f"Model files not found: {str(e)}") from e
        except Exception as e:
            self.model = None
            self.tokenizer = None
            logger.error(f"Error loading model: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Failed to load model: {str(e)}") from e

    @torch.no_grad()
    def predict(self, text: str) -> Dict:
        """Classify ``text`` and return the winning emotion with all probabilities.

        Args:
            text: Input text; it is truncated/padded to ``config.MAX_LENGTH`` tokens.

        Returns:
            Dict with keys ``emotion``, ``confidence``, ``all_probabilities``
            (emotion name -> probability) and ``inference_time_ms``.

        Raises:
            HTTPException: status 500 when the model is not loaded or
                inference fails.
        """
        try:
            model = getattr(self, "model", None)
            tokenizer = getattr(self, "tokenizer", None)
            if model is None or tokenizer is None:
                raise ValueError("Model not loaded")

            start_time = time.perf_counter()

            encoding = tokenizer(
                text,
                add_special_tokens=True,
                max_length=config.MAX_LENGTH,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt'
            )

            # Token ids stay integer tensors: a round trip through float16
            # cannot represent ids above 2048 exactly and would corrupt them.
            input_ids = encoding['input_ids'].to(config.DEVICE)
            attention_mask = encoding['attention_mask'].to(config.DEVICE)

            if self._fp16_enabled():
                with torch.autocast(device_type="cuda", dtype=torch.float16):
                    logits = model(input_ids, attention_mask)
            else:
                logits = model(input_ids, attention_mask)

            # Softmax in float32 so half-precision logits do not lose accuracy.
            probs = torch.softmax(logits.float(), dim=-1)
            confidence, predicted_class = torch.max(probs, dim=-1)

            probs_list = probs.cpu().tolist()[0]
            predicted_idx = predicted_class.item()

            emotion_probs = {
                emotion: float(probs_list[i])
                for i, emotion in enumerate(config.EMOTION_CLASSES)
            }

            inference_time = time.perf_counter() - start_time

            return {
                "emotion": config.EMOTION_CLASSES[predicted_idx],
                "confidence": confidence.item(),
                "all_probabilities": emotion_probs,
                "inference_time_ms": round(inference_time * 1000, 2)
            }

        except Exception as e:
            logger.error(f"Prediction error: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}") from e

    def cleanup(self):
        """Release the model and tokenizer and free cached GPU memory.

        The attributes are set to ``None`` rather than deleted so later reads
        of ``model`` / ``tokenizer`` keep working. Errors are logged, never
        raised.
        """
        try:
            self.model = None
            self.tokenizer = None
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            logger.info("Model resources cleaned up")
        except Exception as e:
            logger.error(f"Cleanup error: {e}")

# ==================== LIFESPAN MANAGEMENT ====================
model_manager = ModelManager()

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifecycle.

    Loads the model before the app starts serving and releases it on
    shutdown. A failed model load is logged and re-raised so the application
    does not start without a model.
    """
    # Startup
    logger.info("Starting application...")
    try:
        model_manager.load_model()
        logger.info("Application ready")
    except Exception as e:
        logger.error(f"Startup failed: {e}")
        raise

    try:
        yield
    finally:
        # Shutdown: in `finally` so resources are released even when the
        # serving phase ends with an exception thrown into this generator.
        logger.info("Shutting down application...")
        model_manager.cleanup()
        logger.info("Application stopped")

# ==================== FASTAPI APP ====================
app = FastAPI(
    title="Emotion Detection API",
    description="DeBERTa v3 based emotion detection for 7 emotion classes",
    version="1.0.0",
    lifespan=lifespan
)

# CORS middleware
FRONTEND_URL = 'https://moodflix-ai-nu.vercel.app'

app.add_middleware(
    CORSMiddleware,
    # Entries here are compared to the Origin header as exact strings;
    # wildcard patterns are not expanded, so they go in allow_origin_regex.
    allow_origins=[
        "http://localhost:3000",  # Local development
        "http://localhost:8000",  # Local backend
        FRONTEND_URL,  # Production frontend
    ],
    # All Vercel preview deployments (any single-label *.vercel.app host).
    # NOTE(review): together with allow_credentials=True this trusts every
    # site hosted on vercel.app — narrow the pattern to this project's
    # preview host names if credentials are ever used.
    allow_origin_regex=r"https://[A-Za-z0-9-]+\.vercel\.app",
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ==================== REQUEST/RESPONSE MODELS ====================
class PredictionRequest(BaseModel):
    # Request body with one text to classify; the raw text must be between
    # 1 and config.MAX_TEXT_LENGTH characters long.
    text: str = Field(..., min_length=1, max_length=config.MAX_TEXT_LENGTH)

    @field_validator('text')
    @classmethod
    def validate_text(cls, v):
        # Reject blank input; the endpoint receives the trimmed text.
        trimmed = v.strip() if v else ""
        if not trimmed:
            raise ValueError('Text cannot be empty or whitespace only')
        return trimmed

class PredictionResponse(BaseModel):
    # Response body of POST /predict.
    emotion: str  # Entry of config.EMOTION_CLASSES with the highest probability
    confidence: float  # Softmax probability of `emotion`
    all_probabilities: Dict[str, float]  # Emotion name -> softmax probability
    inference_time_ms: float  # Time spent inside ModelManager.predict()
    text_length: int  # Character count of the validated (stripped) request text

class HealthResponse(BaseModel):
    # Response body of GET /health.
    status: str  # The endpoint always reports "healthy" when it answers
    device: str  # config.DEVICE ("cuda" or "cpu")
    model_loaded: bool  # True when model_manager.model is not None
    memory_mb: float  # Resident set size of this process in MiB, 2 decimals

class BatchPredictionRequest(BaseModel):
    # Request body of POST /batch_predict: between 1 and 10 texts.
    texts: List[str] = Field(..., min_length=1, max_length=10)

    @field_validator('texts')
    @classmethod
    def validate_texts(cls, v):
        # Apply the same per-text character limit as PredictionRequest, so the
        # batch endpoint cannot be used to submit arbitrarily large inputs.
        if any(len(t) > config.MAX_TEXT_LENGTH for t in v):
            raise ValueError(f'Each text must be at most {config.MAX_TEXT_LENGTH} characters')
        # Drop empty / whitespace-only entries and strip the remaining ones.
        cleaned = [t.strip() for t in v if t and t.strip()]
        if not cleaned:
            raise ValueError('At least one valid text required')
        # Defensive: the Field constraint above already caps the raw list.
        if len(cleaned) > 10:
            raise ValueError('Maximum 10 texts allowed per batch')
        return cleaned

# ==================== MOVIE MODELS ====================
class MovieItem(BaseModel):
    # One movie as returned to the client; filled from a TMDB
    # /discover/movie result entry in fetch_movies_by_genres().
    id: int
    title: str
    poster_path: Optional[str] = None  # Taken as-is from the TMDB result
    vote_average: float
    release_date: Optional[str] = None

class GenreMovies(BaseModel):
    # Movies fetched for a single genre name from Config.EMOTION_GENRE_MAP.
    genre: str
    movies: List[MovieItem]

class RecommendationResponse(BaseModel):
    # Response body of POST /recommendations.
    emotion: str  # Detected emotion
    confidence: float  # Softmax probability of the detected emotion
    recommendations: List[GenreMovies]  # One entry per genre that yielded movies

# ==================== ENDPOINTS ====================
@app.get("/", response_model=Dict)
async def root():
    """Root endpoint"""
    # Static index of the public endpoints served by this application.
    return {
        "message": "Emotion Detection API",
        "version": "1.0.0",
        "endpoints": {
            "predict": "/predict",
            "batch_predict": "/batch_predict",
            "recommendations": "/recommendations",
            "health": "/health",
            "emotions": "/emotions"
        }
    }

@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint"""
    try:
        # Resident set size of the current process, reported in MiB.
        rss_bytes = psutil.Process().memory_info().rss
        report = HealthResponse(
            status="healthy",
            device=config.DEVICE,
            model_loaded=model_manager.model is not None,
            memory_mb=round(rss_bytes / 1024 ** 2, 2),
        )
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        raise HTTPException(status_code=500, detail="Health check failed")
    return report

@app.get("/emotions", response_model=Dict)
async def get_emotions():
    """Get list of supported emotions"""
    supported = config.EMOTION_CLASSES
    return {"emotions": supported, "count": len(supported)}

@app.post("/predict", response_model=PredictionResponse)
async def predict_emotion(request: PredictionRequest):
    """Predict emotion for single text"""
    try:
        logger.info(f"Prediction request: {len(request.text)} chars")

        # Inference is blocking work; run it in the threadpool so the event
        # loop keeps serving other requests (e.g. /health) meanwhile.
        result = await run_in_threadpool(model_manager.predict, request.text)

        return PredictionResponse(
            emotion=result["emotion"],
            confidence=result["confidence"],
            all_probabilities=result["all_probabilities"],
            inference_time_ms=result["inference_time_ms"],
            text_length=len(request.text)
        )

    except HTTPException:
        # Already carries the intended status code and detail.
        raise
    except Exception as e:
        logger.error(f"Prediction endpoint error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/batch_predict")
async def batch_predict_emotion(request: BatchPredictionRequest):
    """Predict emotions for multiple texts"""
    try:
        logger.info(f"Batch prediction request: {len(request.texts)} texts")

        results = []
        for text in request.texts:
            # Blocking inference runs in the threadpool, one text at a time,
            # so the event loop stays responsive between texts.
            result = await run_in_threadpool(model_manager.predict, text)
            results.append({
                # Echo at most the first 100 characters of each input.
                "text": text[:100] + "..." if len(text) > 100 else text,
                "emotion": result["emotion"],
                "confidence": result["confidence"],
                "all_probabilities": result["all_probabilities"]
            })

        return {
            "count": len(results),
            "predictions": results
        }

    except HTTPException:
        # Pass through errors raised by predict() unchanged, like the other
        # endpoints do, instead of re-wrapping them.
        raise
    except Exception as e:
        logger.error(f"Batch prediction error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/recommendations", response_model=RecommendationResponse)
async def get_movie_recommendations(request: PredictionRequest):
    """
    Predict emotion and fetch movie recommendations based on detected emotion.
    Maps emotion to genres and fetches 12 most popular movies per genre from TMDB.
    """
    try:
        logger.info(f"Recommendation request: {len(request.text)} chars")

        # 1. Detect emotion (blocking inference, so it runs in the threadpool
        #    instead of on the event loop)
        emotion_result = await run_in_threadpool(model_manager.predict, request.text)
        detected_emotion = emotion_result["emotion"]
        confidence = emotion_result["confidence"]

        logger.info(f"Detected emotion: {detected_emotion} (confidence: {confidence})")

        # 2. Get mapped genres for this emotion
        genres = config.EMOTION_GENRE_MAP.get(detected_emotion, [])
        if not genres:
            raise HTTPException(status_code=400, detail=f"No genres mapped for emotion: {detected_emotion}")

        # 3. Fetch movies for these genres
        movies_by_genre = await fetch_movies_by_genres(genres, limit=12)

        # 4. Format response: keep the configured genre order and leave out
        #    genres for which nothing was fetched
        recommendations = [
            GenreMovies(genre=genre, movies=movies_by_genre[genre])
            for genre in genres
            if genre in movies_by_genre
        ]

        return RecommendationResponse(
            emotion=detected_emotion,
            confidence=confidence,
            recommendations=recommendations
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Recommendation error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get recommendations: {str(e)}")

# ==================== ERROR HANDLERS ====================
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Log any otherwise unhandled exception and answer with a JSON 500 body."""
    logger.error(f"Unhandled exception: {exc}", exc_info=True)
    body = {
        "error": "Internal server error",
        "detail": str(exc),
        "path": str(request.url),
    }
    return JSONResponse(status_code=500, content=body)

# ==================== TMDB MOVIE FETCHING ====================
async def fetch_movies_by_genres(genres: List[str], limit: int = 12) -> Dict[str, List[MovieItem]]:
    """
    Fetch movies from TMDB API for given genres.

    Args:
        genres: Genre names; matched case-insensitively against TMDB's genre list.
        limit: Maximum number of movies returned per genre.

    Returns:
        Dict with genre as key and list of MovieItem as value. Genres that TMDB
        does not know, that yield no movies, or whose request fails are left out.

    Raises:
        HTTPException: status 500 when TMDB_API_KEY is not configured.
    """
    if not TMDB_API_KEY:
        logger.error("TMDB_API_KEY not configured")
        raise HTTPException(status_code=500, detail="Movie service not configured. Please set TMDB_API_KEY environment variable.")

    movies_by_genre = {}
    # Lower-cased TMDB genre name -> id. The list is the same for every genre,
    # so it is fetched on first use and reused; it stays None after a failed
    # attempt so the next genre retries the lookup.
    genre_ids = None

    async with httpx.AsyncClient(timeout=10.0) as client:
        for genre in genres:
            try:
                logger.info(f"Fetching movies for genre: {genre}")

                if genre_ids is None:
                    genres_response = await client.get(
                        f"{TMDB_BASE_URL}/genre/movie/list",
                        params={"api_key": TMDB_API_KEY}
                    )
                    genres_response.raise_for_status()
                    lookup = {}
                    for g in genres_response.json().get("genres", []):
                        # setdefault keeps the first entry for a duplicated name
                        lookup.setdefault(g["name"].lower(), g["id"])
                    genre_ids = lookup

                genre_id = genre_ids.get(genre.lower())
                if genre_id is None:
                    logger.warning(f"Genre '{genre}' not found in TMDB")
                    continue

                # Fetch movies for this genre, sorted by popularity (descending)
                movies_response = await client.get(
                    f"{TMDB_BASE_URL}/discover/movie",
                    params={
                        "api_key": TMDB_API_KEY,
                        "with_genres": genre_id,
                        "sort_by": "popularity.desc",
                        "page": 1,
                        "language": "en-US"
                    }
                )
                movies_response.raise_for_status()
                movies_data = movies_response.json()

                # Parse movies
                movie_list = []
                for movie in movies_data.get("results", []):
                    if len(movie_list) >= limit:
                        break
                    if movie.get("id") is None:
                        # An entry without an id cannot become a MovieItem;
                        # skip it instead of discarding the whole genre.
                        continue
                    movie_list.append(MovieItem(
                        id=movie["id"],
                        # `or` also covers fields present with a null value
                        title=movie.get("title") or "Unknown",
                        poster_path=movie.get("poster_path"),
                        vote_average=movie.get("vote_average") or 0.0,
                        release_date=movie.get("release_date", "N/A")
                    ))

                if movie_list:
                    movies_by_genre[genre] = movie_list
                    logger.info(f"Fetched {len(movie_list)} movies for genre: {genre}")
                else:
                    logger.warning(f"No movies found for genre: {genre}")

            except httpx.HTTPError as e:
                logger.error(f"HTTP error fetching movies for {genre}: {e}")
            except Exception as e:
                # Best effort per genre: one bad response must not fail the rest.
                logger.error(f"Error fetching movies for {genre}: {e}", exc_info=True)

    return movies_by_genre

# ==================== MAIN ====================
if __name__ == "__main__":
    # Serve the app on all interfaces, port 8000, without auto-reload.
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": False,
        "log_level": "info",
        "access_log": True,
    }
    uvicorn.run("main:app", **server_options)