mahmdshafee's picture
Update app/backend/main.py
810cec6 verified
"""
Production-Ready FastAPI Backend for DeBERTa Emotion Detection
Optimized for 710MB model with efficient tokenization and inference
"""
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, field_validator
from contextlib import asynccontextmanager
import torch
import torch.nn as nn
from transformers import DebertaV2Model, DebertaV2Tokenizer
import uvicorn
import logging
import time
from typing import Dict, List, Optional
import gc
import psutil
import os
import json
import httpx
from dotenv import load_dotenv
import warnings
# Silence FutureWarning messages raised from the huggingface_hub package
warnings.filterwarnings('ignore', category=FutureWarning, module='huggingface_hub')

# ==================== LOGGING SETUP ====================
# Root logger: INFO and above, timestamped, tagged with logger name and level
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# ==================== ENVIRONMENT SETUP ====================
# Load variables from a .env file before reading the TMDB settings below
load_dotenv()
TMDB_API_KEY = os.environ.get('TMDB_API_KEY', '')  # empty string when unset
TMDB_BASE_URL = "https://api.themoviedb.org/3"
TMDB_IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"
# ==================== CONFIGURATION ====================
class Config:
    """Static settings for model loading, inference and emotion-to-genre mapping."""

    # --- Model / tokenisation ---
    MODEL_PATH = "./../../models/"  # Directory holding config.json and weight files
    MAX_LENGTH = 128  # Token length passed to the tokenizer (pad/truncate)
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    BATCH_SIZE = 8  # For batch predictions
    MAX_TEXT_LENGTH = 5000  # Character limit for input

    # Label order: index i of the classifier output maps to EMOTION_CLASSES[i]
    EMOTION_CLASSES = [
        'anger',
        'fear',
        'joy',
        'love',
        'neutral',
        'sadness',
        'surprise',
    ]

    # Emotion to Genre Mapping (four genres per emotion, except 'joy' which has five).
    # NOTE(review): several labels (e.g. 'Rom-Com', 'Sci-Fi', 'Musical',
    # 'Slice-of-Life') may not be genre names known to TMDB, in which case the
    # recommendation lookup skips them -- confirm against the TMDB genre list.
    EMOTION_GENRE_MAP = {
        'anger': ['Action', 'Crime', 'Thriller', 'Revenge-Drama'],
        'fear': ['Horror', 'Thriller', 'Mystery', 'Supernatural'],
        'joy': ['Comedy', 'Adventure', 'Family', 'Animation', 'Musical'],
        'love': ['Romance', 'Rom-Com', 'Emotional Drama', 'Fantasy'],
        'neutral': ['Documentary', 'Drama', 'Biography', 'Slice-of-Life'],
        'sadness': ['Drama', 'Romance', 'Indie', 'Healing-Stories'],
        'surprise': ['Mystery', 'Sci-Fi', 'Fantasy', 'Twist-Thriller'],
    }

    # --- Performance settings ---
    TORCH_THREADS = 4
    USE_HALF_PRECISION = False  # FP16 for GPU
# Module-wide settings object read by the model manager, request models and endpoints
config = Config()
# Set torch threads for CPU inference
torch.set_num_threads(config.TORCH_THREADS)
# ==================== MODEL DEFINITION ====================
class DeBERTaEmotionClassifier(nn.Module):
    """DeBERTa encoder followed by dropout and a linear emotion classification head."""

    def __init__(self, config_dict: Dict, num_labels: int):
        """
        Build the encoder and the classification head.

        Args:
            config_dict: Keyword arguments forwarded to ``DebertaV2Config``.
            num_labels: Number of output units of the linear classifier.
        """
        super().__init__()
        from transformers import DebertaV2Config

        encoder_config = DebertaV2Config(**config_dict)
        self.deberta = DebertaV2Model(encoder_config)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(encoder_config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return the classifier logits for the given token ids and attention mask."""
        hidden_states = self.deberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
        ).last_hidden_state
        # The hidden state at sequence position 0 is used as the pooled representation
        pooled = self.dropout(hidden_states[:, 0, :])
        return self.classifier(pooled)
# ==================== MODEL MANAGER ====================
class ModelManager:
    """
    Singleton class to manage model loading and inference.

    Every ``ModelManager()`` call returns the same instance. ``model`` and
    ``tokenizer`` are ``None`` until ``load_model`` succeeds and are reset to
    ``None`` again by ``cleanup``.
    """

    _instance = None

    # State-dict key prefixes that belong to the encoder and therefore need the
    # 'deberta.' prefix to match DeBERTaEmotionClassifier's attribute name.
    _ENCODER_KEY_PREFIXES = (
        "embeddings.",
        "encoder.",
        "rel_embeddings",
        "LayerNorm",
        "word_embeddings",
    )

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance.initialized = False
        return cls._instance

    def __init__(self):
        # __init__ runs on every ModelManager() call; only reset state the first time
        if not self.initialized:
            self.model = None
            self.tokenizer = None
            self.initialized = True

    @classmethod
    def _normalize_state_dict_keys(
        cls,
        state_dict: Dict[str, torch.Tensor],
        num_labels: int,
        hidden_size: int,
    ) -> Dict[str, torch.Tensor]:
        """
        Normalize state dict keys to match the model architecture:
        - Add 'deberta.' prefix to encoder/embedding keys if missing
        - Map bare 'weight' / 'bias' keys to 'classifier.*' when their shapes
          are (num_labels, hidden_size) / (num_labels,)

        All other keys are kept unchanged. An empty input is returned as-is.
        """
        if not state_dict:
            return state_dict
        mapped = {}
        for key, tensor in state_dict.items():
            shape = tuple(tensor.shape)
            if key == 'weight' and shape == (num_labels, hidden_size):
                mapped['classifier.weight'] = tensor
            elif key == 'bias' and shape == (num_labels,):
                mapped['classifier.bias'] = tensor
            elif key.startswith(cls._ENCODER_KEY_PREFIXES):
                # Encoder/embedding key without prefix
                mapped[f"deberta.{key}"] = tensor
            else:
                # Already prefixed, or not an encoder/classifier key: keep as-is
                mapped[key] = tensor
        return mapped

    def _normalize_for_model(self, state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Normalize keys using the classifier dimensions of the instantiated model."""
        head = self.model.classifier
        return self._normalize_state_dict_keys(
            state_dict,
            num_labels=head.out_features,
            hidden_size=head.in_features,
        )

    def _load_tokenizer(self):
        """Return a tokenizer built from local files, or downloaded from HuggingFace."""
        vocab_path = os.path.join(config.MODEL_PATH, 'vocab.json')
        if os.path.exists(vocab_path):
            logger.info("Loading tokenizer from local files...")
            # NOTE(review): DebertaV2Tokenizer is presumably SentencePiece-based and
            # may expect an spm.model file rather than vocab.json -- confirm.
            return DebertaV2Tokenizer(vocab_file=vocab_path)
        logger.info("Downloading tokenizer from HuggingFace...")
        return DebertaV2Tokenizer.from_pretrained("microsoft/deberta-v3-base")

    def _load_safetensors_weights(self, safetensors_path: str) -> bool:
        """
        Best-effort load of ``model.safetensors``.

        Failures are logged and swallowed so that the torch checkpoint can still
        be tried. Returns True only when the weights were applied to the model.
        """
        if not os.path.exists(safetensors_path):
            return False
        try:
            logger.info("Loading full trained model from safetensors...")
            try:
                from safetensors.torch import load_file
            except Exception as ie:
                logger.error("safetensors package not available: %s", ie)
                raise
            state = load_file(safetensors_path)
            # state is a dict of tensors; adjust keys if necessary
            mapped = self._normalize_for_model(state)
            load_res = self.model.load_state_dict(mapped, strict=False)
            logger.info(
                "Loaded safetensors model (missing: %s, unexpected: %s)",
                getattr(load_res, 'missing_keys', []),
                getattr(load_res, 'unexpected_keys', []),
            )
            return True
        except Exception as e:
            logger.error("Error loading safetensors model: %s", e, exc_info=True)
            return False

    def _load_checkpoint_weights(self, checkpoint_path: str) -> bool:
        """
        Load ``classifier.pt`` if it exists; errors are logged and re-raised.

        Returns True when at least one state dict from the file was applied.
        """
        if not os.path.exists(checkpoint_path):
            return False
        try:
            logger.info("Loading trained checkpoint (torch) ...")
            # SECURITY: weights_only=False lets torch.load unpickle arbitrary
            # objects. Only load checkpoint files from a trusted source.
            checkpoint = torch.load(checkpoint_path, map_location=config.DEVICE, weights_only=False)
            if not isinstance(checkpoint, dict):
                logger.warning("Checkpoint loaded but is not a dict, skipping")
                return False

            loaded = False
            # The checkpoint layout depends on how the training script saved it
            if 'model_state_dict' in checkpoint:
                sd = checkpoint['model_state_dict']
            elif 'state_dict' in checkpoint:
                sd = checkpoint['state_dict']
            elif 'classifier_state_dict' in checkpoint or 'dropout_state_dict' in checkpoint:
                # old style: only classifier (and dropout) saved
                sd = None
                if 'classifier_state_dict' in checkpoint:
                    head_state = checkpoint['classifier_state_dict']
                    try:
                        self.model.classifier.load_state_dict(head_state)
                    except Exception:
                        # try flexible load with key normalization
                        self.model.load_state_dict(self._normalize_for_model(head_state), strict=False)
                    loaded = True
                if 'dropout_state_dict' in checkpoint:
                    try:
                        self.model.dropout.load_state_dict(checkpoint['dropout_state_dict'])
                    except Exception:
                        logger.warning('Could not load dropout state dict')
            else:
                # Assume the dict itself is the state dict
                sd = checkpoint

            if sd:
                # sd may contain keys without proper prefixes
                mapped = self._normalize_for_model(sd)
                load_res = self.model.load_state_dict(mapped, strict=False)
                logger.info(
                    "Loaded checkpoint (missing: %s, unexpected: %s)",
                    getattr(load_res, 'missing_keys', []),
                    getattr(load_res, 'unexpected_keys', []),
                )
                loaded = True
            return loaded
        except Exception as e:
            logger.error("Error loading checkpoint: %s", e, exc_info=True)
            raise

    def load_model(self):
        """
        Load model and tokenizer with error handling.

        Reads ``config.json`` from ``config.MODEL_PATH``, builds the classifier,
        then applies ``model.safetensors`` and ``classifier.pt`` in that order
        when present (both are applied if both exist).

        Returns:
            True on success.

        Raises:
            HTTPException: status 500 when the config file is missing or any
                other step fails.
        """
        try:
            logger.info(f"Loading model on device: {config.DEVICE}")
            start_time = time.time()

            # Load config from local file
            logger.info("Loading model config...")
            config_path = os.path.join(config.MODEL_PATH, "config.json")
            if not os.path.exists(config_path):
                raise FileNotFoundError(f"Model config not found at {config_path}")
            with open(config_path, 'r', encoding='utf-8') as f:
                model_config = json.load(f)

            # Load tokenizer from local files or fallback to HuggingFace
            logger.info("Loading tokenizer...")
            self.tokenizer = self._load_tokenizer()

            # Load model architecture from config
            logger.info("Loading model architecture from config...")
            self.model = DeBERTaEmotionClassifier(
                config_dict=model_config,
                num_labels=len(config.EMOTION_CLASSES),
            )

            # Load trained weights from whichever files are available
            safetensors_path = os.path.join(config.MODEL_PATH, "model.safetensors")
            classifier_path = os.path.join(config.MODEL_PATH, "classifier.pt")
            loaded_safetensors = self._load_safetensors_weights(safetensors_path)
            loaded_checkpoint = self._load_checkpoint_weights(classifier_path)
            # Only warn when nothing was applied; previously this warning also
            # fired after a successful safetensors load if classifier.pt was absent.
            if not (loaded_safetensors or loaded_checkpoint):
                logger.warning(f"No trained model found at {safetensors_path} or {classifier_path}, using base model")

            # Move to device
            self.model.to(config.DEVICE)
            self.model.eval()

            # Apply half precision for GPU
            if config.USE_HALF_PRECISION:
                self.model.half()
                logger.info("Applied FP16 precision")

            # Optimize for inference
            if config.DEVICE == "cuda":
                torch.backends.cudnn.benchmark = True

            load_time = time.time() - start_time
            logger.info(f"Model loaded successfully in {load_time:.2f}s")
            logger.info(f"Memory usage: {psutil.Process().memory_info().rss / 1024 ** 2:.2f} MB")
            return True
        except FileNotFoundError as e:
            logger.error(f"Model files not found: {e}")
            raise HTTPException(status_code=500, detail=f"Model files not found: {str(e)}")
        except Exception as e:
            logger.error(f"Error loading model: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Failed to load model: {str(e)}")

    @torch.no_grad()
    def predict(self, text: str) -> Dict:
        """
        Run inference on input text.

        Returns:
            Dict with keys ``emotion``, ``confidence``, ``all_probabilities``
            (emotion name -> probability) and ``inference_time_ms``.

        Raises:
            HTTPException: status 500 when the model is not loaded or
                inference fails.
        """
        try:
            if self.model is None or self.tokenizer is None:
                raise ValueError("Model not loaded")
            start_time = time.time()

            # Tokenize
            encoding = self.tokenizer(
                text,
                add_special_tokens=True,
                max_length=config.MAX_LENGTH,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt'
            )
            # Token ids stay integer tensors even in FP16 mode: a round trip
            # through float16 cannot represent ids above 2048 exactly and would
            # silently feed the wrong tokens to the embedding layer.
            input_ids = encoding['input_ids'].to(config.DEVICE)
            attention_mask = encoding['attention_mask'].to(config.DEVICE)

            # Inference
            with torch.cuda.amp.autocast(enabled=config.USE_HALF_PRECISION):
                logits = self.model(input_ids, attention_mask)

            # Get probabilities
            probs = torch.softmax(logits, dim=-1)
            confidence, predicted_class = torch.max(probs, dim=-1)

            # Convert to plain Python values (single item in the batch)
            probs_list = probs.cpu().float().tolist()[0]
            predicted_idx = predicted_class.item()
            confidence_score = confidence.item()

            # Create emotion probability dict
            emotion_probs = {
                emotion: float(probs_list[i])
                for i, emotion in enumerate(config.EMOTION_CLASSES)
            }
            inference_time = time.time() - start_time
            return {
                "emotion": config.EMOTION_CLASSES[predicted_idx],
                "confidence": confidence_score,
                "all_probabilities": emotion_probs,
                "inference_time_ms": round(inference_time * 1000, 2)
            }
        except Exception as e:
            logger.error(f"Prediction error: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")

    def cleanup(self):
        """Cleanup resources"""
        try:
            # Rebind to None instead of `del`: the attributes must keep existing
            # so later reads (e.g. `model_manager.model is not None`) do not
            # raise AttributeError.
            self.model = None
            self.tokenizer = None
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            logger.info("Model resources cleaned up")
        except Exception as e:
            logger.error(f"Cleanup error: {e}")
# ==================== LIFESPAN MANAGEMENT ====================
# Single shared manager used by the lifespan hook and all endpoints
model_manager = ModelManager()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Manage application lifecycle.

    Loads the model before the application starts serving and releases it on
    shutdown. A failed model load is logged and re-raised so startup aborts.
    """
    # Startup
    logger.info("Starting application...")
    try:
        model_manager.load_model()
        logger.info("Application ready")
    except Exception as e:
        logger.error(f"Startup failed: {e}")
        raise
    try:
        yield
    finally:
        # Shutdown: in `finally` so resources are released even when an
        # exception or cancellation is raised into the generator at `yield`
        logger.info("Shutting down application...")
        model_manager.cleanup()
        logger.info("Application stopped")
# ==================== FASTAPI APP ====================
app = FastAPI(
    title="Emotion Detection API",
    description="DeBERTa v3 based emotion detection for 7 emotion classes",
    version="1.0.0",
    lifespan=lifespan
)

# CORS middleware
FRONTEND_URL = 'https://moodflix-ai-nu.vercel.app'
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",  # Local development
        "http://localhost:8000",  # Local backend
        FRONTEND_URL,  # Production frontend
    ],
    # All Vercel preview deployments. Entries in allow_origins are compared
    # literally, so a "https://*.vercel.app" entry never matched any origin;
    # wildcard hosts have to be expressed through allow_origin_regex.
    # NOTE(review): this admits every *.vercel.app site with credentials --
    # consider narrowing the pattern to this project's deployment prefix.
    allow_origin_regex=r"https://[A-Za-z0-9-]+\.vercel\.app",
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ==================== REQUEST/RESPONSE MODELS ====================
class PredictionRequest(BaseModel):
    # Request body carrying the single text to analyse.
    # Length bounds are enforced on the raw (untrimmed) value.
    text: str = Field(..., min_length=1, max_length=config.MAX_TEXT_LENGTH)

    @field_validator('text')
    @classmethod
    def validate_text(cls, v):
        # Reject blank input and hand back the text without surrounding whitespace
        trimmed = v.strip() if v else ""
        if not trimmed:
            raise ValueError('Text cannot be empty or whitespace only')
        return trimmed
class PredictionResponse(BaseModel):
    # Response body of POST /predict
    emotion: str  # Predicted emotion label
    confidence: float  # Probability of the predicted label
    all_probabilities: Dict[str, float]  # Emotion label -> probability
    inference_time_ms: float  # Inference duration reported by the model manager
    text_length: int  # Character count of the validated request text
class HealthResponse(BaseModel):
    # Response body of GET /health
    status: str  # Service status string
    device: str  # Inference device taken from the configuration
    model_loaded: bool  # Whether the model manager currently holds a model
    memory_mb: float  # Resident memory of this process in MB
class BatchPredictionRequest(BaseModel):
    # Request body of POST /batch_predict: between 1 and 10 texts
    texts: List[str] = Field(..., min_length=1, max_length=10)

    @field_validator('texts')
    @classmethod
    def validate_texts(cls, v):
        # Drop blank entries and trim the remaining ones
        cleaned = [t.strip() for t in v if t and t.strip()]
        if not cleaned:
            raise ValueError('At least one valid text required')
        if len(cleaned) > 10:
            raise ValueError('Maximum 10 texts allowed per batch')
        # Apply the same per-text character cap that PredictionRequest enforces,
        # so the batch endpoint cannot be used to bypass it
        if any(len(t) > config.MAX_TEXT_LENGTH for t in cleaned):
            raise ValueError(f'Each text must be at most {config.MAX_TEXT_LENGTH} characters')
        return cleaned
# ==================== MOVIE MODELS ====================
class MovieItem(BaseModel):
    # One movie entry as parsed from a TMDB discover result
    id: int
    title: str
    poster_path: Optional[str] = None  # May be absent in the TMDB payload
    vote_average: float
    release_date: Optional[str] = None
class GenreMovies(BaseModel):
    # Movies grouped under a single genre name
    genre: str
    movies: List[MovieItem]
class RecommendationResponse(BaseModel):
    # Response body of POST /recommendations
    emotion: str  # Detected emotion label
    confidence: float  # Probability of the detected label
    recommendations: List[GenreMovies]  # One entry per genre that returned movies
# ==================== ENDPOINTS ====================
@app.get("/", response_model=Dict)
async def root():
    """Root endpoint"""
    # Static index of the service; keep in sync with the routes defined below
    return {
        "message": "Emotion Detection API",
        "version": "1.0.0",
        "endpoints": {
            "predict": "/predict",
            "batch_predict": "/batch_predict",
            "recommendations": "/recommendations",
            "health": "/health",
            "emotions": "/emotions"
        }
    }
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint"""
    try:
        # Resident set size of the current process, converted from bytes to MB
        rss_bytes = psutil.Process().memory_info().rss
        memory_mb = rss_bytes / 1024 ** 2
        is_loaded = model_manager.model is not None
        return HealthResponse(
            status="healthy",
            device=config.DEVICE,
            model_loaded=is_loaded,
            memory_mb=round(memory_mb, 2),
        )
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        raise HTTPException(status_code=500, detail="Health check failed")
@app.get("/emotions", response_model=Dict)
async def get_emotions():
    """Get list of supported emotions"""
    # Labels come straight from the configuration, in classifier output order
    supported = config.EMOTION_CLASSES
    return {"emotions": supported, "count": len(supported)}
@app.post("/predict", response_model=PredictionResponse)
async def predict_emotion(request: PredictionRequest):
    """Predict emotion for single text"""
    text = request.text
    try:
        logger.info(f"Prediction request: {len(text)} chars")
        outcome = model_manager.predict(text)
        response = PredictionResponse(
            emotion=outcome["emotion"],
            confidence=outcome["confidence"],
            all_probabilities=outcome["all_probabilities"],
            inference_time_ms=outcome["inference_time_ms"],
            text_length=len(text),
        )
        return response
    except HTTPException:
        # Already an HTTP error (e.g. raised by the model manager): pass through
        raise
    except Exception as e:
        logger.error(f"Prediction endpoint error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/batch_predict")
async def batch_predict_emotion(request: BatchPredictionRequest):
    """Predict emotions for multiple texts"""
    try:
        logger.info(f"Batch prediction request: {len(request.texts)} texts")
        results = []
        for text in request.texts:
            result = model_manager.predict(text)
            # Echo at most the first 100 characters of each text
            preview = text[:100] + "..." if len(text) > 100 else text
            results.append({
                "text": preview,
                "emotion": result["emotion"],
                "confidence": result["confidence"],
                "all_probabilities": result["all_probabilities"]
            })
        return {
            "count": len(results),
            "predictions": results
        }
    except HTTPException:
        # Pass HTTP errors from the model manager through unchanged, as the
        # other endpoints do, instead of re-wrapping them with a mangled detail
        raise
    except Exception as e:
        logger.error(f"Batch prediction error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/recommendations", response_model=RecommendationResponse)
async def get_movie_recommendations(request: PredictionRequest):
    """
    Predict emotion and fetch movie recommendations based on detected emotion.
    Maps emotion to genres and fetches 12 most popular movies per genre from TMDB.
    """
    try:
        logger.info(f"Recommendation request: {len(request.text)} chars")

        # 1. Detect emotion
        prediction = model_manager.predict(request.text)
        detected_emotion = prediction["emotion"]
        confidence = prediction["confidence"]
        logger.info(f"Detected emotion: {detected_emotion} (confidence: {confidence})")

        # 2. Get mapped genres for this emotion
        genres = config.EMOTION_GENRE_MAP.get(detected_emotion, [])
        if not genres:
            raise HTTPException(status_code=400, detail=f"No genres mapped for emotion: {detected_emotion}")

        # 3. Fetch movies for these genres
        movies_by_genre = await fetch_movies_by_genres(genres, limit=12)

        # 4. Format response, keeping the mapped genre order and skipping
        #    genres for which nothing was fetched
        recommendations = [
            GenreMovies(genre=name, movies=movies_by_genre[name])
            for name in genres
            if name in movies_by_genre
        ]
        return RecommendationResponse(
            emotion=detected_emotion,
            confidence=confidence,
            recommendations=recommendations,
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Recommendation error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to get recommendations: {str(e)}")
# ==================== ERROR HANDLERS ====================
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Global exception handler"""
    logger.error(f"Unhandled exception: {exc}", exc_info=True)
    # NOTE(review): the exception text is returned to the client verbatim
    payload = {
        "error": "Internal server error",
        "detail": str(exc),
        "path": str(request.url),
    }
    return JSONResponse(status_code=500, content=payload)
# ==================== TMDB MOVIE FETCHING ====================
async def fetch_movies_by_genres(genres: List[str], limit: int = 12) -> Dict[str, List[MovieItem]]:
    """
    Fetch movies from TMDB API for given genres.

    The TMDB genre list is requested once and used to resolve every genre name
    (case-insensitively) to its id; one discover request is then made per
    resolved genre, sorted by popularity.

    Args:
        genres: Genre names to look up.
        limit: Maximum number of movies kept per genre.

    Returns:
        Dict with genre as key and list of MovieItem as value. Genres that are
        unknown to TMDB, return no movies, or fail to fetch are omitted; the
        dict is empty when the genre list itself cannot be fetched.

    Raises:
        HTTPException: status 500 when TMDB_API_KEY is not configured.
    """
    if not TMDB_API_KEY:
        logger.error("TMDB_API_KEY not configured")
        raise HTTPException(status_code=500, detail="Movie service not configured. Please set TMDB_API_KEY environment variable.")

    movies_by_genre: Dict[str, List[MovieItem]] = {}
    if not genres:
        return movies_by_genre

    # NOTE(review): the API key travels as a query parameter, so it can appear
    # in request URLs that end up in logs or error messages.
    async with httpx.AsyncClient(timeout=10.0) as client:
        # The genre list does not depend on the requested genre: fetch it once
        # instead of once per loop iteration.
        try:
            genres_response = await client.get(
                f"{TMDB_BASE_URL}/genre/movie/list",
                params={"api_key": TMDB_API_KEY}
            )
            genres_response.raise_for_status()
            genres_data = genres_response.json()
        except httpx.HTTPError as e:
            logger.error(f"HTTP error fetching TMDB genre list: {e}")
            return movies_by_genre
        except Exception as e:
            logger.error(f"Error fetching TMDB genre list: {e}", exc_info=True)
            return movies_by_genre

        # Lower-cased name -> id; setdefault keeps the first entry on duplicates
        genre_ids: Dict[str, int] = {}
        for entry in genres_data.get("genres", []):
            entry_name = entry.get("name")
            entry_id = entry.get("id")
            if entry_name is not None and entry_id is not None:
                genre_ids.setdefault(entry_name.lower(), entry_id)

        for genre in genres:
            try:
                logger.info(f"Fetching movies for genre: {genre}")
                genre_id = genre_ids.get(genre.lower())
                if genre_id is None:
                    logger.warning(f"Genre '{genre}' not found in TMDB")
                    continue

                # Fetch movies for this genre, sorted by popularity (descending)
                movies_response = await client.get(
                    f"{TMDB_BASE_URL}/discover/movie",
                    params={
                        "api_key": TMDB_API_KEY,
                        "with_genres": genre_id,
                        "sort_by": "popularity.desc",
                        "page": 1,
                        "language": "en-US"
                    }
                )
                movies_response.raise_for_status()
                movies_data = movies_response.json()

                # Parse movies (first `limit` results of page 1)
                movie_list = [
                    MovieItem(
                        id=movie.get("id"),
                        title=movie.get("title", "Unknown"),
                        poster_path=movie.get("poster_path"),
                        vote_average=movie.get("vote_average", 0.0),
                        release_date=movie.get("release_date", "N/A")
                    )
                    for movie in movies_data.get("results", [])[:limit]
                ]
                if movie_list:
                    movies_by_genre[genre] = movie_list
                    logger.info(f"Fetched {len(movie_list)} movies for genre: {genre}")
                else:
                    logger.warning(f"No movies found for genre: {genre}")
            except httpx.HTTPError as e:
                # Best effort: a failing genre is logged and skipped
                logger.error(f"HTTP error fetching movies for {genre}: {e}")
            except Exception as e:
                logger.error(f"Error fetching movies for {genre}: {e}", exc_info=True)
    return movies_by_genre
# ==================== MAIN ====================
if __name__ == "__main__":
    # Start the server directly when this file is executed as a script
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": False,
        "log_level": "info",
        "access_log": True,
    }
    uvicorn.run("main:app", **server_options)