Spaces:

mahmdshafee
/

emotion-detection-api

Running

App Files Files Community

emotion-detection-api / app /backend /main.py

mahmdshafee

Update app/backend/main.py

810cec6 verified about 1 month ago

raw

history blame contribute delete

26.9 kB

	"""
	Production-Ready FastAPI Backend for DeBERTa Emotion Detection
	Optimized for 710MB model with efficient tokenization and inference
	"""

	from fastapi import FastAPI, HTTPException, Request
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from pydantic import BaseModel, Field, field_validator
	from contextlib import asynccontextmanager
	import torch
	import torch.nn as nn
	from transformers import DebertaV2Model, DebertaV2Tokenizer
	import uvicorn
	import logging
	import time
	from typing import Dict, List, Optional
	import gc
	import psutil
	import os
	import json
	import httpx
	from dotenv import load_dotenv
	import warnings

	# Silence the FutureWarning noise coming from the huggingface_hub package.
	warnings.filterwarnings(
	    action="ignore",
	    category=FutureWarning,
	    module="huggingface_hub",
	)

	# ==================== LOGGING SETUP ====================
	# Root logger: INFO and above, timestamped, tagged with logger name and level.
	logging.basicConfig(
	    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# ==================== ENVIRONMENT SETUP ====================
	# Pull variables from a local .env file (if any) before reading them.
	load_dotenv()
	TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "")  # empty string when unset
	TMDB_BASE_URL = "https://api.themoviedb.org/3"
	TMDB_IMAGE_BASE_URL = "https://image.tmdb.org/t/p/w500"

	# ==================== CONFIGURATION ====================
	class Config:
	# Static settings for model loading, inference and the emotion -> genre lookup.
	# Read everywhere through the module-level `config` instance created below.
	MODEL_PATH = "./../../models/" # Path to your saved model (relative, so it resolves against the process working directory)
	MAX_LENGTH = 128 # Passed to the tokenizer as max_length (padding/truncation target)
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	BATCH_SIZE = 8 # For batch predictions (not referenced in the visible part of this file)
	MAX_TEXT_LENGTH = 5000 # Character limit for input

	# Index i of this list is the label reported for class index i of the model output.
	EMOTION_CLASSES = ['anger', 'fear', 'joy', 'love', 'neutral', 'sadness', 'surprise']

	# Emotion to Genre Mapping (4 genres per emotion, except 'joy' which lists 5)
	# These names are matched case-insensitively against the genre names returned by
	# TMDB; names TMDB does not return are skipped with a warning.
	# NOTE(review): several labels (e.g. 'Rom-Com', 'Sci-Fi', 'Revenge-Drama') look
	# unlikely to match TMDB's genre names - confirm against the TMDB genre list.
	EMOTION_GENRE_MAP = {
	'anger': ['Action', 'Crime', 'Thriller', 'Revenge-Drama'],
	'fear': ['Horror', 'Thriller', 'Mystery', 'Supernatural'],
	'joy': ['Comedy', 'Adventure', 'Family', 'Animation', 'Musical'],
	'love': ['Romance', 'Rom-Com', 'Emotional Drama', 'Fantasy'],
	'neutral': ['Documentary', 'Drama', 'Biography', 'Slice-of-Life'],
	'sadness': ['Drama', 'Romance', 'Indie', 'Healing-Stories'],
	'surprise': ['Mystery', 'Sci-Fi', 'Fantasy', 'Twist-Thriller']
	}

	# Performance settings
	TORCH_THREADS = 4 # Handed to torch.set_num_threads() below
	USE_HALF_PRECISION = False # FP16 for GPU

	# Shared settings object used by the rest of the module.
	config = Config()

	# Set torch threads for CPU inference
	torch.set_num_threads(config.TORCH_THREADS)

	# ==================== MODEL DEFINITION ====================
	class DeBERTaEmotionClassifier(nn.Module):
	    """DeBERTa-v2 encoder followed by dropout and a linear emotion head."""

	    def __init__(self, config_dict: Dict, num_labels: int):
	        """Build the encoder from a config dict and attach the classifier head.

	        Args:
	            config_dict: Keyword arguments forwarded to ``DebertaV2Config``.
	            num_labels: Number of output classes of the linear head.

	        The encoder is constructed from the config only; no weights are
	        loaded here.
	        """
	        super().__init__()
	        from transformers import DebertaV2Config

	        encoder_config = DebertaV2Config(**config_dict)
	        self.deberta = DebertaV2Model(encoder_config)
	        self.dropout = nn.Dropout(p=0.1)
	        self.classifier = nn.Linear(
	            in_features=encoder_config.hidden_size,
	            out_features=num_labels,
	        )

	    def forward(self, input_ids, attention_mask):
	        """Return the class logits computed from the first token's hidden state."""
	        encoded = self.deberta(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	        )
	        # Position 0 of the last hidden state is used as the sequence summary.
	        first_token_state = encoded.last_hidden_state[:, 0, :]
	        return self.classifier(self.dropout(first_token_state))

	# ==================== MODEL MANAGER ====================
	class ModelManager:
	    """Process-wide singleton that owns the tokenizer and the emotion model.

	    ``ModelManager()`` always returns the same instance.  ``model`` and
	    ``tokenizer`` are ``None`` until :meth:`load_model` succeeds and are reset
	    to ``None`` again by :meth:`cleanup`.
	    """

	    _instance = None

	    def __new__(cls):
	        if cls._instance is None:
	            cls._instance = super().__new__(cls)
	            cls._instance.initialized = False
	        return cls._instance

	    def __init__(self):
	        # __init__ runs on every ModelManager() call; only set defaults once
	        # so an already loaded model is not discarded.
	        if not self.initialized:
	            self.model = None
	            self.tokenizer = None
	            self.initialized = True

	    # ------------------------------------------------------------------
	    # helpers
	    # ------------------------------------------------------------------
	    @staticmethod
	    def _fp16_active() -> bool:
	        """Return True when FP16 inference is requested *and* running on CUDA."""
	        return bool(config.USE_HALF_PRECISION) and config.DEVICE == "cuda"

	    @staticmethod
	    def _normalize_state_dict_keys(
	        state_dict: Dict[str, torch.Tensor],
	        num_labels: int,
	        hidden_size: int,
	    ) -> Dict[str, torch.Tensor]:
	        """Rename state-dict keys so they match ``DeBERTaEmotionClassifier``.

	        - Bare ``weight`` / ``bias`` entries whose shape equals the classifier
	          head (``[num_labels, hidden_size]`` / ``[num_labels]``) get the
	          ``classifier.`` prefix.
	        - Encoder/embedding keys saved without a prefix get ``deberta.``.
	        - Everything else is passed through unchanged.

	        Args:
	            state_dict: Mapping of parameter name to tensor.
	            num_labels: Output size of the classifier head.
	            hidden_size: Input size of the classifier head.

	        Returns:
	            A new dict with normalised keys (the input itself when it is empty).
	        """
	        if not state_dict:
	            return state_dict

	        encoder_prefixes = (
	            "embeddings.",
	            "encoder.",
	            "rel_embeddings",
	            "LayerNorm",
	            "word_embeddings",
	        )
	        mapped = {}
	        for key, tensor in state_dict.items():
	            shape = tuple(tensor.shape)
	            if key == "weight" and shape == (num_labels, hidden_size):
	                mapped["classifier.weight"] = tensor
	            elif key == "bias" and shape == (num_labels,):
	                mapped["classifier.bias"] = tensor
	            elif key in ("weight", "bias") or key.startswith("deberta."):
	                # Unrecognised bare weight/bias, or already correctly prefixed.
	                mapped[key] = tensor
	            elif key.startswith(encoder_prefixes):
	                mapped[f"deberta.{key}"] = tensor
	            else:
	                mapped[key] = tensor
	        return mapped

	    def _load_tokenizer(self):
	        """Return a tokenizer from local files, or download the base one."""
	        logger.info("Loading tokenizer...")
	        vocab_path = os.path.join(config.MODEL_PATH, 'vocab.json')
	        if os.path.exists(vocab_path):
	            logger.info("Loading tokenizer from local files...")
	            # NOTE(review): DebertaV2Tokenizer is presumably SentencePiece
	            # based - confirm that vocab.json is a valid `vocab_file` for it.
	            return DebertaV2Tokenizer(vocab_file=vocab_path)
	        logger.info("Downloading tokenizer from HuggingFace...")
	        return DebertaV2Tokenizer.from_pretrained("microsoft/deberta-v3-base")

	    def _apply_state_dict(self, state_dict, label: str) -> None:
	        """Normalise keys, load non-strictly into the model and log the result."""
	        mapped = self._normalize_state_dict_keys(
	            state_dict,
	            num_labels=self.model.classifier.out_features,
	            hidden_size=self.model.classifier.in_features,
	        )
	        load_res = self.model.load_state_dict(mapped, strict=False)
	        logger.info(
	            "Loaded %s (missing: %s, unexpected: %s)",
	            label,
	            getattr(load_res, 'missing_keys', []),
	            getattr(load_res, 'unexpected_keys', []),
	        )

	    def _load_safetensors(self, path: str) -> bool:
	        """Try to load ``model.safetensors``; return True on success.

	        Failures are logged and swallowed so the torch checkpoint can still
	        be tried afterwards.
	        """
	        try:
	            logger.info("Loading full trained model from safetensors...")
	            try:
	                from safetensors.torch import load_file
	            except Exception as ie:
	                logger.error("safetensors package not available: %s", ie)
	                raise
	            self._apply_state_dict(load_file(path), "safetensors model")
	            return True
	        except Exception as e:
	            logger.error("Error loading safetensors model: %s", e, exc_info=True)
	            return False

	    def _load_checkpoint(self, path: str) -> None:
	        """Load a classic ``torch.save`` checkpoint in any of the known layouts."""
	        logger.info("Loading trained checkpoint (torch) ...")
	        # NOTE(security): weights_only=False unpickles arbitrary objects.  Only
	        # load checkpoints from a trusted source.
	        checkpoint = torch.load(path, map_location=config.DEVICE, weights_only=False)

	        if not isinstance(checkpoint, dict):
	            logger.warning("Checkpoint loaded but is not a dict, skipping")
	            return

	        # Common key names used in training scripts.
	        if 'model_state_dict' in checkpoint:
	            sd = checkpoint['model_state_dict']
	        elif 'state_dict' in checkpoint:
	            sd = checkpoint['state_dict']
	        elif 'classifier_state_dict' in checkpoint or 'dropout_state_dict' in checkpoint:
	            # Old style: only the head (and dropout) were saved.
	            if 'classifier_state_dict' in checkpoint:
	                head_state = checkpoint['classifier_state_dict']
	                try:
	                    self.model.classifier.load_state_dict(head_state)
	                except Exception:
	                    # Keys did not fit the bare Linear layer; retry on the
	                    # whole model with key normalisation.
	                    self._apply_state_dict(head_state, "classifier state dict")
	            if 'dropout_state_dict' in checkpoint:
	                try:
	                    self.model.dropout.load_state_dict(checkpoint['dropout_state_dict'])
	                except Exception:
	                    logger.warning('Could not load dropout state dict')
	            sd = None
	        else:
	            # Assume the dict itself is the state dict.
	            sd = checkpoint

	        if sd:
	            self._apply_state_dict(sd, "checkpoint")

	    def _load_weights(self) -> None:
	        """Apply trained weights from ``model.safetensors`` and ``classifier.pt``.

	        Both files are applied when both exist (safetensors first, then the
	        torch checkpoint, which wins on overlapping keys) because the
	        checkpoint may contain only the classifier head.
	        """
	        safetensors_path = os.path.join(config.MODEL_PATH, "model.safetensors")
	        classifier_path = os.path.join(config.MODEL_PATH, "classifier.pt")

	        safetensors_loaded = False
	        if os.path.exists(safetensors_path):
	            safetensors_loaded = self._load_safetensors(safetensors_path)

	        if os.path.exists(classifier_path):
	            try:
	                self._load_checkpoint(classifier_path)
	            except Exception as e:
	                logger.error("Error loading checkpoint: %s", e, exc_info=True)
	                raise
	        elif safetensors_loaded:
	            logger.info("No checkpoint at %s; using safetensors weights only", classifier_path)
	        else:
	            logger.warning(f"No trained model found at {safetensors_path} or {classifier_path}, using base model")

	    # ------------------------------------------------------------------
	    # public API
	    # ------------------------------------------------------------------
	    def load_model(self):
	        """Load config, tokenizer, architecture and trained weights.

	        Returns:
	            True when the model is ready for inference.

	        Raises:
	            HTTPException: status 500 when model files are missing or any
	                other loading step fails.
	        """
	        try:
	            logger.info(f"Loading model on device: {config.DEVICE}")
	            start_time = time.time()

	            logger.info("Loading model config...")
	            config_path = os.path.join(config.MODEL_PATH, "config.json")
	            if not os.path.exists(config_path):
	                raise FileNotFoundError(f"Model config not found at {config_path}")
	            with open(config_path, 'r', encoding='utf-8') as f:
	                model_config = json.load(f)

	            self.tokenizer = self._load_tokenizer()

	            logger.info("Loading model architecture from config...")
	            self.model = DeBERTaEmotionClassifier(
	                config_dict=model_config,
	                num_labels=len(config.EMOTION_CLASSES),
	            )
	            self._load_weights()

	            self.model.to(config.DEVICE)
	            self.model.eval()

	            # FP16 is only applied on CUDA.
	            if self._fp16_active():
	                self.model.half()
	                logger.info("Applied FP16 precision")
	            elif config.USE_HALF_PRECISION:
	                logger.warning("USE_HALF_PRECISION ignored: device is %s", config.DEVICE)

	            if config.DEVICE == "cuda":
	                torch.backends.cudnn.benchmark = True

	            load_time = time.time() - start_time
	            logger.info(f"Model loaded successfully in {load_time:.2f}s")
	            logger.info(f"Memory usage: {psutil.Process().memory_info().rss / 1024 ** 2:.2f} MB")
	            return True

	        except FileNotFoundError as e:
	            logger.error(f"Model files not found: {e}")
	            raise HTTPException(status_code=500, detail=f"Model files not found: {str(e)}") from e
	        except Exception as e:
	            logger.error(f"Error loading model: {e}", exc_info=True)
	            raise HTTPException(status_code=500, detail=f"Failed to load model: {str(e)}") from e

	    @torch.no_grad()
	    def predict(self, text: str) -> Dict:
	        """Classify ``text`` and return label, confidence and all probabilities.

	        Returns:
	            Dict with keys ``emotion``, ``confidence``, ``all_probabilities``
	            (label -> probability) and ``inference_time_ms``.

	        Raises:
	            HTTPException: status 500 when the model is not loaded or
	                inference fails.
	        """
	        try:
	            if self.model is None or self.tokenizer is None:
	                raise ValueError("Model not loaded")

	            start_time = time.time()

	            encoding = self.tokenizer(
	                text,
	                add_special_tokens=True,
	                max_length=config.MAX_LENGTH,
	                padding='max_length',
	                truncation=True,
	                return_attention_mask=True,
	                return_tensors='pt'
	            )

	            # Token ids must stay integer: a round trip through FP16 cannot
	            # represent ids above 2048 exactly and would corrupt the input.
	            input_ids = encoding['input_ids'].to(config.DEVICE)
	            attention_mask = encoding['attention_mask'].to(config.DEVICE)

	            with torch.autocast(device_type="cuda", enabled=self._fp16_active()):
	                logits = self.model(input_ids, attention_mask)

	            probs = torch.softmax(logits, dim=-1)
	            confidence, predicted_class = torch.max(probs, dim=-1)

	            probs_list = probs.cpu().float().tolist()[0]
	            predicted_idx = predicted_class.item()

	            emotion_probs = {
	                emotion: float(probs_list[i])
	                for i, emotion in enumerate(config.EMOTION_CLASSES)
	            }

	            inference_time = time.time() - start_time

	            return {
	                "emotion": config.EMOTION_CLASSES[predicted_idx],
	                "confidence": confidence.item(),
	                "all_probabilities": emotion_probs,
	                "inference_time_ms": round(inference_time * 1000, 2)
	            }

	        except Exception as e:
	            logger.error(f"Prediction error: {e}", exc_info=True)
	            raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}") from e

	    def cleanup(self):
	        """Release the model and tokenizer; safe to call more than once."""
	        try:
	            # Rebind instead of `del` so the attributes keep existing and
	            # `model is not None` checks elsewhere stay valid.
	            self.model = None
	            self.tokenizer = None
	            gc.collect()
	            if torch.cuda.is_available():
	                torch.cuda.empty_cache()
	            logger.info("Model resources cleaned up")
	        except Exception as e:
	            logger.error(f"Cleanup error: {e}")

	# ==================== LIFESPAN MANAGEMENT ====================
	model_manager = ModelManager()


	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """Load the model before serving and release it after shutdown."""
	    # ---- startup ----
	    logger.info("Starting application...")
	    try:
	        model_manager.load_model()
	    except Exception as startup_error:
	        logger.error(f"Startup failed: {startup_error}")
	        raise
	    else:
	        logger.info("Application ready")

	    # Hand control to the running application.
	    yield

	    # ---- shutdown ----
	    logger.info("Shutting down application...")
	    model_manager.cleanup()
	    logger.info("Application stopped")

	# ==================== FASTAPI APP ====================
	app = FastAPI(
	    title="Emotion Detection API",
	    description="DeBERTa v3 based emotion detection for 7 emotion classes",
	    version="1.0.0",
	    lifespan=lifespan
	)

	# CORS middleware
	FRONTEND_URL = 'https://moodflix-ai-nu.vercel.app'

	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=[
	        "http://localhost:3000",  # Local development
	        "http://localhost:8000",  # Local backend
	        FRONTEND_URL,  # Production frontend
	    ],
	    # Entries in allow_origins are compared literally, so a glob such as
	    # "https://*.vercel.app" never matches a real Origin header.  Vercel
	    # preview deployments are matched with a regex instead.
	    # NOTE(review): with allow_credentials=True this trusts every
	    # *.vercel.app site - tighten to the project's own prefix if possible.
	    allow_origin_regex=r"https://[A-Za-z0-9-]+\.vercel\.app",
	    allow_credentials=True,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# ==================== REQUEST/RESPONSE MODELS ====================
	class PredictionRequest(BaseModel):
	    # Raw input text; between 1 and MAX_TEXT_LENGTH characters.
	    text: str = Field(..., min_length=1, max_length=config.MAX_TEXT_LENGTH)

	    @field_validator('text')
	    @classmethod
	    def validate_text(cls, v):
	        # Reject whitespace-only input and hand back the trimmed text.
	        trimmed = v.strip() if v else ""
	        if trimmed:
	            return trimmed
	        raise ValueError('Text cannot be empty or whitespace only')

	# Response body of POST /predict.
	class PredictionResponse(BaseModel):
	emotion: str # Label with the highest probability
	confidence: float # Probability of that label
	all_probabilities: Dict[str, float] # Probability for every emotion label
	inference_time_ms: float # Time spent in ModelManager.predict, in milliseconds
	text_length: int # Character count of the validated request text

	# Response body of GET /health.
	class HealthResponse(BaseModel):
	status: str # "healthy" when the check itself succeeded
	device: str # Value of config.DEVICE ("cuda" or "cpu")
	model_loaded: bool # True when the model manager currently holds a model
	memory_mb: float # Resident set size of this process, in MiB

	class BatchPredictionRequest(BaseModel):
	    # Between 1 and 10 texts per request.
	    texts: List[str] = Field(..., min_length=1, max_length=10)

	    @field_validator('texts')
	    @classmethod
	    def validate_texts(cls, v):
	        """Trim every text, drop blank ones and enforce the size limits.

	        Raises:
	            ValueError: when no non-blank text remains, more than 10 texts
	                remain, or any text exceeds ``config.MAX_TEXT_LENGTH``
	                characters (the same cap ``PredictionRequest`` applies).
	        """
	        trimmed = (t.strip() for t in v if t)
	        cleaned = [t for t in trimmed if t]
	        if not cleaned:
	            raise ValueError('At least one valid text required')
	        if len(cleaned) > 10:
	            raise ValueError('Maximum 10 texts allowed per batch')
	        if any(len(t) > config.MAX_TEXT_LENGTH for t in cleaned):
	            raise ValueError(
	                f'Each text must be at most {config.MAX_TEXT_LENGTH} characters'
	            )
	        return cleaned

	# ==================== MOVIE MODELS ====================
	# One movie as returned to the client; filled from a TMDB discover result.
	class MovieItem(BaseModel):
	id: int # TMDB "id" field
	title: str # TMDB "title" field
	poster_path: Optional[str] = None # TMDB "poster_path" field, may be absent
	vote_average: float # TMDB "vote_average" field
	release_date: Optional[str] = None # TMDB "release_date" field, may be absent

	# Movies fetched for a single genre name.
	class GenreMovies(BaseModel):
	genre: str # Genre name taken from Config.EMOTION_GENRE_MAP
	movies: List[MovieItem]

	# Response body of POST /recommendations.
	class RecommendationResponse(BaseModel):
	emotion: str # Detected emotion label
	confidence: float # Probability of the detected emotion
	recommendations: List[GenreMovies] # One entry per genre that yielded movies

	# ==================== ENDPOINTS ====================
	@app.get("/", response_model=Dict)
	async def root():
	    """Describe the service and list its main endpoints."""
	    endpoint_index = {
	        "predict": "/predict",
	        "batch_predict": "/batch_predict",
	        "health": "/health",
	        "emotions": "/emotions",
	    }
	    return {
	        "message": "Emotion Detection API",
	        "version": "1.0.0",
	        "endpoints": endpoint_index,
	    }

	@app.get("/health", response_model=HealthResponse)
	async def health_check():
	    """Report device, model availability and process memory usage."""
	    try:
	        rss_bytes = psutil.Process().memory_info().rss
	        rss_mb = rss_bytes / 1024 ** 2
	        model_is_loaded = model_manager.model is not None
	        return HealthResponse(
	            status="healthy",
	            device=config.DEVICE,
	            model_loaded=model_is_loaded,
	            memory_mb=round(rss_mb, 2),
	        )
	    except Exception as err:
	        logger.error(f"Health check failed: {err}")
	        raise HTTPException(status_code=500, detail="Health check failed")

	@app.get("/emotions", response_model=Dict)
	async def get_emotions():
	    """Return the supported emotion labels and how many there are."""
	    labels = config.EMOTION_CLASSES
	    return {"emotions": labels, "count": len(labels)}

	@app.post("/predict", response_model=PredictionResponse)
	async def predict_emotion(request: PredictionRequest):
	    """Classify one text and return its emotion with probabilities."""
	    try:
	        text = request.text
	        logger.info(f"Prediction request: {len(text)} chars")

	        prediction = model_manager.predict(text)

	        # Copy the model output into the response and add the input length.
	        copied_fields = ("emotion", "confidence", "all_probabilities", "inference_time_ms")
	        return PredictionResponse(
	            text_length=len(text),
	            **{name: prediction[name] for name in copied_fields},
	        )

	    except HTTPException:
	        # Already carries a status code and detail; pass it through.
	        raise
	    except Exception as err:
	        logger.error(f"Prediction endpoint error: {err}", exc_info=True)
	        raise HTTPException(status_code=500, detail=str(err))

	@app.post("/batch_predict")
	async def batch_predict_emotion(request: BatchPredictionRequest):
	    """Predict emotions for multiple texts.

	    Returns:
	        Dict with ``count`` and ``predictions``; each prediction carries a
	        preview of the text (first 100 characters), the emotion, its
	        confidence and all class probabilities.

	    Raises:
	        HTTPException: propagated unchanged from the model manager, or
	            status 500 for any other failure.
	    """
	    try:
	        logger.info(f"Batch prediction request: {len(request.texts)} texts")

	        results = []
	        for text in request.texts:
	            result = model_manager.predict(text)
	            preview = text[:100] + "..." if len(text) > 100 else text
	            results.append({
	                "text": preview,
	                "emotion": result["emotion"],
	                "confidence": result["confidence"],
	                "all_probabilities": result["all_probabilities"]
	            })

	        return {
	            "count": len(results),
	            "predictions": results
	        }

	    except HTTPException:
	        # Keep the original status/detail instead of re-wrapping it, matching
	        # the /predict and /recommendations endpoints.
	        raise
	    except Exception as e:
	        logger.error(f"Batch prediction error: {e}", exc_info=True)
	        raise HTTPException(status_code=500, detail=str(e))

	@app.post("/recommendations", response_model=RecommendationResponse)
	async def get_movie_recommendations(request: PredictionRequest):
	    """
	    Detect the emotion of the text and recommend movies for it.

	    The emotion is mapped to genres via ``config.EMOTION_GENRE_MAP`` and up
	    to 12 of the most popular movies per genre are fetched from TMDB.
	    """
	    try:
	        logger.info(f"Recommendation request: {len(request.text)} chars")

	        # Step 1: emotion detection
	        prediction = model_manager.predict(request.text)
	        detected_emotion, confidence = prediction["emotion"], prediction["confidence"]
	        logger.info(f"Detected emotion: {detected_emotion} (confidence: {confidence})")

	        # Step 2: genres mapped to that emotion
	        genres = config.EMOTION_GENRE_MAP.get(detected_emotion, [])
	        if not genres:
	            raise HTTPException(status_code=400, detail=f"No genres mapped for emotion: {detected_emotion}")

	        # Step 3: movies per genre
	        movies_by_genre = await fetch_movies_by_genres(genres, limit=12)

	        # Step 4: keep mapping order, skip genres that produced no movies
	        recommendations = [
	            GenreMovies(genre=name, movies=movies_by_genre[name])
	            for name in genres
	            if name in movies_by_genre
	        ]

	        return RecommendationResponse(
	            emotion=detected_emotion,
	            confidence=confidence,
	            recommendations=recommendations,
	        )

	    except HTTPException:
	        raise
	    except Exception as err:
	        logger.error(f"Recommendation error: {err}", exc_info=True)
	        raise HTTPException(status_code=500, detail=f"Failed to get recommendations: {str(err)}")

	# ==================== ERROR HANDLERS ====================
	@app.exception_handler(Exception)
	async def global_exception_handler(request: Request, exc: Exception):
	    """Log any unhandled exception and answer with a JSON 500 response."""
	    logger.error(f"Unhandled exception: {exc}", exc_info=True)
	    error_body = {
	        "error": "Internal server error",
	        "detail": str(exc),
	        "path": str(request.url),
	    }
	    return JSONResponse(status_code=500, content=error_body)

	# ==================== TMDB MOVIE FETCHING ====================
	async def fetch_movies_by_genres(genres: List[str], limit: int = 12) -> Dict[str, List[MovieItem]]:
	    """
	    Fetch movies from TMDB API for given genres.

	    Args:
	        genres: Genre names, matched case-insensitively against TMDB's list.
	        limit: Maximum number of movies kept per genre.

	    Returns:
	        Dict with genre as key and list of MovieItem as value.  Genres that
	        are unknown to TMDB, return no movies, or fail to load are omitted.

	    Raises:
	        HTTPException: status 500 when TMDB_API_KEY is not configured.
	    """
	    if not TMDB_API_KEY:
	        logger.error("TMDB_API_KEY not configured")
	        raise HTTPException(status_code=500, detail="Movie service not configured. Please set TMDB_API_KEY environment variable.")

	    movies_by_genre = {}

	    async with httpx.AsyncClient(timeout=10.0) as client:
	        # The genre list does not depend on the requested genre, so it is
	        # fetched once instead of once per loop iteration.
	        try:
	            genres_response = await client.get(
	                f"{TMDB_BASE_URL}/genre/movie/list",
	                params={"api_key": TMDB_API_KEY}
	            )
	            genres_response.raise_for_status()
	            genre_ids = {}
	            for g in genres_response.json().get("genres", []):
	                # setdefault keeps the first entry when names repeat.
	                genre_ids.setdefault(g["name"].lower(), g["id"])
	        except httpx.HTTPError as e:
	            logger.error(f"HTTP error fetching TMDB genre list: {e}")
	            return movies_by_genre
	        except Exception as e:
	            logger.error(f"Error fetching TMDB genre list: {e}", exc_info=True)
	            return movies_by_genre

	        for genre in genres:
	            try:
	                logger.info(f"Fetching movies for genre: {genre}")

	                genre_id = genre_ids.get(genre.lower())
	                if genre_id is None:
	                    logger.warning(f"Genre '{genre}' not found in TMDB")
	                    continue

	                # Fetch movies for this genre, sorted by popularity (descending)
	                movies_response = await client.get(
	                    f"{TMDB_BASE_URL}/discover/movie",
	                    params={
	                        "api_key": TMDB_API_KEY,
	                        "with_genres": genre_id,
	                        "sort_by": "popularity.desc",
	                        "page": 1,
	                        "language": "en-US"
	                    }
	                )
	                movies_response.raise_for_status()
	                movies_data = movies_response.json()

	                movie_list = [
	                    MovieItem(
	                        id=movie.get("id"),
	                        title=movie.get("title", "Unknown"),
	                        poster_path=movie.get("poster_path"),
	                        vote_average=movie.get("vote_average", 0.0),
	                        release_date=movie.get("release_date", "N/A")
	                    )
	                    for movie in movies_data.get("results", [])[:limit]
	                ]

	                if movie_list:
	                    movies_by_genre[genre] = movie_list
	                    logger.info(f"Fetched {len(movie_list)} movies for genre: {genre}")
	                else:
	                    logger.warning(f"No movies found for genre: {genre}")

	            # One failing genre must not abort the others: log and continue.
	            except httpx.HTTPError as e:
	                logger.error(f"HTTP error fetching movies for {genre}: {e}")
	            except Exception as e:
	                logger.error(f"Error fetching movies for {genre}: {e}", exc_info=True)

	    return movies_by_genre

	# ==================== MAIN ====================
	if __name__ == "__main__":
	    # Serve the app directly when this file is executed as a script.
	    server_options = {
	        "host": "0.0.0.0",
	        "port": 8000,
	        "reload": False,
	        "log_level": "info",
	        "access_log": True,
	    }
	    uvicorn.run("main:app", **server_options)