from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time

# --- MODEL CONSTANTS ---
MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-instruct"
DEVICE = "cpu"  # CRITICAL: force the model onto the CPU for the free tier
MAX_NEW_TOKENS = 512  # Limit output size for speed and cost control
TORCH_DTYPE = torch.float32  # Standard float32 for maximum CPU compatibility

# Global variables for the model and tokenizer, populated once at startup
model = None
tokenizer = None

# --- API Data Structure ---
class CodeRequest(BaseModel):
    """Defines the expected input structure from the front-end website."""
    user_prompt: str   # The user's request (e.g., "Fix the bug in this function")
    code_context: str  # The block of code the user provided
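
# Example request body this schema accepts (illustrative values only, not from
# the original front-end):
#   {
#     "user_prompt": "Fix the off-by-one error in this loop",
#     "code_context": "for i in range(1, len(items)): print(items[i])"
#   }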
# --- FastAPI App Setup ---
app = FastAPI(title="CodeFlow AI Agent Backend - DeepSeek SLM")

@app.on_event("startup")
async def startup_event():
    """Load the DeepSeek SLM model and tokenizer ONLY ONCE when the server starts."""
    global model, tokenizer
    print(f"--- Starting CodeFlow AI Agent (DeepSeek 1.3B) on {DEVICE} ---")
    start_time = time.time()
    try:
        # Load the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
        # Load the model; pinning device_map to CPU is essential on the free tier
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            dtype=TORCH_DTYPE,  # `dtype` supersedes the deprecated `torch_dtype` argument
            device_map=DEVICE,
            trust_remote_code=True,
        )
        model.eval()  # Set model to evaluation mode
        print(f"DeepSeek model loaded successfully in {time.time() - start_time:.2f} seconds.")
    except Exception as e:
        # If the model fails to load, log the error and stop. Raising here makes the
        # Docker container exit, which is the correct behavior when a critical
        # component (the model) fails to load.
        print(f"ERROR: Failed to load DeepSeek model on CPU: {e}")
        raise RuntimeError(f"Model failed to load on startup: {e}") from e
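
# NOTE: recent FastAPI releases deprecate @app.on_event("startup") in favor of a
# lifespan context manager. The decorator above still works; a sketch of the
# newer pattern (not used here) would look like:
#   from contextlib import asynccontextmanager
#   @asynccontextmanager
#   async def lifespan(app: FastAPI):
#       ...  # load model and tokenizer here
#       yield
#   app = FastAPI(lifespan=lifespan)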

# --- CRITICAL HEALTH CHECK ENDPOINT (fixes the launch timeout error) ---
# This must be defined at module level, outside of any other function.
@app.get("/")
def read_root():
    """Simple health check endpoint for Hugging Face to confirm the app is running."""
    return {"status": "ok", "agent": "CodeBuddy DeepSeek-Coder-1.3B"}
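
# Quick sanity check once the container is up (assuming the conventional Spaces
# app port 7860; adjust to whatever your Dockerfile actually exposes):
#   curl http://localhost:7860/
#   -> {"status":"ok","agent":"CodeBuddy DeepSeek-Coder-1.3B"}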

# --- The Code Fixing API Endpoint ---
@app.post("/fix_code")
async def fix_code_endpoint(request: CodeRequest):
    """
    Accepts code context and a task, processes them with DeepSeek-Coder, and returns the fix.
    """
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="AI Agent is still loading or failed to start.")

    # --- CONSTRUCT AGENT PROMPT (DeepSeek instruction format) ---
    instruction = (
        "You are Arya's CodeBuddy, an elite Full-Stack Software Engineer. Your only job is to analyze "
        "the user's request and provide the complete, fixed, or generated code. You must ONLY output "
        "a single, complete, and corrected Markdown code block. Use a friendly and encouraging tone.\n\n"
        f"TASK: {request.user_prompt}\n\n"
        f"CODE_CONTEXT:\n{request.code_context}"
    )
    # Render the model's Instruction/Response template
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"

    # Tokenize and move the input tensors to the target device
    model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)
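
    # model_inputs is a BatchEncoding holding the `input_ids` and `attention_mask`
    # tensors; with DEVICE = "cpu" the .to() call above is effectively a no-op.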

    try:
        # --- GENERATE CODE (CPU inference) ---
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,  # Greedy decoding for deterministic, reliable output
            # (temperature is omitted: it has no effect when do_sample=False)
            pad_token_id=tokenizer.eos_token_id,  # Avoids the missing-pad-token warning
        )
        # Decode the full sequence (prompt + completion)
        response_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        # Post-processing: keep only the text after the last '### Response:' tag
        final_code_only = response_text.split("### Response:")[-1].strip()
        return {"fixed_code": final_code_only}
    except Exception as e:
        print(f"Generation error: {e}")
        # Return a generic error to the user
        raise HTTPException(status_code=500, detail="The DeepSeek CodeBuddy encountered a processing error.")
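
# Local development entry point. A minimal sketch: Hugging Face Spaces normally
# starts the server from the Dockerfile (e.g. `uvicorn main:app --host 0.0.0.0
# --port 7860`), so this block only matters when running `python main.py` directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request once the server is up (illustrative payload):
#   curl -X POST http://localhost:7860/fix_code \
#     -H "Content-Type: application/json" \
#     -d '{"user_prompt": "Fix the bug", "code_context": "def add(a, b): return a - b"}'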