from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time

MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-instruct" |
|
|
|
|
|
DEVICE = "cpu" |
|
|
MAX_NEW_TOKENS = 512 |
|
|
TORCH_DTYPE = torch.float32 |
|
|
|
|
|
|
|
|
model = None |
|
|
tokenizer = None |
|
|
|
|
|
|
|
|
class CodeRequest(BaseModel):
    """Defines the expected input structure from the front-end website."""
    user_prompt: str
    code_context: str

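# Example request body for /fix_code (illustrative values, not from the source):
#   {"user_prompt": "Fix the off-by-one bug in this loop",
#    "code_context": "for i in range(1, len(items)): print(items[i])"}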

app = FastAPI(title="CodeFlow AI Agent Backend - DeepSeek SLM")

@app.on_event("startup")
async def startup_event():
    """Load the DeepSeek SLM model and tokenizer ONLY ONCE when the server starts."""
    global model, tokenizer
    print(f"--- Starting CodeFlow AI Agent (DeepSeek 1.3B) on {DEVICE} ---")
    start_time = time.time()

    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
        # `torch_dtype` (not `dtype`) is the from_pretrained keyword for load precision.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=TORCH_DTYPE,
            device_map=DEVICE,
            trust_remote_code=True,
        )
        model.eval()
        print(f"DeepSeek model loaded successfully in {time.time() - start_time:.2f} seconds.")
    except Exception as e:
        print(f"ERROR: Failed to load DeepSeek model on CPU: {e}")
        # Re-raise so the server fails fast instead of serving a half-loaded model.
        raise RuntimeError(f"Model failed to load on startup: {e}")

@app.get("/") |
|
|
def read_root(): |
|
|
"""Simple health check endpoint for Hugging Face to confirm the app is running.""" |
|
|
return {"status": "ok", "agent": "CodeBuddy DeepSeek-Coder-1.3B"} |
|
|
|
|
|
|
|
|
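# Quick manual check (assumes the typical Hugging Face Spaces port, 7860):
#   curl http://localhost:7860/
#   -> {"status": "ok", "agent": "CodeBuddy DeepSeek-Coder-1.3B"}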

@app.post("/fix_code")
async def fix_code_endpoint(request: CodeRequest):
    """
    Accepts code context and a task, processes them with DeepSeek-Coder, and returns the fix.
    """
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="AI Agent is still loading or failed to start.")

    instruction = (
        f"You are Arya's CodeBuddy, an elite Full-Stack Software Engineer. Your only job is to analyze "
        f"the user's request and provide the complete, fixed, or generated code. You must ONLY output "
        f"a single, complete, and corrected Markdown code block. Use a friendly and encouraging tone.\n\n"
        f"TASK: {request.user_prompt}\n\n"
        f"CODE_CONTEXT:\n{request.code_context}"
    )

    # DeepSeek-Coder instruct models expect this Instruction/Response prompt format.
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"

    model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)

    try:
        # Greedy decoding: with do_sample=False a temperature setting would be
        # ignored (and triggers a warning), so it is omitted. no_grad() avoids
        # building autograd state during inference.
        with torch.no_grad():
            generated_ids = model.generate(
                **model_inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

        response_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

        # The decoded text still contains the prompt; keep only what follows the
        # last "### Response:" marker so the client gets just the model's answer.
        final_code_only = response_text.split("### Response:")[-1].strip()

        return {"fixed_code": final_code_only}

    except Exception as e:
        print(f"Generation error: {e}")
        raise HTTPException(status_code=500, detail="The DeepSeek CodeBuddy encountered a processing error.")
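

# Local-run sketch (assumptions: uvicorn is installed and port 7860, the usual
# Hugging Face Spaces port, is free; Spaces normally launches the app itself,
# so this block only matters for local testing):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example call to /fix_code (illustrative values):
#   curl -X POST http://localhost:7860/fix_code \
#     -H "Content-Type: application/json" \
#     -d '{"user_prompt": "Fix the bug", "code_context": "print(1/0)"}'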