from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time

# --- MODEL CONSTANTS ---
MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-instruct"
DEVICE = "cpu"  # CRITICAL: force the model onto the CPU for the free tier
MAX_NEW_TOKENS = 512  # Limit output size for speed and cost control
TORCH_DTYPE = torch.float32  # Standard float32 for maximum CPU compatibility

# Global variables for the model and tokenizer, populated once at startup
model = None
tokenizer = None

# --- API Data Structure ---
class CodeRequest(BaseModel):
    """Defines the expected input structure from the front-end website."""
    user_prompt: str   # The user's request (e.g., "Fix the bug in this function")
    code_context: str  # The block of code the user provided
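
# Example request body this schema accepts (illustrative values only, not from
# the original front-end):
#   {
#     "user_prompt": "Fix the off-by-one error in this loop",
#     "code_context": "for i in range(1, len(items)): print(items[i])"
#   }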
# --- FastAPI App Setup ---
app = FastAPI(title="CodeFlow AI Agent Backend - DeepSeek SLM")

@app.on_event("startup")
async def startup_event():
    """Load the DeepSeek SLM model and tokenizer ONLY ONCE when the server starts."""
    global model, tokenizer
    print(f"--- Starting CodeFlow AI Agent (DeepSeek 1.3B) on {DEVICE} ---")
    start_time = time.time()
    try:
        # Load the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
        # Load the model; pinning device_map to CPU is essential on the free tier
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            dtype=TORCH_DTYPE,  # `dtype` supersedes the deprecated `torch_dtype` argument
            device_map=DEVICE,
            trust_remote_code=True,
        )
        model.eval()  # Set model to evaluation mode
        print(f"DeepSeek model loaded successfully in {time.time() - start_time:.2f} seconds.")
    except Exception as e:
        # If the model fails to load, log the error and stop. Raising here makes the
        # Docker container exit, which is the correct behavior when a critical
        # component (the model) fails to load.
        print(f"ERROR: Failed to load DeepSeek model on CPU: {e}")
        raise RuntimeError(f"Model failed to load on startup: {e}") from e
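
# NOTE: recent FastAPI releases deprecate @app.on_event("startup") in favor of a
# lifespan context manager. The decorator above still works; a sketch of the
# newer pattern (not used here) would look like:
#   from contextlib import asynccontextmanager
#   @asynccontextmanager
#   async def lifespan(app: FastAPI):
#       ...  # load model and tokenizer here
#       yield
#   app = FastAPI(lifespan=lifespan)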

# --- CRITICAL HEALTH CHECK ENDPOINT (fixes the launch timeout error) ---
# This must be defined at module level, outside of any other function.
@app.get("/")
def read_root():
    """Simple health check endpoint for Hugging Face to confirm the app is running."""
    return {"status": "ok", "agent": "CodeBuddy DeepSeek-Coder-1.3B"}
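
# Quick sanity check once the container is up (assuming the conventional Spaces
# app port 7860; adjust to whatever your Dockerfile actually exposes):
#   curl http://localhost:7860/
#   -> {"status":"ok","agent":"CodeBuddy DeepSeek-Coder-1.3B"}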

# --- The Code Fixing API Endpoint ---
@app.post("/fix_code")
async def fix_code_endpoint(request: CodeRequest):
    """
    Accepts code context and a task, processes them with DeepSeek-Coder, and returns the fix.
    """
    if model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="AI Agent is still loading or failed to start.")

    # --- CONSTRUCT AGENT PROMPT (DeepSeek instruction format) ---
    instruction = (
        "You are Arya's CodeBuddy, an elite Full-Stack Software Engineer. Your only job is to analyze "
        "the user's request and provide the complete, fixed, or generated code. You must ONLY output "
        "a single, complete, and corrected Markdown code block. Use a friendly and encouraging tone.\n\n"
        f"TASK: {request.user_prompt}\n\n"
        f"CODE_CONTEXT:\n{request.code_context}"
    )
    # Render the model's Instruction/Response template
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"

    # Tokenize and move the input tensors to the target device
    model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)
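
    # model_inputs is a BatchEncoding holding the `input_ids` and `attention_mask`
    # tensors; with DEVICE = "cpu" the .to() call above is effectively a no-op.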

    try:
        # --- GENERATE CODE (CPU inference) ---
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,  # Greedy decoding for deterministic, reliable output
            # (temperature is omitted: it has no effect when do_sample=False)
            pad_token_id=tokenizer.eos_token_id,  # Avoids the missing-pad-token warning
        )
        # Decode the full sequence (prompt + completion)
        response_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        # Post-processing: keep only the text after the last '### Response:' tag
        final_code_only = response_text.split("### Response:")[-1].strip()
        return {"fixed_code": final_code_only}
    except Exception as e:
        print(f"Generation error: {e}")
        # Return a generic error to the user
        raise HTTPException(status_code=500, detail="The DeepSeek CodeBuddy encountered a processing error.")
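
# Local development entry point. A minimal sketch: Hugging Face Spaces normally
# starts the server from the Dockerfile (e.g. `uvicorn main:app --host 0.0.0.0
# --port 7860`), so this block only matters when running `python main.py` directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request once the server is up (illustrative payload):
#   curl -X POST http://localhost:7860/fix_code \
#     -H "Content-Type: application/json" \
#     -d '{"user_prompt": "Fix the bug", "code_context": "def add(a, b): return a - b"}'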