from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
from transformers import pipeline
import uvicorn

app = FastAPI()

# Hugging Face model identifier (adjust if necessary)
model_name = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
generator = pipeline("text-generation", model=model_name)


class ChatRequest(BaseModel):
    message: str
    history: list[tuple[str, str]]
    system_message: str
    max_tokens: int
    temperature: float
    top_p: float


@app.post("/chat")
async def chat(request: ChatRequest):
    # Rebuild the conversation as a list of role/content messages
    messages = [{"role": "system", "content": request.system_message}]
    for user_msg, assistant_msg in request.history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": request.message})

    # Generate a reply with the Hugging Face pipeline. Passing the full
    # message list (not just the latest message) lets the pipeline apply
    # the model's chat template, so the model sees the whole conversation.
    # max_new_tokens bounds only the generated reply, and the sampling
    # parameters from the request are forwarded to generation.
    response = generator(
        messages,
        max_new_tokens=request.max_tokens,
        do_sample=True,
        temperature=request.temperature,
        top_p=request.top_p,
        num_return_sequences=1,
    )
    # For chat-style input, generated_text is the conversation with the
    # new assistant turn appended last.
    return {"response": response[0]["generated_text"][-1]["content"]}


@app.get("/", response_class=HTMLResponse)
async def serve_ui():
    return """