Spaces:

fugthchat
/

fugthdes

Sleeping

App Files Files Community

fugthchat committed on Nov 7

Commit

a867634

verified ·

1 Parent(s): c8d6e8a

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -112

app.py CHANGED Viewed

@@ -1,19 +1,18 @@
 import os
-import uuid
-import threading
-import logging
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from llama_cpp import Llama
 from fastapi.middleware.cors import CORSMiddleware
 from huggingface_hub import hf_hub_download
 from contextlib import asynccontextmanager
-# --- Setup ---
 logging.basicConfig(level=logging.INFO)
-# --- Model Map (Using the smarter Phi-3) ---
 MODEL_MAP = {
     "light": {
         "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
@@ -29,17 +28,41 @@ MODEL_MAP = {
     }
 }
-# --- Global Caches & Locks ---
 llm_cache = {}
-model_lock = threading.Lock() # Ensures only one model loads at a time
-llm_lock = threading.Lock() # Ensures only one generation job runs at a time
-# This is our new in-memory "database" for jobs
-# It will hold the status and results of background tasks
-JOBS = {}
-# --- Helper: Load Model ---
 def get_llm_instance(choice: str) -> Llama:
     with model_lock:
         if choice not in MODEL_MAP:
             logging.error(f"Invalid model choice: {choice}")
@@ -76,76 +99,16 @@ def get_llm_instance(choice: str) -> Llama:
             logging.critical(f"CRITICAL ERROR: Failed to download/load model {filename}. Error: {e}", exc_info=True)
             return None
-# --- Helper: The Background AI Task ---
-def run_generation_in_background(job_id: str, model_choice: str, prompt: str):
-    """
-    This function runs in a separate thread.
-    It performs the long-running AI generation.
-    """
-    global JOBS
-    try:
-        # Acquire the lock. If another job is running, this will wait.
-        logging.info(f"Job {job_id}: Waiting to acquire LLM lock...")
-        with llm_lock:
-            logging.info(f"Job {job_id}: Lock acquired. Loading model.")
-            llm = get_llm_instance(model_choice)
-            if llm is None:
-                raise Exception("Model could not be loaded.")
-            JOBS[job_id]["status"] = "processing"
-            logging.info(f"Job {job_id}: Processing prompt...")
-            output = llm(
-                prompt,
-                max_tokens=512,
-                stop=["<|user|>", "<|endoftext|>", "user:"],
-                echo=False
-            )
-            generated_text = output["choices"][0]["text"].strip()
-            # Save the result and mark as complete
-            JOBS[job_id]["status"] = "complete"
-            JOBS[job_id]["result"] = generated_text
-            logging.info(f"Job {job_id}: Complete.")
-    except Exception as e:
-        logging.error(f"Job {job_id}: Failed. Error: {e}")
-        JOBS[job_id]["status"] = "error"
-        JOBS[job_id]["result"] = str(e)
-    finally:
-        # The lock is automatically released by the 'with' statement
-        logging.info(f"Job {job_id}: LLM lock released.")
-# --- FastAPI App & Lifespan ---
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    logging.info("Server starting up... Pre-loading 'light' model.")
-    get_llm_instance("light")
-    logging.info("Server is ready and 'light' model is loaded.")
-    yield
-    logging.info("Server shutting down...")
-    llm_cache.clear()
-app = FastAPI(lifespan=lifespan)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# --- API Data Models ---
-class SubmitPrompt(BaseModel):
     prompt: str
     model_choice: str
 # --- API Endpoints ---
 @app.get("/")
 def get_status():
-    """This is the 'wake up' and status check endpoint."""
     loaded_model = list(llm_cache.keys())[0] if llm_cache else "None"
     return {
         "status": "AI server is online",
@@ -153,42 +116,40 @@ def get_status():
         "models": list(MODEL_MAP.keys())
     }
-@app.post("/submit_job")
-async def submit_job(prompt: SubmitPrompt):
     """
-    NEW: Instantly accepts a job and starts it in the background.
     """
-    job_id = str(uuid.uuid4())
-    # Store the job as "pending"
-    JOBS[job_id] = {"status": "pending", "result": None}
-    # Start the background thread
-    thread = threading.Thread(
-        target=run_generation_in_background,
-        args=(job_id, prompt.model_choice, prompt.prompt)
-    )
-    thread.start()
-    logging.info(f"Job {job_id} submitted.")
-    # Return the Job ID to the user immediately
-    return {"job_id": job_id}
-@app.get("/get_job_status/{job_id}")
-async def get_job_status(job_id: str):
-    """
-    NEW: Allows the frontend to check on a job.
-    """
-    job = JOBS.get(job_id)
-    if job is None:
-        return JSONResponse(status_code=404, content={"error": "Job not found."})
-    # If the job is done, send the result and remove it from memory
-    if job["status"] in ["complete", "error"]:
-        result = job
-        del JOBS[job_id] # Clean up
-        return result
-    # If not done, just send the current status
-    return {"status": job["status"]}

 import os
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from llama_cpp import Llama
 from fastapi.middleware.cors import CORSMiddleware
 from huggingface_hub import hf_hub_download
+import logging
+import threading
 from contextlib import asynccontextmanager
+# Set up logging
 logging.basicConfig(level=logging.INFO)
+# --- MODEL MAP (Using the smarter Phi-3) ---
 MODEL_MAP = {
     "light": {
         "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
     }
 }
+# --- GLOBAL CACHE & LOCK ---
+# Module-level model cache. get_status() reports its first key as the loaded
+# model and lifespan() clears it on shutdown.
+# (presumably maps a model choice name to a loaded Llama; the code that
+# populates it is not visible here — confirm in get_llm_instance)
 llm_cache = {}
+# Guards model loading; get_llm_instance() acquires it internally.
+# threading.Lock is NOT reentrant: a caller that already holds model_lock and
+# then calls get_llm_instance() will block forever.
+model_lock = threading.Lock() # For loading models
+# Held by generate_story() around the llm(...) call.
+llm_lock = threading.Lock()   # For running generation
+# --- LIFESPAN FUNCTION ---
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # This code runs ON STARTUP
+    logging.info("Server starting up... Acquiring lock to pre-load 'light' model (Phi-3).")
+    with model_lock:
+        get_llm_instance("light")
+    logging.info("Server is ready and 'light' model (Phi-3) is loaded.")
+    yield
+    # This code runs ON SHUTDOWN
+    logging.info("Server shutting down...")
+    llm_cache.clear()
+# Pass the lifespan function to FastAPI
+app = FastAPI(lifespan=lifespan)
+# --- CORS ---
+# Accept cross-origin requests from any origin, with any method and header
+# (presumably so a separately hosted frontend can call this API — confirm).
+# NOTE(review): FastAPI's CORS documentation says allow_origins, allow_methods
+# and allow_headers should be listed explicitly rather than set to ["*"] when
+# allow_credentials=True. Confirm whether credentials are needed at all; if
+# not, allow_credentials can likely be dropped.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# --- Helper Function to Load Model ---
 def get_llm_instance(choice: str) -> Llama:
+    # Use the *model* lock for loading
     with model_lock:
         if choice not in MODEL_MAP:
             logging.error(f"Invalid model choice: {choice}")
             logging.critical(f"CRITICAL ERROR: Failed to download/load model {filename}. Error: {e}", exc_info=True)
             return None
+# --- API Data Models (SIMPLIFIED) ---
+# Request body accepted by POST /generate.
+class StoryPrompt(BaseModel):
+    # Full prompt text; generate_story() passes it to the model unchanged.
     prompt: str
+    # Model selector; generate_story() hands it to get_llm_instance().
     model_choice: str
+    # Optional fields defaulting to "". The generate_story() handler shown
+    # here never reads them (presumably kept so existing clients that still
+    # send them validate — confirm against the frontend).
+    feedback: str = ""
+    story_memory: str = ""
 # --- API Endpoints ---
 @app.get("/")
 def get_status():
     loaded_model = list(llm_cache.keys())[0] if llm_cache else "None"
     return {
         "status": "AI server is online",
         "models": list(MODEL_MAP.keys())
     }
+@app.post("/generate")
+async def generate_story(prompt: StoryPrompt):
     """
+    Main generation endpoint.
+    This is simple and stable.
     """
+    logging.info("Request received. Waiting to acquire LLM lock...")
+    # Use the *generation* lock
+    with llm_lock:
+        logging.info("Lock acquired. Processing request.")
+        try:
+            llm = get_llm_instance(prompt.model_choice)
+            if llm is None:
+                logging.error(f"Failed to get model for choice: {prompt.model_choice}")
+                return JSONResponse(status_code=503, content={"error": "The AI model is not available or failed to load."})
+            # We trust the frontend to build the full prompt
+            final_prompt = prompt.prompt
+            logging.info(f"Generating with {prompt.model_choice}...")
+            output = llm(
+                final_prompt,
+                max_tokens=512,
+                stop=["<|user|>", "<|endoftext|>", "user:"],
+                echo=False
+            )
+            generated_text = output["choices"][0]["text"].strip()
+            logging.info("Generation complete.")
+            return {"story_text": generated_text}
+        except Exception as e:
+            logging.error(f"An internal error occurred during generation: {e}", exc_info=True)
+            return JSONResponse(status_code=500, content={"error": "An unexpected error occurred."})
+        finally:
+            logging.info("Releasing LLM lock.")