Update app.py
app.py CHANGED
@@ -1,15 +1,16 @@
import os
+import uuid
+import threading
+import logging
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from llama_cpp import Llama
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import hf_hub_download
-import logging
-import threading
from contextlib import asynccontextmanager

-#
+# --- Setup ---
logging.basicConfig(level=logging.INFO)

# --- MODEL MAP (Using the smarter Phi-3) ---
@@ -28,46 +29,18 @@ MODEL_MAP = {
    }
}

-# --- GLOBAL CACHE &
+# --- GLOBAL CACHE & LOCKS ---
llm_cache = {}
-model_lock = threading.Lock() #
-llm_lock = threading.Lock()
-
-# --- LIFESPAN FUNCTION ---
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    # This code runs ON STARTUP
-    logging.info("Server starting up... Acquiring lock to pre-load 'light' model (Phi-3).")
-    with model_lock:
-        get_llm_instance("light")
-    logging.info("Server is ready and 'light' model (Phi-3) is loaded.")
-
-    yield
-
-    # This code runs ON SHUTDOWN
-    logging.info("Server shutting down...")
-    llm_cache.clear()
-
-# Pass the lifespan function to FastAPI
-app = FastAPI(lifespan=lifespan)
-
-# --- CORS ---
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
+model_lock = threading.Lock() # Ensures only one model loads at a time
+llm_lock = threading.Lock() # Ensures only one generation job runs at a time
+JOBS = {} # Our in-memory "database" for background jobs

-# --- Helper
+# --- Helper: Load Model ---
def get_llm_instance(choice: str) -> Llama:
-    # Use the *model* lock for loading
    with model_lock:
        if choice not in MODEL_MAP:
            logging.error(f"Invalid model choice: {choice}")
            return None
-
        if choice in llm_cache:
            logging.info(f"Using cached model: {choice}")
            return llm_cache[choice]
@@ -99,16 +72,83 @@ def get_llm_instance(choice: str) -> Llama:
        logging.critical(f"CRITICAL ERROR: Failed to download/load model {filename}. Error: {e}", exc_info=True)
        return None

-# ---
-
+# --- Helper: The Background AI Task ---
+def run_generation_in_background(job_id: str, model_choice: str, prompt: str):
+    """
+    This function runs in a separate thread.
+    It performs the long-running AI generation.
+    """
+    global JOBS
+    try:
+        logging.info(f"Job {job_id}: Waiting to acquire LLM lock...")
+        with llm_lock:
+            logging.info(f"Job {job_id}: Lock acquired. Loading model.")
+            llm = get_llm_instance(model_choice)
+            if llm is None:
+                raise Exception("Model could not be loaded.")
+
+            JOBS[job_id]["status"] = "processing"
+            logging.info(f"Job {job_id}: Processing prompt...")
+
+            output = llm(
+                prompt,
+                max_tokens=512,
+                stop=["<|user|>", "<|endoftext|>", "user:"],
+                echo=False
+            )
+
+            generated_text = output["choices"][0]["text"].strip()
+
+            JOBS[job_id]["status"] = "complete"
+            JOBS[job_id]["result"] = generated_text
+            logging.info(f"Job {job_id}: Complete.")
+
+    except Exception as e:
+        logging.error(f"Job {job_id}: Failed. Error: {e}")
+        JOBS[job_id]["status"] = "error"
+        JOBS[job_id]["result"] = str(e)
+    finally:
+        logging.info(f"Job {job_id}: LLM lock released.")
+
+
+# --- FastAPI App & Lifespan ---
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    logging.info("Server starting up... Pre-loading 'light' model.")
+    get_llm_instance("light")
+    logging.info("Server is ready and 'light' model is loaded.")
+    yield
+    logging.info("Server shutting down...")
+    llm_cache.clear()
+
+app = FastAPI(lifespan=lifespan)
+
+# --- !!! THIS IS THE CORS FIX !!! ---
+# We are explicitly adding your GitHub Pages URL
+origins = [
+    "https://fugthchat.github.io", # <-- YOUR LIVE SITE
+    "http://localhost", # For local testing
+    "http://127.0.0.1:5500" # For local testing
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# --- END OF CORS FIX ---
+
+# --- API Data Models ---
+class SubmitPrompt(BaseModel):
    prompt: str
    model_choice: str
-    feedback: str = ""
-    story_memory: str = ""

# --- API Endpoints ---
@app.get("/")
def get_status():
+    """This is the 'wake up' and status check endpoint."""
    loaded_model = list(llm_cache.keys())[0] if llm_cache else "None"
    return {
        "status": "AI server is online",
@@ -116,40 +156,36 @@ def get_status():
        "models": list(MODEL_MAP.keys())
    }

-@app.post("/
-async def
+@app.post("/submit_job")
+async def submit_job(prompt: SubmitPrompt):
    """
-
-    This is simple and stable.
+    Instantly accepts a job and starts it in the background.
    """
-
-
-
-
-
-
-
-
-
-
-
-    final_prompt = prompt.prompt
-
-    logging.info(f"Generating with {prompt.model_choice}...")
-    output = llm(
-        final_prompt,
-        max_tokens=512,
-        stop=["<|user|>", "<|endoftext|>", "user:"],
-        echo=False
-    )
-
-    generated_text = output["choices"][0]["text"].strip()
-    logging.info("Generation complete.")
-
-    return {"story_text": generated_text}
+    job_id = str(uuid.uuid4())
+    JOBS[job_id] = {"status": "pending", "result": None}
+
+    thread = threading.Thread(
+        target=run_generation_in_background,
+        args=(job_id, prompt.model_choice, prompt.prompt)
+    )
+    thread.start()
+
+    logging.info(f"Job {job_id} submitted.")
+    return {"job_id": job_id}

-
-
-
-
-
+@app.get("/get_job_status/{job_id}")
+async def get_job_status(job_id: str):
+    """
+    Allows the frontend to check on a job.
+    """
+    job = JOBS.get(job_id)
+
+    if job is None:
+        return JSONResponse(status_code=404, content={"error": "Job not found."})
+
+    if job["status"] in ["complete", "error"]:
+        result = job
+        del JOBS[job_id] # Clean up
+        return result
+
+    return {"status": job["status"]}
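For reference, a minimal client-side sketch of the new submit-and-poll flow introduced by this commit. It is not part of app.py: the generate() helper and BASE_URL are illustrative placeholders, and it assumes the requests package is installed. The client POSTs to /submit_job, then polls /get_job_status/{job_id} until the job reports "complete" or "error"; note the server deletes the job record when it returns a terminal status, so the result must be read from that response.

# Hypothetical polling client (sketch only, not part of the repository).
import time
import requests

BASE_URL = "https://your-space.hf.space"  # placeholder; replace with the deployed server address

def generate(prompt: str, model_choice: str = "light", poll_interval: float = 2.0) -> str:
    # Submit the job; the server returns a job_id immediately.
    resp = requests.post(
        f"{BASE_URL}/submit_job",
        json={"prompt": prompt, "model_choice": model_choice},
        timeout=30,
    )
    resp.raise_for_status()
    job_id = resp.json()["job_id"]

    # Poll until the background thread marks the job complete or failed.
    while True:
        status = requests.get(f"{BASE_URL}/get_job_status/{job_id}", timeout=30).json()
        if status.get("status") == "complete":
            return status["result"]
        if status.get("status") == "error":
            raise RuntimeError(status.get("result"))
        time.sleep(poll_interval)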