Spaces:

Do0rMaMu
/

phi3-sql

Runtime error

Do0rMaMu committed on Jun 25, 2024

Commit

73bce0b

verified ·

1 Parent(s): f2e8041

Create main.py

Files changed (1) hide show

main.py ADDED Viewed

+from fastapi import FastAPI
+from pydantic import BaseModel
+# Assuming Llama class has been correctly imported and set up
+from llama_cpp import Llama
+# Model loading with specified path and configuration
+llm = Llama(
+    model_path="Anoop03031988/Phi-3-mini-4k-instruct-text-to-sql-GGUF",  # Update the path as necessary
+    n_ctx=4096,       # Maximum number of tokens for context (input + output)
+    n_threads=2,      # Number of CPU cores used
+)
+# Request body schema for POST /generate_response, validated by pydantic.
+class Validation(BaseModel):
+    # Required: the user's message, inserted into the user turn of the prompt.
+    user_prompt: str
+    # Required: the instructions inserted into the system turn of the prompt.
+    system_prompt: str
+    # Forwarded to the model call as max_tokens; 1024 when the client omits it.
+    max_tokens: int = 1024
+    # Forwarded to the model call as temperature; 0.01 when the client omits it.
+    temperature: float = 0.01
+# FastAPI application instance; the endpoint below registers itself on it via @app.post
+app = FastAPI()
+# Endpoint for generating responses
+@app.post("/generate_response")
+async def generate_response(item: Validation):
+    # Construct the complete prompt using the given system and user prompts
+    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|> \n
+{ item.system_prompt }<|eot_id|> \n <|start_header_id|>user<|end_header_id|>
+{ item.user_prompt }<|eot_id|> \n <|start_header_id|>assistant<|end_header_id|>"""
+    # Call the Llama model to generate a response
+    output = llm(prompt, max_tokens = item.max_tokens,temperature = item.temperature, echo=True)
+    # Extract and return the text from the response
+    return output['choices'][0]['text']