Update main.py
main.py CHANGED
@@ -2,12 +2,15 @@ from fastapi import FastAPI
 from pydantic import BaseModel, Field
 from typing import List, Dict
 from llama_cpp import Llama
+import os
+os.environ["HF_HOME"] = "./cache"
 
 # Load the Llama model with the specified path and configuration
 llm = Llama.from_pretrained(
     repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",  # Replace with the actual model repository ID
     filename="Llama-3.2-3B-Instruct-Q8_0.gguf",  # Replace with your actual model filename if necessary
     n_ctx=4096,
+    cache_dir="./cache",
     n_threads=2,
 )
 
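The change redirects the Hugging Face download cache to a local ./cache directory, both globally via the HF_HOME environment variable and per-call via the cache_dir argument of Llama.from_pretrained — presumably so model downloads land inside the app's working directory (e.g. on a Space where the default ~/.cache may not be writable). A minimal sketch of the same idea, not part of the commit, assuming llama-cpp-python resolves the file through huggingface_hub's hf_hub_download:

# Sketch (hypothetical): pre-fetch the GGUF file into the same local cache
# the updated main.py uses, then point Llama at the resolved path.
import os

# HF_HOME must be set before huggingface_hub is imported, or it has no effect.
os.environ["HF_HOME"] = "./cache"

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download (or reuse) the model file under ./cache instead of ~/.cache.
model_path = hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q8_0.gguf",
    cache_dir="./cache",  # same directory the diff passes to from_pretrained
)

llm = Llama(model_path=model_path, n_ctx=4096, n_threads=2)
print(model_path)  # resolves somewhere under ./cache/

Setting both values is belt and braces: HF_HOME moves everything huggingface_hub writes (hub cache, tokens), while cache_dir only overrides the download location for that one call.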