from pydantic import BaseModel
from huggingface_hub import hf_hub_download
import logging
from typing import (
    List,
    Optional,
    Literal,
)
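
# GGUF checkpoints this app can serve; each entry holds the keyword
# arguments passed directly to hf_hub_download().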
MODEL_ARGS = {
    "llama3.2": dict(
        repo_id="hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF",
        filename="llama-3.2-3b-instruct-q8_0.gguf",
    ),
    "falcon-mamba": dict(
        repo_id="bartowski/falcon-mamba-7b-GGUF",
        filename="falcon-mamba-7b-Q4_K_M.gguf",
    ),
    "mistral-nemo": dict(
        repo_id="lmstudio-community/Mistral-Nemo-Instruct-2407-GGUF",
        filename="Mistral-Nemo-Instruct-2407-Q4_K_M.gguf",
    ),
}
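
# Log through uvicorn's error logger so messages show up in the server output.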
logger = logging.getLogger("uvicorn.error")
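
# Fetch every model file at import time so the first request doesn't
# block on a download; hf_hub_download() returns the cached local path
# and skips files already present in the local Hub cache.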
for model_arg in MODEL_ARGS.values():
    logger.info(f"Checking for {model_arg['repo_id']}")
    hf_hub_download(**model_arg)
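
# One chat turn in the OpenAI-style message format.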
class Message(BaseModel):
    role: str
    content: str
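
# Request body for the chat endpoint. The sampling fields line up with
# llama-cpp-python's create_chat_completion() parameters.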
class ChatRequest(BaseModel):
    chat_history: List[Message]
    model: Literal["llama3.2", "falcon-mamba", "mistral-nemo"] = "llama3.2"
    max_tokens: Optional[int] = 65536
    temperature: float = 0.8
    top_p: float = 0.95
    min_p: float = 0.05
    typical_p: float = 1.0
    frequency_penalty: float = 0.0
    presence_penalty: float = 0.0
    repeat_penalty: float = 1.0
    top_k: int = 40
    seed: Optional[int] = None
    tfs_z: float = 1.0
    mirostat_mode: int = 0
    mirostat_tau: float = 5.0
    mirostat_eta: float = 0.1
    # logprobs: Optional[int] = None
    # logit_bias: Optional[Dict[str, float]] = None
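
# --- Illustrative sketch, not part of the original file ---
# Since the field names above match llama-cpp-python's sampling
# parameters, a handler built on these models could look roughly like
# the following. The FastAPI app, the "/chat" route, and the _models
# cache are assumptions for illustration, not the Space's actual code.
from fastapi import FastAPI
from llama_cpp import Llama

app = FastAPI()
_models = {}  # hypothetical per-process cache of loaded Llama instances

@app.post("/chat")  # hypothetical route
def chat(request: ChatRequest):
    # Load the requested GGUF lazily; the file is already on disk
    # thanks to the hf_hub_download() loop at startup.
    if request.model not in _models:
        _models[request.model] = Llama.from_pretrained(**MODEL_ARGS[request.model])
    llm = _models[request.model]
    return llm.create_chat_completion(
        messages=[{"role": m.role, "content": m.content} for m in request.chat_history],
        max_tokens=request.max_tokens,
        temperature=request.temperature,
        top_p=request.top_p,
        min_p=request.min_p,
        typical_p=request.typical_p,
        frequency_penalty=request.frequency_penalty,
        presence_penalty=request.presence_penalty,
        repeat_penalty=request.repeat_penalty,
        top_k=request.top_k,
        seed=request.seed,
        tfs_z=request.tfs_z,
        mirostat_mode=request.mirostat_mode,
        mirostat_tau=request.mirostat_tau,
        mirostat_eta=request.mirostat_eta,
    )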