Proff12 committed (verified) · Commit e618a4f · Parent(s): c57d186
backend/app/__init__.py ADDED
(empty file)
backend/app/main.py ADDED
import os
from typing import List, Literal, Optional

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

APP_TITLE = "HF Chat (Fathom-R1-14B)"
APP_VERSION = "0.2.0"

# ---- Config via ENV ----
MODEL_ID = os.getenv("MODEL_ID", "FractalAIResearch/Fathom-R1-14B")
PIPELINE_TASK = os.getenv("PIPELINE_TASK", "text-generation")
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "8192"))  # keep prompt reasonable
STATIC_DIR = os.getenv("STATIC_DIR", "/app/static")
ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "")
QUANTIZE = os.getenv("QUANTIZE", "auto")  # auto|4bit|8bit|none

app = FastAPI(title=APP_TITLE, version=APP_VERSION)

if ALLOWED_ORIGINS:
    origins = [o.strip() for o in ALLOWED_ORIGINS.split(",") if o.strip()]
    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

class Message(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str

class ChatRequest(BaseModel):
    messages: List[Message]
    max_new_tokens: int = 512
    temperature: float = 0.7
    top_p: float = 0.95
    repetition_penalty: Optional[float] = 1.0
    stop: Optional[List[str]] = None

class ChatResponse(BaseModel):
    reply: str
    model: str

tokenizer = None
model = None
generator = None

def load_pipeline():
    global tokenizer, model, generator
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
    if tokenizer.pad_token is None and tokenizer.eos_token is not None:
        tokenizer.pad_token = tokenizer.eos_token

    # Determine load strategy
    load_kwargs = {}
    dtype = torch.bfloat16 if device == "cuda" else torch.float32

    if device == "cuda":
        # Try quantization if requested
        if QUANTIZE.lower() in ("4bit", "8bit", "auto"):
            try:
                from transformers import BitsAndBytesConfig
                import bitsandbytes as bnb  # noqa: F401
                # Recent transformers expects quantization options via
                # BitsAndBytesConfig rather than bare load_in_*bit kwargs.
                if QUANTIZE.lower() == "8bit":
                    load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
                else:
                    # 4bit or auto (prefer 4bit)
                    load_kwargs["quantization_config"] = BitsAndBytesConfig(
                        load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
                    )
            except Exception:
                # bitsandbytes not available; fall back to full precision on GPU
                pass
        load_kwargs.setdefault("torch_dtype", dtype)
        load_kwargs.setdefault("device_map", "auto")
    else:
        # CPU fallback
        load_kwargs.setdefault("torch_dtype", dtype)

    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)

    # The model is already placed (device_map="auto" on GPU, CPU otherwise),
    # so do not pass device/device_map to the pipeline as well; it would try
    # to move an accelerate-dispatched model and fail.
    generator = pipeline(
        PIPELINE_TASK,
        model=model,
        tokenizer=tokenizer,
    )

@app.on_event("startup")
def _startup():
    load_pipeline()

def messages_to_prompt(messages: List[Message]) -> str:
    """
    Prefer the tokenizer's chat template (Qwen-based models ship one).
    Fall back to a simple transcript.
    """
    try:
        # Convert to HF chat format: list of dicts with role/content
        chat = [{"role": m.role, "content": m.content} for m in messages]
        return tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    except Exception:
        # Fallback formatting
        parts = []
        for m in messages:
            if m.role == "system":
                parts.append(f"System: {m.content}")
            elif m.role == "user":
                parts.append(f"User: {m.content}")
            else:
                parts.append(f"Assistant: {m.content}")
        parts.append("Assistant:")
        return "\n".join(parts)

def truncate_prompt(prompt: str, max_tokens: int) -> str:
    ids = tokenizer(prompt, return_tensors="pt", truncation=False)["input_ids"][0]
    if len(ids) <= max_tokens:
        return prompt
    trimmed = ids[-max_tokens:]  # keep the most recent tokens
    return tokenizer.decode(trimmed, skip_special_tokens=True)

@app.get("/api/health")
def health():
    device = next(model.parameters()).device.type if model is not None else "N/A"
    return {"status": "ok", "model": MODEL_ID, "task": PIPELINE_TASK, "device": device}

@app.post("/api/chat", response_model=ChatResponse)
def chat(req: ChatRequest):
    if generator is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    if not req.messages:
        raise HTTPException(status_code=400, detail="messages cannot be empty")

    raw_prompt = messages_to_prompt(req.messages)
    prompt = truncate_prompt(raw_prompt, MAX_INPUT_TOKENS)

    gen_kwargs = {
        "max_new_tokens": req.max_new_tokens,
        "do_sample": req.temperature > 0,
        "temperature": req.temperature,
        "top_p": req.top_p,
        "repetition_penalty": req.repetition_penalty,
        "eos_token_id": tokenizer.eos_token_id,
        "pad_token_id": tokenizer.pad_token_id,
        "return_full_text": True,
    }

    outputs = generator(prompt, **gen_kwargs)
    if isinstance(outputs, list) and outputs and "generated_text" in outputs[0]:
        full = outputs[0]["generated_text"]
        reply = full[len(prompt):].strip() if full.startswith(prompt) else full
    else:
        reply = str(outputs)

    # The text-generation pipeline has no `stop` kwarg, so honor stop
    # sequences by trimming the decoded reply after generation instead.
    if req.stop:
        for s in req.stop:
            idx = reply.find(s)
            if idx != -1:
                reply = reply[:idx].rstrip()

    if not reply:
        reply = "(No response generated.)"
    return ChatResponse(reply=reply, model=MODEL_ID)

# Serve frontend build (if present)
if os.path.isdir(STATIC_DIR):
    app.mount("/", StaticFiles(directory=STATIC_DIR, html=True), name="static")
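A quick way to sanity-check the two endpoints above is a small client script. A minimal sketch, assuming the server is running on localhost:8000 and that `requests` is installed (it is not in requirements.txt):

# client_example.py -- hypothetical smoke test for /api/health and /api/chat
import requests

BASE = "http://localhost:8000"

# Health check: reports model id, pipeline task, and device
print(requests.get(f"{BASE}/api/health").json())

# Chat request mirroring the ChatRequest schema defined above
payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain what a tokenizer does in one sentence."},
    ],
    "max_new_tokens": 128,
    "temperature": 0.7,
    "top_p": 0.95,
}
resp = requests.post(f"{BASE}/api/chat", json=payload)
resp.raise_for_status()
print(resp.json()["reply"])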
backend/requirements.txt ADDED
fastapi>=0.115,<1
uvicorn[standard]>=0.30,<1
torch>=2.1  # imported by app/main.py; not pulled in by transformers
transformers>=4.44.0
accelerate>=0.33.0
bitsandbytes>=0.43.0
pydantic>=2.8,<3
safetensors
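For local development the backend is typically started with uvicorn. A hypothetical launcher, not part of this commit, assuming it sits in backend/ next to app/:

# run.py -- optional launcher; equivalent to `uvicorn app.main:app --host 0.0.0.0 --port 8000`
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app.main:app", host="0.0.0.0", port=8000)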
frontend/index.html ADDED
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Fathom R1 Chat</title>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="/src/main.jsx"></script>
  </body>
</html>
frontend/package.json ADDED
{
  "name": "hf-fathom-chat-frontend",
  "version": "0.1.0",
  "private": true,
  "type": "module",
  "scripts": {
    "dev": "vite",
    "build": "vite build",
    "preview": "vite preview --port 5173"
  },
  "dependencies": {
    "react": "^18.3.1",
    "react-dom": "^18.3.1"
  },
  "devDependencies": {
    "@vitejs/plugin-react": "^4.3.1",
    "vite": "^5.4.9"
  }
}
frontend/src/App.jsx ADDED
import React, { useEffect, useRef, useState } from 'react'

export default function App() {
  const [messages, setMessages] = useState([
    { role: 'system', content: 'You are a helpful assistant that explains your reasoning clearly and concisely.' }
  ])
  const [input, setInput] = useState('')
  const [loading, setLoading] = useState(false)
  const [model, setModel] = useState('')
  const endRef = useRef(null)

  // Keep the newest message in view
  useEffect(() => {
    endRef.current?.scrollIntoView({ behavior: 'smooth' })
  }, [messages, loading])

  async function sendChat(nextMessages) {
    const res = await fetch('/api/chat', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        messages: nextMessages,
        max_new_tokens: 512,
        temperature: 0.7,
        top_p: 0.95
      })
    })
    if (!res.ok) {
      const t = await res.text()
      throw new Error(`API ${res.status}: ${t}`)
    }
    return res.json()
  }

  const onSend = async () => {
    const text = input.trim()
    if (!text || loading) return
    const next = [...messages, { role: 'user', content: text }]
    setMessages(next)
    setInput('')
    setLoading(true)
    try {
      const { reply, model } = await sendChat(next)
      setModel(model)
      setMessages([...next, { role: 'assistant', content: reply }])
    } catch (e) {
      setMessages([...next, { role: 'assistant', content: `(Error) ${e.message}` }])
    } finally {
      setLoading(false)
    }
  }

  // Enter sends; Shift+Enter inserts a newline
  const onKeyDown = (e) => {
    if (e.key === 'Enter' && !e.shiftKey) {
      e.preventDefault()
      onSend()
    }
  }

  return (
    <div className="app">
      <header className="header">
        <div className="brand">Fathom R1 Chat</div>
        {model && <div className="model">{model}</div>}
      </header>

      <main className="chat">
        {messages.filter(m => m.role !== 'system').map((m, i) => (
          <div key={i} className={`bubble ${m.role}`}>
            <div className="sender">{m.role === 'user' ? 'You' : 'Assistant'}</div>
            <div className="content">{m.content}</div>
          </div>
        ))}
        {loading && <div className="bubble assistant"><div className="content">Thinking…</div></div>}
        <div ref={endRef} />
      </main>

      <footer className="composer">
        <textarea
          value={input}
          onChange={(e) => setInput(e.target.value)}
          onKeyDown={onKeyDown}
          placeholder="Ask a question…"
          rows={2}
        />
        <button onClick={onSend} disabled={loading || !input.trim()}>Send</button>
      </footer>
    </div>
  )
}
frontend/src/main.jsx ADDED
import React from 'react'
import { createRoot } from 'react-dom/client'
import App from './App.jsx'
import './styles.css'

createRoot(document.getElementById('root')).render(<App />)
frontend/src/styles.css ADDED
:root {
  color-scheme: light dark;
  --bg: #0b0f1a;
  --panel: #0f172a;
  --border: #1f2937;
  --text: #e5e7eb;
  --muted: #94a3b8;
  --user: #2563eb;
  --assistant: #374151;
  --accent: #22c55e;
}
* { box-sizing: border-box; }
html, body, #root { height: 100%; margin: 0; }
body { background: var(--bg); color: var(--text); font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial; }
.app { display: grid; grid-template-rows: auto 1fr auto; height: 100%; max-width: 900px; margin: 0 auto; }
.header { display: flex; align-items: center; gap: 12px; padding: 12px 16px; border-bottom: 1px solid var(--border); background: var(--panel); }
.brand { font-weight: 700; }
.model { font-size: 12px; color: var(--muted); margin-left: auto; }
.chat { padding: 16px; display: flex; flex-direction: column; gap: 12px; overflow-y: auto; }
.bubble { max-width: 80%; padding: 10px 12px; border-radius: 12px; }
.bubble .sender { font-size: 11px; color: var(--muted); margin-bottom: 4px; }
.bubble .content { white-space: pre-wrap; line-height: 1.4; }
.bubble.user { margin-left: auto; background: var(--user); color: white; }
.bubble.assistant { margin-right: auto; background: var(--assistant); color: #f3f4f6; }
.composer { display: flex; gap: 8px; padding: 12px; border-top: 1px solid var(--border); background: var(--panel); }
textarea { flex: 1; resize: none; padding: 10px; border-radius: 8px; border: 1px solid #263144; background: #0b1220; color: var(--text); }
button { padding: 10px 16px; border-radius: 8px; background: var(--accent); border: none; color: #062010; font-weight: 600; cursor: pointer; }
button:disabled { opacity: 0.6; cursor: default; }
frontend/vite.config.js ADDED
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'

export default defineConfig({
  plugins: [react()],
  server: {
    port: 5173,
    // Forward API calls to the FastAPI backend during development
    proxy: {
      '/api': 'http://localhost:8000'
    }
  },
  build: { outDir: 'dist' }
})
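The Vite build lands in frontend/dist, while the backend serves whatever sits in STATIC_DIR (default /app/static). A hypothetical helper sketching that copy step; the script name and layout are illustrative, not part of this commit:

# deploy_static.py -- hypothetical: copy the Vite build to where FastAPI serves it
import os
import shutil

SRC = "frontend/dist"  # matches build.outDir in vite.config.js
DST = os.getenv("STATIC_DIR", "/app/static")  # matches the backend default

shutil.rmtree(DST, ignore_errors=True)  # drop any stale build
shutil.copytree(SRC, DST)  # StaticFiles(html=True) then serves index.html at /
print(f"Copied {SRC} -> {DST}")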