HugScriptKitty committed on
Commit 0fe9663 · verified · 1 Parent(s): c26aced

Deploy Gradio app with multiple files

Files changed (5)
  1. app.py +46 -0
  2. config.py +1 -0
  3. models.py +38 -0
  4. requirements.txt +6 -0
  5. utils.py +3 -0
app.py ADDED
@@ -0,0 +1,46 @@
+import gradio as gr
+from models import generate_response
+from config import MODEL_NAME
+
+with gr.Blocks(title="MobileLLM-Pro Chat") as demo:
+    gr.Markdown("# MobileLLM-Pro Chat Interface")
+    gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
+
+    system_prompt = gr.Textbox(
+        label="System Prompt",
+        value="You are a helpful AI assistant.",
+        lines=2
+    )
+    temperature = gr.Slider(
+        label="Temperature",
+        minimum=0.0,
+        maximum=2.0,
+        value=0.7,
+        step=0.1
+    )
+    max_tokens = gr.Slider(
+        label="Max New Tokens",
+        minimum=10,
+        maximum=1000,
+        value=256,
+        step=10
+    )
+    top_p = gr.Slider(
+        label="Top P",
+        minimum=0.0,
+        maximum=1.0,
+        value=0.9,
+        step=0.05
+    )
+
+    chat_interface = gr.ChatInterface(
+        fn=lambda message, history, sys_prompt, temp, max_tok, top_p_val: generate_response(
+            message, history, sys_prompt, temp, max_tok, top_p_val
+        ),
+        additional_inputs=[system_prompt, temperature, max_tokens, top_p],
+        title="Chat with MobileLLM-Pro",
+        description="Adjust parameters and chat with the model."
+    )
+
+if __name__ == "__main__":
+    demo.launch()
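Note: gr.ChatInterface calls its fn with the user message and the chat history first, followed by the value of each component in additional_inputs, in order; the lambda above simply forwards those six arguments to generate_response. A minimal sketch of the equivalent direct call (the argument values here are illustrative, not from this commit):

    # Illustrative direct call, mirroring how ChatInterface invokes the lambda.
    reply = generate_response(
        "Hello!",                           # message
        [],                                 # history: list of (user, assistant) tuples
        "You are a helpful AI assistant.",  # system_prompt
        0.7,                                # temperature
        256,                                # max_tokens
        0.9,                                # top_p
    )
    print(reply)

Since the lambda passes all arguments through unchanged, fn=generate_response would behave identically here.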
config.py ADDED
@@ -0,0 +1 @@
+MODEL_NAME = "facebook/MobileLLM-Pro"
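config.py holds only the model id, so pointing the Space at a different checkpoint is a one-line change. A common variant (an assumption, not part of this commit) reads an environment-variable override, keeping the committed value as the default:

    # Hypothetical variant: allow overriding the model id without editing code.
    import os
    MODEL_NAME = os.environ.get("MODEL_NAME", "facebook/MobileLLM-Pro")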
models.py ADDED
@@ -0,0 +1,38 @@
+import spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from config import MODEL_NAME
+import torch
+
+# Load model and tokenizer globally for efficiency
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+
+@spaces.GPU(duration=60)
+def generate_response(message, history, system_prompt, temperature, max_tokens, top_p):
+    # Format conversation history
+    conversation = [{"role": "system", "content": system_prompt}]
+
+    for user_msg, assistant_msg in history:
+        conversation.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            conversation.append({"role": "assistant", "content": assistant_msg})
+
+    conversation.append({"role": "user", "content": message})
+
+    # Format for chat model
+    input_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
+
+    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=temperature > 0,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+    return response
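generate_response returns the whole reply at once, so the UI updates only after generation finishes. A possible streaming variant (not in this commit; a sketch assuming the same module-level tokenizer and model) uses transformers' TextIteratorStreamer and yields partial text, which gr.ChatInterface renders incrementally:

    # Sketch of a streaming alternative: generate() runs in a background thread
    # while the streamer yields decoded text as soon as it is produced.
    from threading import Thread
    from transformers import TextIteratorStreamer

    @spaces.GPU(duration=60)
    def generate_response_stream(message, history, system_prompt, temperature, max_tokens, top_p):
        conversation = [{"role": "system", "content": system_prompt}]
        for user_msg, assistant_msg in history:
            conversation.append({"role": "user", "content": user_msg})
            if assistant_msg:
                conversation.append({"role": "assistant", "content": assistant_msg})
        conversation.append({"role": "user", "content": message})
        input_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        Thread(target=model.generate, kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=temperature > 0,
            pad_token_id=tokenizer.eos_token_id,
        )).start()
        partial = ""
        for new_text in streamer:
            partial += new_text
            yield partial  # ChatInterface accepts generator fns for streaming output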
requirements.txt ADDED
@@ -0,0 +1,6 @@
+gradio
+transformers
+torch
+accelerate
+huggingface-hub
+spaces
utils.py ADDED
@@ -0,0 +1,3 @@
+# Utility functions can be added here if needed
+# For example, any text processing helpers
+pass