HugScriptKitty committed on
Commit 0fe9663 · verified · 1 Parent(s): c26aced

Deploy Gradio app with multiple files

Files changed (5)
  1. app.py +46 -0
  2. config.py +1 -0
  3. models.py +38 -0
  4. requirements.txt +6 -0
  5. utils.py +3 -0
app.py ADDED
@@ -0,0 +1,46 @@
+import gradio as gr
+from models import generate_response
+from config import MODEL_NAME
+
+with gr.Blocks(title="MobileLLM-Pro Chat") as demo:
+    gr.Markdown("# MobileLLM-Pro Chat Interface")
+    gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
+
+    system_prompt = gr.Textbox(
+        label="System Prompt",
+        value="You are a helpful AI assistant.",
+        lines=2
+    )
+    temperature = gr.Slider(
+        label="Temperature",
+        minimum=0.0,
+        maximum=2.0,
+        value=0.7,
+        step=0.1
+    )
+    max_tokens = gr.Slider(
+        label="Max New Tokens",
+        minimum=10,
+        maximum=1000,
+        value=256,
+        step=10
+    )
+    top_p = gr.Slider(
+        label="Top P",
+        minimum=0.0,
+        maximum=1.0,
+        value=0.9,
+        step=0.05
+    )
+
+    chat_interface = gr.ChatInterface(
+        fn=lambda message, history, sys_prompt, temp, max_tok, top_p_val: generate_response(
+            message, history, sys_prompt, temp, max_tok, top_p_val
+        ),
+        additional_inputs=[system_prompt, temperature, max_tokens, top_p],
+        title="Chat with MobileLLM-Pro",
+        description="Adjust parameters and chat with the model."
+    )
+
+if __name__ == "__main__":
+    demo.launch()
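Note: gr.ChatInterface calls its fn with the user message and the chat history first, followed by the value of each component in additional_inputs, in order; the lambda above simply forwards those six arguments to generate_response. A minimal sketch of the equivalent direct call (the argument values here are illustrative, not from this commit):

    # Illustrative direct call, mirroring how ChatInterface invokes the lambda.
    reply = generate_response(
        "Hello!",                           # message
        [],                                 # history: list of (user, assistant) tuples
        "You are a helpful AI assistant.",  # system_prompt
        0.7,                                # temperature
        256,                                # max_tokens
        0.9,                                # top_p
    )
    print(reply)

Since the lambda passes all arguments through unchanged, fn=generate_response would behave identically here.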
config.py ADDED
@@ -0,0 +1 @@
+MODEL_NAME = "facebook/MobileLLM-Pro"
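config.py holds only the model id, so pointing the Space at a different checkpoint is a one-line change. A common variant (an assumption, not part of this commit) reads an environment-variable override, keeping the committed value as the default:

    # Hypothetical variant: allow overriding the model id without editing code.
    import os
    MODEL_NAME = os.environ.get("MODEL_NAME", "facebook/MobileLLM-Pro")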
models.py ADDED
@@ -0,0 +1,38 @@
+import spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from config import MODEL_NAME
+import torch
+
+# Load model and tokenizer globally for efficiency
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+
+@spaces.GPU(duration=60)
+def generate_response(message, history, system_prompt, temperature, max_tokens, top_p):
+    # Format conversation history
+    conversation = [{"role": "system", "content": system_prompt}]
+
+    for user_msg, assistant_msg in history:
+        conversation.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            conversation.append({"role": "assistant", "content": assistant_msg})
+
+    conversation.append({"role": "user", "content": message})
+
+    # Format for chat model
+    input_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
+
+    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=temperature > 0,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+    return response
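generate_response returns the whole reply at once, so the UI updates only after generation finishes. A possible streaming variant (not in this commit; a sketch assuming the same module-level tokenizer and model) uses transformers' TextIteratorStreamer and yields partial text, which gr.ChatInterface renders incrementally:

    # Sketch of a streaming alternative: generate() runs in a background thread
    # while the streamer yields decoded text as soon as it is produced.
    from threading import Thread
    from transformers import TextIteratorStreamer

    @spaces.GPU(duration=60)
    def generate_response_stream(message, history, system_prompt, temperature, max_tokens, top_p):
        conversation = [{"role": "system", "content": system_prompt}]
        for user_msg, assistant_msg in history:
            conversation.append({"role": "user", "content": user_msg})
            if assistant_msg:
                conversation.append({"role": "assistant", "content": assistant_msg})
        conversation.append({"role": "user", "content": message})
        input_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        Thread(target=model.generate, kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=temperature > 0,
            pad_token_id=tokenizer.eos_token_id,
        )).start()
        partial = ""
        for new_text in streamer:
            partial += new_text
            yield partial  # ChatInterface accepts generator fns for streaming output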
requirements.txt ADDED
@@ -0,0 +1,6 @@
+gradio
+transformers
+torch
+accelerate
+huggingface-hub
+spaces
utils.py ADDED
@@ -0,0 +1,3 @@
+# Utility functions can be added here if needed
+# For example, any text processing helpers
+pass