Spaces:

Zenithwang
/

infly-OpenCoder-8B-Instruct

Sleeping

Zenithwang committed on Nov 13, 2024

Commit

25a236c

verified ·

1 Parent(s): 3f6e344

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -46,21 +46,20 @@ def predict(message, history):
     stop = StopOnTokens()
     # Formatting the input for the model.
-    messages =  system_prompt + sft_end_token.join([sft_end_token.join([f"\n{sft_start_token}{user_role}\n" + item[0], f"\n{sft_start_token}{assistant_role}\n" + item[1]])
-                        for item in history_transformer_format])
-    model_inputs = tokenizer([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
-        max_new_tokens=512,
-        do_sample=True,
-        top_p= 0.75,
-        top_k= 60,
-        temperature=0.2,
-        num_beams=1,
-        stopping_criteria=StoppingCriteriaList([stop]),
-        repetition_penalty=1.1,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()  # Starting the generation in a separate thread.
@@ -93,7 +92,7 @@ placeholder = """
 </div>
 """
-chatbot = gr.Chatbot(label='Sailor', placeholder=placeholder)
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     # gr.Markdown("""<center><font size=8>Sailor-Chat Bot⚓</center>""")
     gr.Markdown("""<p align="center"><img src="https://github.com/sail-sg/sailor-llm/raw/main/misc/wide_sailor_banner.jpg" style="height: 110px"/><p>""")

     stop = StopOnTokens()
     # Formatting the input for the model.
+    # messages =  system_prompt + sft_end_token.join([sft_end_token.join([f"\n{sft_start_token}{user_role}\n" + item[0], f"\n{sft_start_token}{assistant_role}\n" + item[1]])
+    #                     for item in history_transformer_format])
+    messages = [{user_role: item[0], assistant_role: item[1]} for item in history_transformer_format]
+    model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(device)
+    # model_inputs = tokenizer([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
+        max_new_tokens=1024,
+        do_sample=False,
+        # stopping_criteria=StoppingCriteriaList([stop])
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()  # Starting the generation in a separate thread.
 </div>
 """
+chatbot = gr.Chatbot(label='Sailor', placeholder=None)
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     # gr.Markdown("""<center><font size=8>Sailor-Chat Bot⚓</center>""")
     gr.Markdown("""<p align="center"><img src="https://github.com/sail-sg/sailor-llm/raw/main/misc/wide_sailor_banner.jpg" style="height: 110px"/><p>""")