Spaces:

vetrovvlad
/

protobench

Sleeping

App Files Files Community

vtrv.vls committed on Jul 7, 2024

Commit

4fa4c7b

1 Parent(s): 9540a56

Functionality rework

Browse files

Files changed (3) hide show

app.py +92 -59
models.py +114 -5
utils.py +42 -68

app.py CHANGED Viewed

@@ -2,70 +2,92 @@ import gradio
 import argparse
 import os
 import boto3
-from datetime import datetime
 import pandas as pd
 from copy import copy
-from utils import generate, send_to_s3
-from models import get_tinyllama, get_qwen2ins1b, response_tinyllama, response_qwen2ins1b
-from constants import css, js_code, js_light
-MERA_table = None
-TINYLLAMA = None
-QWEN2INS1B = None
-RIGHT_MODEL = None
-LEFT_MODEL = None
 S3_SESSION = None
-def giga_gen(content, chat_history):
-    chat_history.append([content])
-    res = generate(chat_history,'auth_token.json')
-    chat_history[-1].append(res)
-    send_to_s3(res, f'protobench/giga_{str(datetime.now()).replace(" ", "_")}.json', S3_SESSION)
-    return '', chat_history
-def tiny_gen(content, chat_history):
-    chat_history.append([content])
-    res = response_tinyllama(TINY_LLAMA, chat_history)
-    chat_history[-1].append(res)
-    send_to_s3(res, f'protobench/tiny_{str(datetime.now()).replace(" ", "_")}.json', S3_SESSION)
-    return '', chat_history
-def qwen_gen(content, chat_history):
-    chat_history.append([content])
-    res = response_qwen2ins1b(QWEN2INS1B, chat_history)
-    chat_history[-1].append(res)
-    send_to_s3(res, f'protobench/tiny_{str(datetime.now()).replace(" ", "_")}.json', S3_SESSION)
-    return '', chat_history
-def model_gen(content, chat_history, model_type: str):
     if content is None:
         return '', []
     if len(content) == 0:
         return '', []
-    gen = MODEL_LIB[model_type]
-    return gen(content, chat_history)
-def model_regen(content, chat_history, model_type: str):
     if chat_history is None:
         return '', []
-    if len(chat_history) == 0:
-        return '', []
-    gen = MODEL_LIB[model_type]
-    msg = copy(chat_history[-1][0])
-    return gen(msg, chat_history[:-1])
-def clear_chat():
-    return '', []
-MODEL_LIB = {'RUBASE': giga_gen, 'TINYLLAMA': tiny_gen, 'QWEN2INS1B': qwen_gen}
 def tab_arena():
     with gradio.Row():
         with gradio.Column():
             model_left = gradio.Dropdown(["TINYLLAMA", "QWEN2INS1B", "RUBASE"], value="TINYLLAMA", interactive=True, multiselect=False, label="Left model")
@@ -85,7 +107,7 @@ def tab_arena():
     with gradio.Row():
         with gradio.Accordion("Parameters", open=False):
-            context = gradio.Checkbox(label="No context", value=False)
             top_p = gradio.Slider(label='Top P', minimum=0, maximum=1, value=1, step=0.05, interactive=True)
             temp = gradio.Slider(label='Temperature', minimum=0, maximum=1, value=0.7, step=0.05, interactive=True)
             max_tokens = gradio.Slider(label='Max ouput tokens', minimum=1, maximum=2048, value=512, step=1, interactive=True)
@@ -94,14 +116,30 @@ def tab_arena():
         clear = gradio.ClearButton([msg, chatbot_left, chatbot_right], value='Clear history')
         regen_left = gradio.Button(value='Regenerate left answer')
         regen_right = gradio.Button(value='Regenerate right answer')
-        regen_left.click(model_regen, [msg, chatbot_left, model_left], [msg, chatbot_left])
-        regen_right.click(model_regen, [msg, chatbot_right, model_right], [msg, chatbot_right])
     with gradio.Blocks():
         model_left.change(clear_chat, [], [msg, chatbot_left])
         model_right.change(clear_chat, [], [msg, chatbot_right])
-        msg.submit(model_gen, [msg, chatbot_left, model_left], [msg, chatbot_left])
-        msg.submit(model_gen, [msg, chatbot_right, model_right], [msg, chatbot_right])
         # with gradio.Column():
         #     gradio.ChatInterface(
@@ -161,8 +199,6 @@ def tab_leaderboard():
 with open("test.md", "r") as f:
     TEST_MD = f.read()
-available_models = ["GigaChat", ""] # list(model_info.keys())
 def build_demo():
     # global original_dfs, available_models, gpt4t_dfs, haiku_dfs, llama_dfs
@@ -206,9 +242,6 @@ if __name__ == "__main__":
     # data_load(args.result_file)
     # TYPES = ["number", "markdown", "number"]
-    TINY_LLAMA =  get_tinyllama()
-    QWEN2INS1B = get_qwen2ins1b()
     try:
         session = boto3.session.Session()
         S3_SESSION = session.client(
@@ -220,8 +253,8 @@ if __name__ == "__main__":
     except:
         print('Failed to start s3 session')
-    demo = build_demo()
-    demo.launch(share=args.share, height=3000, width="110%") # share=args.share
     # demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
     # demo.launch()

 import argparse
 import os
 import boto3
 import pandas as pd
 from copy import copy
+import queue
+from constants import css, js_code, js_light
+from utils import model_response, clear_chat
+from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama
+INIT_MODELS = dict()
 S3_SESSION = None
+CURRENT_MODELS = queue.LifoQueue()
+MODEL_LIB = {'TINYLLAMA': get_tinyllama, "QWEN2INS1B": get_qwen2ins1b, "RUBASE": GigaChat.get_giga}
+GEN_LIB = {'TINYLLAMA': response_tinyllama, "QWEN2INS1B": response_qwen2ins1b, "RUBASE": response_gigachat}
def model_gen(
        content,
        chat_history,
        model_name: str,
        top_p,
        temp,
        max_tokens,
        no_context=False
        ):
    """Generate a reply to ``content`` with the selected model (Gradio callback).

    Args:
        content: Text of the new user message.
        chat_history: Chatbot history as a list of ``[user, assistant]`` pairs.
        model_name: Key into ``MODEL_LIB`` / ``GEN_LIB``.
        top_p: Nucleus-sampling value forwarded to the model as ``top_p``.
        temp: Sampling temperature forwarded as ``temperature``.
        max_tokens: Output length limit forwarded as ``max_tokens``.
        no_context: When true, the model receives only the new message and
            none of the earlier turns.

    Returns:
        ``('', history)``: an empty string to clear the message box and the
        chatbot history to display.  An empty ``content`` yields ``('', [])``.
    """
    # Validate first so an empty submit does not trigger a model load.
    if not content:
        return '', []

    shown_history = list(chat_history or [])
    model_manager(model_name, MODEL_LIB, 3)

    # "No context" means the model must not see earlier turns.  The previous
    # code passed chat_history[-1] (a single [user, bot] pair) as if it were
    # the whole history, which raised on an empty chat and garbled it otherwise.
    model_context = [] if no_context else shown_history
    _, updated = model_response(
        content,
        model_context,
        S3_SESSION,
        INIT_MODELS,
        GEN_LIB,
        model_name,
        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
        )
    if no_context:
        # Keep the earlier turns visible even though the model did not see them.
        updated = shown_history + updated
    return '', updated
def model_regen(
        content,
        chat_history,
        model_name: str,
        top_p,
        temp,
        max_tokens,
        no_context=False
        ):
    """Regenerate the answer to the last user message in ``chat_history``.

    Args:
        content: Current message-box text; ignored, the prompt is taken from
            the last history entry instead.
        chat_history: Chatbot history as a list of ``[user, assistant]`` pairs.
        model_name: Key into ``MODEL_LIB`` / ``GEN_LIB``.
        top_p: Nucleus-sampling value forwarded to the model as ``top_p``.
        temp: Sampling temperature forwarded as ``temperature``.
        max_tokens: Output length limit forwarded as ``max_tokens``.
        no_context: When true, the model receives only the repeated message
            and none of the earlier turns.

    Returns:
        ``('', history)`` where the last turn carries the new answer, or
        ``('', [])`` when there is nothing to regenerate.
    """
    # Covers both None and an empty list; indexing [-1] on an empty history
    # used to raise IndexError.  Checked before any model is loaded.
    if not chat_history:
        return '', []

    model_manager(model_name, MODEL_LIB, 3)

    last_prompt = chat_history[-1][0]
    earlier_turns = list(chat_history[:-1])

    # With "no context" the model sees only the repeated prompt; the earlier
    # turns are re-attached afterwards so the chatbot pane keeps showing them.
    model_context = [] if no_context else earlier_turns
    _, updated = model_response(
        last_prompt,
        model_context,
        S3_SESSION,
        INIT_MODELS,
        GEN_LIB,
        model_name,
        {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
        )
    if no_context:
        updated = earlier_turns + updated
    return '', updated
def model_manager(
        add_model,
        model_lib,
        max_models=3
        ):
    """Make sure ``add_model`` is loaded, evicting other models if needed.

    Loaded model objects live in the module-level ``INIT_MODELS`` dict and
    their names in the module-level ``CURRENT_MODELS`` queue.

    Args:
        add_model: Name of the model to make available.
        model_lib: Mapping from model name to a zero-argument loader.
        max_models: Maximum number of models kept loaded at once.
    """
    # Already loaded: do not reload the weights on every request and do not
    # push a duplicate name into the queue.
    if INIT_MODELS.get(add_model) is not None:
        return

    # Eviction order is whatever CURRENT_MODELS.get yields; the queue is
    # created as a LifoQueue, so the most recently registered model goes first.
    # NOTE(review): confirm LIFO eviction is intended rather than oldest-first.
    # The emptiness check prevents a blocking get() when max_models <= 0.
    while not CURRENT_MODELS.empty() and CURRENT_MODELS.qsize() >= max_models:
        evicted = CURRENT_MODELS.get_nowait()
        INIT_MODELS[evicted] = None

    # Load before registering so a failing loader leaves no stale queue entry.
    INIT_MODELS[add_model] = model_lib[add_model]()
    CURRENT_MODELS.put(add_model)
 def tab_arena():
+    global S3_SESSION, GEN_LIB, MODEL_LIB, INIT_MODELS, CURRENT_MODELS
     with gradio.Row():
         with gradio.Column():
             model_left = gradio.Dropdown(["TINYLLAMA", "QWEN2INS1B", "RUBASE"], value="TINYLLAMA", interactive=True, multiselect=False, label="Left model")
     with gradio.Row():
         with gradio.Accordion("Parameters", open=False):
+            no_context = gradio.Checkbox(label="No context", value=False)
             top_p = gradio.Slider(label='Top P', minimum=0, maximum=1, value=1, step=0.05, interactive=True)
             temp = gradio.Slider(label='Temperature', minimum=0, maximum=1, value=0.7, step=0.05, interactive=True)
             max_tokens = gradio.Slider(label='Max ouput tokens', minimum=1, maximum=2048, value=512, step=1, interactive=True)
         clear = gradio.ClearButton([msg, chatbot_left, chatbot_right], value='Clear history')
         regen_left = gradio.Button(value='Regenerate left answer')
         regen_right = gradio.Button(value='Regenerate right answer')
+        regen_left.click(
+            model_gen,
+            [msg, chatbot_left, model_left, top_p, temp, max_tokens, no_context],
+             [msg, chatbot_left]
+             )
+        regen_right.click(
+            model_gen,
+            [msg, chatbot_right, model_right, top_p, temp, max_tokens, no_context],
+             [msg, chatbot_right]
+             )
     with gradio.Blocks():
         model_left.change(clear_chat, [], [msg, chatbot_left])
         model_right.change(clear_chat, [], [msg, chatbot_right])
+        msg.submit(
+            model_gen,
+            [msg, chatbot_left, model_left, top_p, temp, max_tokens, no_context],
+             [msg, chatbot_left]
+             )
+        msg.submit(
+            model_gen,
+            [msg, chatbot_right, model_right, top_p, temp, max_tokens, no_context],
+             [msg, chatbot_right]
+             )
         # with gradio.Column():
         #     gradio.ChatInterface(
 with open("test.md", "r") as f:
     TEST_MD = f.read()
 def build_demo():
     # global original_dfs, available_models, gpt4t_dfs, haiku_dfs, llama_dfs
     # data_load(args.result_file)
     # TYPES = ["number", "markdown", "number"]
     try:
         session = boto3.session.Session()
         S3_SESSION = session.client(
     except:
         print('Failed to start s3 session')
+    app = build_demo()
+    app.launch(share=args.share, height=3000, width="110%") # share=args.share
     # demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
     # demo.launch()

models.py CHANGED Viewed

@@ -1,6 +1,77 @@
 import torch
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 def get_tinyllama():
     tinyllama = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
     return tinyllama
@@ -17,9 +88,13 @@ def get_qwen2ins1b():
 def response_tinyllama(
         model=None,
-        messages=None
         ):
     messages_dict = [
         {
             "role": "system",
@@ -32,13 +107,20 @@ def response_tinyllama(
             messages_dict.append({'role': 'assistant', 'content': step[1]})
     prompt = model.tokenizer.apply_chat_template(messages_dict, tokenize=False, add_generation_prompt=True)
-    outputs = model(prompt, max_new_tokens=64, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
     return outputs[0]['generated_text'].split('<|assistant|>')[1].strip()
 def response_qwen2ins1b(
         model=None,
-        messages=None
         ):
     messages_dict = [
@@ -61,7 +143,10 @@ def response_qwen2ins1b(
     generated_ids = model['model'].generate(
         model_inputs.input_ids,
-        max_new_tokens=512
     )
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
@@ -69,4 +154,28 @@ def response_qwen2ins1b(
     response = model['tokenizer'].batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return response # outputs[0]['generated_text'] #.split('<|assistant|>')[1].strip()

+import requests
+import json
 import torch
+import os
+from datetime import datetime, timedelta
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
class GigaChat:
    """Small HTTP client for the GigaChat-style OAuth and chat-completion endpoints.

    The OAuth response is cached as JSON in ``auth_file``; callers read the
    token back from ``self.auth_file``.
    """

    def __init__(self, auth_file='auth_token.json'):
        """Store the endpoints and make sure a cached token file exists.

        Args:
            auth_file: Path of the JSON file holding the OAuth response.
        """
        # url = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"
        self.auth_url = "https://api.mlrnd.ru/api/v2/oauth"
        # url = "https://gigachat.devices.sberbank.ru/api/v1/chat/completions"
        self.gen_url = "https://api.mlrnd.ru/api/v1/chat/completions"
        # payload='scope=GIGACHAT_API_CORP'
        self.payload = 'scope=API_v1'
        # Remember the path: it used to be set to None, which forced a token
        # request on every construction and left callers opening ``None``.
        self.auth_file = auth_file if auth_file is not None else 'auth_token.json'
        if not os.path.isfile(self.auth_file):
            self.gen_giga_token(self.auth_file)

    @classmethod
    def get_giga(cls, auth_file='auth_token.json'):
        """Alternate constructor with the same argument as ``__init__``."""
        return cls(auth_file)

    def gen_giga_token(self, auth_file):
        """Request a new OAuth token and write the JSON response to ``auth_file``.

        The ``Authorization`` header is read from the ``GIGACHAT_API_TOKEN``
        environment variable.

        Raises:
            requests.HTTPError: If the OAuth endpoint answers with an error
                status; nothing is written in that case.
        """
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json',
            # NOTE(review): fixed request id; presumably should be unique per request.
            'RqUID': '1b519047-0ee9-4b63-8599-e5ffc9c77e72',
            'Authorization': os.getenv('GIGACHAT_API_TOKEN')
        }
        # NOTE(security): verify=False disables TLS certificate validation.
        response = requests.request(
            "POST",
            self.auth_url,
            headers=headers,
            data=self.payload,
            verify=False
            )
        # Do not cache an error payload as if it were a token: the file would
        # then exist and block any later regeneration from __init__.
        response.raise_for_status()
        with open(auth_file, 'w') as f:
            json.dump(json.loads(response.text), f, ensure_ascii=False)

    def get_text(self, content, auth_token=None, params=None):
        """POST a chat-completion request and return the decoded JSON reply.

        Args:
            content: List of ``{'role': ..., 'content': ...}`` message dicts.
            auth_token: Bearer token placed in the ``Authorization`` header.
            params: Optional generation settings (``temperature``, ``top_p``,
                ``n``, ``max_tokens``, ``repetition_penalty``).  Missing or
                falsy values, including ``0``, fall back to the defaults below.

        Returns:
            The response body parsed from JSON.
        """
        params = params or {}
        payload = json.dumps(
            {
                "model": "Test_model",
                "messages": content,
                "temperature": params.get("temperature") or 1,
                "top_p": params.get("top_p") or 0.9,
                "n": params.get("n") or 1,
                "stream": False,
                "max_tokens": params.get("max_tokens") or 512,
                "repetition_penalty": params.get("repetition_penalty") or 1
            }
        )
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Authorization': f'Bearer {auth_token}'
        }
        # NOTE(security): verify=False disables TLS certificate validation.
        response = requests.request("POST", self.gen_url, headers=headers, data=payload, verify=False)
        return json.loads(response.text)
 def get_tinyllama():
     tinyllama = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
     return tinyllama
 def response_tinyllama(
         model=None,
+        messages=None,
+        params=None
         ):
+    if params is None:
+        params = dict()
     messages_dict = [
         {
             "role": "system",
             messages_dict.append({'role': 'assistant', 'content': step[1]})
     prompt = model.tokenizer.apply_chat_template(messages_dict, tokenize=False, add_generation_prompt=True)
+    outputs = model(
+        prompt,
+        max_new_tokens = params.get("max_tokens") if params.get("max_tokens") else 512,
+        temperature = params.get("temperature") if params.get("temperature") else 1,
+        top_p = params.get("top_p") if params.get("top_p") else 0.9,
+        repetition_penalty = params.get("repetition_penalty") if params.get("repetition_penalty") else 1
+        )
     return outputs[0]['generated_text'].split('<|assistant|>')[1].strip()
 def response_qwen2ins1b(
         model=None,
+        messages=None,
+        params=None
         ):
     messages_dict = [
     generated_ids = model['model'].generate(
         model_inputs.input_ids,
+        max_new_tokens = params.get("max_tokens") if params.get("max_tokens") else 512,
+        temperature = params.get("temperature") if params.get("temperature") else 1,
+        top_p = params.get("top_p") if params.get("top_p") else 0.9,
+        repetition_penalty = params.get("repetition_penalty") if params.get("repetition_penalty") else 1
     )
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     response = model['tokenizer'].batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return response # outputs[0]['generated_text'] #.split('<|assistant|>')[1].strip()
def response_gigachat(
        model=None,
        messages=None,
        model_params=None
        ): # content=None, auth_file=None
    """Send the chat history to GigaChat and return the assistant's reply text.

    Args:
        model: Client object exposing ``auth_file``, ``gen_giga_token`` and
            ``get_text`` (see ``GigaChat``).
        messages: Chat history as a list of ``[user]`` or ``[user, assistant]``
            entries.
        model_params: Optional generation settings passed to ``get_text``.

    Returns:
        The ``content`` of the first choice in the API response.
    """
    with open(model.auth_file) as f:
        auth_token = json.load(f)

    # ``expires_at`` is divided by 1000 before use, i.e. treated as
    # milliseconds.  Refresh when the token expires within the next minute;
    # the earlier check (now - 60s) only fired after it had already been
    # expired for a minute.
    expires_at = datetime.fromtimestamp(auth_token['expires_at'] / 1000)
    if expires_at <= datetime.now() + timedelta(seconds=60):
        model.gen_giga_token(model.auth_file)
        with open(model.auth_file) as f:
            auth_token = json.load(f)

    content = []
    for step in messages or []:
        content.append({'role': 'user', 'content': step[0]})
        # The newest turn has no assistant answer yet.
        if len(step) >= 2:
            content.append({'role': 'assistant', 'content': step[1]})

    resp = model.get_text(content, auth_token['access_token'], model_params)
    return resp["choices"][0]["message"]["content"]

utils.py CHANGED Viewed

@@ -1,76 +1,50 @@
 import requests
 import json
 import os
-from datetime import datetime, timedelta
 import boto3
-def gen_auth_token(auth_file):
-    # url = "https://ngw.devices.sberbank.ru:9443/api/v2/oauth"
-    url = "https://api.mlrnd.ru/api/v2/oauth"
-    # payload='scope=GIGACHAT_API_CORP'
-    payload='scope=API_v1'
-    headers = {
-    'Content-Type': 'application/x-www-form-urlencoded',
-    'Accept': 'application/json',
-    'RqUID': '1b519047-0ee9-4b63-8599-e5ffc9c77e72',
-    'Authorization': os.getenv('GIGACHAT_API_TOKEN')
-    }
-    response = requests.request("POST", url, headers=headers, data=payload, verify=False)
-    with open(auth_file, 'w') as f:
-        json.dump(json.loads(response.text), f, ensure_ascii=False)
-def get_text(content, auth_token=None):
-    # url = "https://gigachat.devices.sberbank.ru/api/v1/chat/completions"
-    url = "https://api.mlrnd.ru/api/v1/chat/completions"
-    payload = json.dumps({
-    "model": "Test_model",
-    "messages": content,
-    "temperature": 1,
-    "top_p": 0.1,
-    "n": 1,
-    "stream": False,
-    "max_tokens": 512,
-    "repetition_penalty": 1
-    })
-    headers = {
-    'Content-Type': 'application/json',
-    'Accept': 'application/json',
-    'Authorization': f'Bearer {auth_token}'
-    }
-    response = requests.request("POST", url, headers=headers, data=payload, verify=False)
-    return json.loads(response.text)
-def generate(content=None, auth_file=None):
-    if auth_file is None or not os.path.isfile(auth_file):
-        gen_auth_token(auth_file)
-    with open(auth_file) as f:
-        auth_token = json.load(f)
-    if datetime.fromtimestamp(auth_token['expires_at']/1000) <= datetime.now() - timedelta(seconds=60):
-        gen_auth_token(auth_file)
-        with open(auth_file) as f:
-            auth_token = json.load(f)
-    content_giga = []
-    for step in content:
-        content_giga.append({'role': 'user', 'content': step[0]})
-        if len(step) >= 2:
-            content_giga.append({'role': 'assistant', 'content': step[1]})
-    resp = get_text(content_giga, auth_token['access_token'])
-    return resp["choices"][0]["message"]["content"]
 def send_to_s3(data, name, session):
     session.put_object(Bucket=os.getenv('S3_BUCKET'), Key=name, Body=json.dumps(data))

 import requests
 import json
 import os
 import boto3
+from datetime import datetime
+from copy import copy
def clear_chat():
    """Return an empty message string and an empty chat history."""
    blank_message, blank_history = '', []
    return blank_message, blank_history
def model_response(
        content,
        chat_history,
        s3_session,
        initialized_models,
        gen_lib,
        model_name,
        model_params
        ):
    """Append a user turn, generate the model's answer and log it to S3.

    Args:
        content: Text of the new user message.
        chat_history: List of ``[user, assistant]`` pairs; extended in place.
        s3_session: Client passed to ``send_to_s3``, or ``None`` to skip the
            upload.
        initialized_models: Mapping from model name to the loaded model object.
        gen_lib: Mapping from model name to a callable
            ``(model, chat_history, model_params) -> str``.
        model_name: Key selecting the entry in both mappings.
        model_params: Generation settings forwarded to the callable.

    Returns:
        ``('', chat_history)``: an empty message string and the history
        including the new ``[content, answer]`` turn.
    """
    chat_history.append([content])
    generate = gen_lib[model_name]
    res = generate(initialized_models[model_name], chat_history, model_params)
    chat_history[-1].append(res)

    # The session is None when S3 could not be set up; answering the user
    # must not fail with an AttributeError just because logging is unavailable.
    if s3_session is not None:
        send_to_s3(res, f'protobench/{model_name}_{str(datetime.now()).replace(" ", "_")}.json', s3_session)
    return '', chat_history
 def send_to_s3(data, name, session):
     session.put_object(Bucket=os.getenv('S3_BUCKET'), Key=name, Body=json.dumps(data))
+# def giga_gen(content, chat_history, model, s3_session):
+#     chat_history.append([content])
+#     res = response_gigachat(chat_history,'auth_token.json')
+#     chat_history[-1].append(res)
+#     send_to_s3(res, f'protobench/giga_{str(datetime.now()).replace(" ", "_")}.json', s3_session)
+#     return '', chat_history
+# def tiny_gen(content, chat_history, model, s3_session):
+#     chat_history.append([content])
+#     res = response_tinyllama(model, chat_history)
+#     chat_history[-1].append(res)
+#     send_to_s3(res, f'protobench/tiny_{str(datetime.now()).replace(" ", "_")}.json', s3_session)
+#     return '', chat_history
+# def qwen_gen(content, chat_history, model, s3_session):
+#     chat_history.append([content])
+#     res = response_qwen2ins1b(model, chat_history)
+#     chat_history[-1].append(res)
+#     send_to_s3(res, f'protobench/qwen_{str(datetime.now()).replace(" ", "_")}.json', s3_session)
+#     return '', chat_history