Spaces:
Build error
Build error
| import requests | |
| from transformers import pipeline | |
| import nltk | |
| from nltk import sent_tokenize | |
| from transformers import MBartForConditionalGeneration, MBart50TokenizerFast | |
| from transformers import pipeline | |
| # nltk.download('punkt') # Run only once | |
# Tokenizer for the MBart-based multilingual model; source language defaults to English.
tokenizer = MBart50TokenizerFast.from_pretrained("SnypzZz/Llama2-13b-Language-translate", src_lang="en_XX")

# The heavyweight model is loaded lazily by load_model() on the first local translation.
model = None
model_loaded = False

# Hugging Face Inference API token, kept out of version control in ./secret.py.
# .strip() removes the trailing newline editors leave at end-of-file — a stray
# newline inside the "Authorization: Bearer <token>" header makes every API call fail.
api_token_header = ""
with open('./secret.py', 'r') as f:
    api_token_header = f.read().strip()
def load_model():
    """Load the MBart translation model and cache it at module level.

    Sets the module-level ``model`` and ``model_loaded`` flags so other
    code can tell the model is ready, and returns the loaded instance.
    """
    global model, model_loaded
    model = MBartForConditionalGeneration.from_pretrained(
        "SnypzZz/Llama2-13b-Language-translate"
    )
    model_loaded = True
    return model
def translation(text, dest_lang, dest_lang_code, src_lang_code):
    """Translate one sentence from *src_lang_code* to *dest_lang_code*.

    English -> Bengali is routed through the hosted BanglaT5 inference API;
    every other language pair runs through the local MBart model, which is
    loaded lazily on first use.

    Returns the translated string, or a user-facing message when source and
    destination languages are identical.
    Raises requests.HTTPError if the inference API call fails.
    """
    if dest_lang_code == src_lang_code:
        return "Please select different languages to translate between."
    headers = {"Authorization": f"Bearer {api_token_header}"}

    if dest_lang == "Bengali" and src_lang_code == "en_XX":
        # English -> Bengali: dedicated BanglaT5 model via the HF Inference API.
        API_URL = "https://api-inference.huggingface.co/models/csebuetnlp/banglat5_nmt_en_bn"
        response = requests.post(API_URL, headers=headers, json={"inputs": text})
        # Fail loudly on auth / model-loading errors instead of the confusing
        # KeyError that indexing an error payload would otherwise raise.
        response.raise_for_status()
        output = response.json()
        print(output)
        return output[0]['translation_text']

    # All other pairs: local MBart model, loaded once and cached at module level.
    global model
    if model is None:
        model = load_model()
    # A fresh tokenizer is required per call because src_lang varies by request.
    tokenizer = MBart50TokenizerFast.from_pretrained(
        "SnypzZz/Llama2-13b-Language-translate", src_lang=src_lang_code
    )
    model_inputs = tokenizer(text, return_tensors="pt")
    # forced_bos_token_id steers generation toward the destination language.
    generated_tokens = model.generate(
        **model_inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[dest_lang_code],
    )
    output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    print(output)
    return output[0]
def main_translation(text, dest_lang_code, src_lang_code):
    """Translate *text* sentence-by-sentence and return ``{"output": <joined>}``.

    Splits the input with NLTK's sentence tokenizer, translates each sentence
    independently, and joins the results with a single space — the previous
    bare ``+=`` concatenation ran translated sentences together with no
    separator. Raises KeyError for an unrecognised language code.
    """
    codes = {"en_XX":"English","bn_IN":"Bengali", "en_GB":"English","gu_IN":"Gujarati","hi_IN":"Hindi","ta_IN":"Tamil","te_IN":"Telugu","mr_IN":"Marathi"}
    dest_lang = codes[dest_lang_code]
    sentences = sent_tokenize(text)
    translated = [
        translation(line, dest_lang, dest_lang_code, src_lang_code)
        for line in sentences
    ]
    return {"output": " ".join(translated)}
# Run the demo translation only when executed as a script, so importing this
# module does not trigger a model download / translation as a side effect.
if __name__ == "__main__":
    print(main_translation("hello world", "hi_IN", "en_XX"))