import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Lightweight model that runs on CPU (fine for a demo). If you want a proper
# Portuguese model instead, swap in:
# MODEL_ID = "unicamp-dl/ptt5-small-portuguese-vocab"
MODEL_ID = "google/flan-t5-small"

tok = AutoTokenizer.from_pretrained(MODEL_ID)
mdl = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

pipe = pipeline(
    "text2text-generation",
    model=mdl,
    tokenizer=tok,
)

def gen(prompt: str) -> str:
    """Generate a short reply for the given prompt; empty input yields an empty string."""
    if not prompt or not prompt.strip():
        return ""
    out = pipe(
        prompt,
        max_new_tokens=120,       # cap the length of the generated reply
        do_sample=True,           # sample instead of greedy decoding
        top_p=0.9,                # nucleus sampling
        temperature=0.7,
        repetition_penalty=1.15,  # discourage repeated phrases
        num_return_sequences=1,
    )
    return out[0]["generated_text"]

# Gradio already exposes /api/predict automatically
demo = gr.Interface(
    fn=gen,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Textbox(label="Output"),
    title="T5 Mini Reply",
    description="Short reply generation (CPU).",
)

if __name__ == "__main__":
    demo.launch()
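
# --- Client-side usage (a sketch, not part of the original Space) ---
# Once the Space is running, it can be called programmatically with gradio_client;
# gr.Interface registers its endpoint under api_name "/predict". The Space id
# "your-username/t5-mini-reply" below is a hypothetical placeholder.
#
#   from gradio_client import Client
#
#   client = Client("your-username/t5-mini-reply")  # hypothetical Space id
#   result = client.predict("Write a one-line greeting.", api_name="/predict")
#   print(result)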