File size: 1,062 Bytes
6a0f724
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Lightweight model that runs on CPU (good for a demo). If you prefer a "real"
# Portuguese model, swap in:
# MODEL_ID = "unicamp-dl/ptt5-small-portuguese-vocab"
MODEL_ID = "google/flan-t5-small"

# Tokenizer and seq2seq model are loaded once at import time (downloads on
# first run, then served from the local Hugging Face cache).
tok = AutoTokenizer.from_pretrained(MODEL_ID)
mdl = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

# Shared text2text pipeline; no `device` argument, so it defaults to CPU.
pipe = pipeline(
    "text2text-generation",
    model=mdl,
    tokenizer=tok
)

def gen(prompt: str):
    """Generate a short reply for *prompt* via the shared seq2seq pipeline.

    Empty, whitespace-only, or falsy input short-circuits to an empty string
    so the pipeline is never invoked with nothing to work on.
    """
    if not (prompt and prompt.strip()):
        return ""
    # Sampling-based decoding: nucleus sampling with a mild repetition
    # penalty, capped at 120 new tokens; a single candidate is requested.
    results = pipe(
        prompt,
        max_new_tokens=120,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        repetition_penalty=1.15,
        num_return_sequences=1,
    )
    # The pipeline returns a list of dicts; take the lone candidate's text.
    return results[0]["generated_text"]

# Gradio already exposes /api/predict automatically
# Single-textbox-in, single-textbox-out UI wrapping gen().
demo = gr.Interface(
    fn=gen,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Textbox(label="Saída"),
    title="T5 Mini Reply",
    description="Geração de respostas curtas (CPU)."
)

# Start the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()