import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
class SummarizationModel:
    """Abstractive summarizer for Portuguese text based on PTT5.

    Loads the PTT5 Portuguese tokenizer vocabulary and a PTT5 checkpoint
    fine-tuned for summarization, placing the model on GPU when available.
    """

    # Hard limit on encoder input tokens; longer texts are truncated.
    MAX_INPUT_TOKENS = 512

    def __init__(self) -> None:
        # Keep model and input tensors on the same device.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # NOTE: tokenizer vocab and summarization checkpoint come from
        # different hubs repos; both are PTT5-base, so they are compatible.
        self.tokenizer = T5Tokenizer.from_pretrained('unicamp-dl/ptt5-base-portuguese-vocab')
        self.model = T5ForConditionalGeneration.from_pretrained('recogna-nlp/ptt5-base-summ').to(self.device)
        # Inference only: disable dropout explicitly rather than relying on
        # from_pretrained()'s default eval mode.
        self.model.eval()

    def summarize(self, text: str, max_length: int = 256, min_length: int = 128) -> str:
        """Return an abstractive summary of *text*.

        Args:
            text: Source text (Portuguese). Inputs longer than
                ``MAX_INPUT_TOKENS`` tokens are truncated.
            max_length: Maximum number of tokens in the generated summary.
            min_length: Minimum number of tokens in the generated summary.
                Clamped to ``max_length`` so an inconsistent pair cannot
                break generation.

        Returns:
            The decoded summary string, without special tokens.
        """
        inputs = self.tokenizer.encode(
            text,
            max_length=self.MAX_INPUT_TOKENS,
            truncation=True,
            return_tensors='pt',
        ).to(self.device)
        # No gradients are needed at inference time; skipping the autograd
        # graph saves memory and speeds up beam search.
        with torch.no_grad():
            summary_ids = self.model.generate(
                inputs,
                max_length=max_length,
                min_length=min(min_length, max_length),
                num_beams=5,
                # Forbid repeating any 3-gram to reduce degenerate loops.
                no_repeat_ngram_size=3,
                # Stop as soon as num_beams complete candidates exist instead
                # of always decoding out to max_length.
                early_stopping=True,
            )
        return self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)