import nltk
import spacy
class NLPModel:
    """Wrapper around a spaCy pipeline and NLTK's Punkt sentence tokenizer.

    The spaCy pipeline is used for named-entity extraction and NLTK for
    sentence splitting. Both default to Portuguese so the two halves of the
    class agree on the language of the text they process.
    """

    def __init__(self, model_name: str = "pt_core_news_md") -> None:
        """Load the spaCy pipeline and make sure the Punkt data is installed.

        Args:
            model_name: Name of the spaCy pipeline handed to ``spacy.load``.
                Defaults to the Portuguese ``pt_core_news_md`` pipeline.
        """
        self.nlp = spacy.load(model_name)
        # Only contact the NLTK download server when the Punkt data is not
        # already on disk; an unconditional download on every instantiation
        # is slow and fails needlessly when offline.
        # NOTE(review): recent NLTK releases reportedly load the tokenizer
        # from the "punkt_tab" resource instead - confirm against the pinned
        # NLTK version before changing the resource name.
        try:
            nltk.data.find("tokenizers/punkt")
        except LookupError:
            nltk.download('punkt')

    def extract_entities(self, text: str) -> list:
        """Return the named entities the spaCy pipeline finds in *text*.

        Args:
            text: Raw text to analyse.

        Returns:
            A list of ``(entity_text, label)`` tuples in document order,
            where ``entity_text`` is lower-cased and ``label`` is the
            entity's ``label_`` string.
        """
        doc = self.nlp(text)
        return [(ent.text.lower(), ent.label_) for ent in doc.ents]

    def tokenize_sentences(self, text: str, language: str = "portuguese") -> list:
        """Split *text* into sentences with NLTK's Punkt tokenizer.

        Args:
            text: Raw text to split.
            language: Name of the Punkt model to use. Defaults to
                ``"portuguese"`` to match the default spaCy pipeline; NLTK's
                own default is English, which mis-handles Portuguese
                abbreviations.

        Returns:
            The list of sentence strings produced by ``nltk.sent_tokenize``.
        """
        return nltk.sent_tokenize(text, language=language)