Spaces:
Running
Running
Upload tfidf_similarity.py
Browse files - utils/tfidf_similarity.py (+2 -11)
utils/tfidf_similarity.py
CHANGED
|
@@ -6,8 +6,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|
| 6 |
|
| 7 |
class TFIDF_Vectorizer():
|
| 8 |
def __init__(self, load_vectorizer=None, stop_words='english', min_df=2):
|
| 9 |
-
self.vectorizer_path = "tfidf_vectorizer.pkl"
|
| 10 |
-
self.tfidf_matrix_path = "tfidf_matrix.npz"
|
| 11 |
|
| 12 |
if load_vectorizer:
|
| 13 |
with open(self.vectorizer_path, 'rb') as file:
|
|
@@ -16,15 +15,7 @@ class TFIDF_Vectorizer():
|
|
| 16 |
self.vectorizer = TfidfVectorizer(stop_words=stop_words, min_df=min_df)
|
| 17 |
|
| 18 |
def compute_tfidf_matrix(self, texts):
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
# save vectorizer
|
| 22 |
-
with open(self.vectorizer_path, 'wb') as file:
|
| 23 |
-
pickle.dump(self.vectorizer, file)
|
| 24 |
-
|
| 25 |
-
# save tfidf matrix
|
| 26 |
-
save_npz(self.tfidf_matrix_path, features)
|
| 27 |
-
return features
|
| 28 |
|
| 29 |
def transform(self, texts: list) -> any:
|
| 30 |
return self.vectorizer.transform(texts)
|
|
|
|
| 6 |
|
| 7 |
class TFIDF_Vectorizer():
|
| 8 |
def __init__(self, load_vectorizer=None, stop_words='english', min_df=2):
|
| 9 |
+
self.vectorizer_path = "data/tfidf_vectorizer.pkl"
|
|
|
|
| 10 |
|
| 11 |
if load_vectorizer:
|
| 12 |
with open(self.vectorizer_path, 'rb') as file:
|
|
|
|
| 15 |
self.vectorizer = TfidfVectorizer(stop_words=stop_words, min_df=min_df)
|
| 16 |
|
| 17 |
def compute_tfidf_matrix(self, texts):
|
| 18 |
+
return self.vectorizer.fit_transform(texts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def transform(self, texts: list) -> any:
|
| 21 |
return self.vectorizer.transform(texts)
|