CodeMind / src /retriever.py
devjas1
(FEAT)[Implement document search functionality]: enhance the search_documents function to load FAISS index and metadata, enabling semantic document retrieval.
593e022
"""
Retriever module for semantic document search using FAISS.
Provides functions to perform similarity-based lookups over embedded document vectors.
Integrates with FAISS for efficient vector search and returns relevant document matches.
"""
import os
import pickle
import faiss
from sentence_transformers import SentenceTransformer
def search_documents(query: str, config: dict):
    """
    Search for semantically similar documents using the cached FAISS index.

    Loads the index from ``vector_cache/faiss_index.bin`` and the chunk
    metadata from ``vector_cache/metadata.pkl``, embeds the query with the
    SentenceTransformer model named in the config, L2-normalizes the
    embedding, and asks the index for the ``top_k`` nearest entries.

    Args:
        query (str): Search query
        config (dict): Configuration dictionary. Reads
            ``config["embedding"]["model_path"]`` (required), and the
            optional ``config["retrieval"]["top_k"]`` (default 5) and
            ``config["retrieval"]["similarity_threshold"]`` (default 0.75).

    Returns:
        list: One of
            - ``[]`` when the FAISS index file does not exist;
            - formatted strings ``"[<filename>] (score: <s>): <text>..."``,
              one per hit whose score is at or above the threshold (text is
              cut to its first 200 characters);
            - a single ``"No matches found above threshold ..."`` string
              when no hit qualifies;
            - a single ``"Search failed: ..."`` string when a handled error
              (missing file, bad pickle, missing key, bad value) occurs.
    """
    index_path = "vector_cache/faiss_index.bin"
    metadata_path = "vector_cache/metadata.pkl"

    # Check if FAISS index exists
    if not os.path.exists(index_path):
        print("No FAISS index found. Please run 'init' command first.")
        return []

    try:
        # Load FAISS index and metadata
        index = faiss.read_index(index_path)
        # NOTE(review): pickle.load can execute arbitrary code from the file.
        # This presumes the cache was written locally by the 'init' command
        # and is trusted -- confirm before pointing this at shared storage.
        with open(metadata_path, "rb") as f:
            metadata = pickle.load(f)
        texts = metadata["texts"]
        filenames = metadata["filenames"]

        # Embed the query
        model = SentenceTransformer(config["embedding"]["model_path"])
        query_embedding = model.encode([query]).astype("float32")
        faiss.normalize_L2(query_embedding)

        # Search similar documents
        retrieval_cfg = config.get("retrieval", {})
        top_k = retrieval_cfg.get("top_k", 5)
        similarity_threshold = retrieval_cfg.get("similarity_threshold", 0.75)
        scores, indices = index.search(query_embedding, top_k)

        results = []
        for score, idx in zip(scores[0], indices[0]):
            # Hits are assumed to arrive best-first, so the first score below
            # the threshold ends the scan.
            if score < similarity_threshold:
                break
            # FAISS pads the result with label -1 when the index holds fewer
            # than top_k vectors; a negative index would silently wrap around
            # to the last chunk, so skip those placeholders.
            if idx < 0:
                continue
            results.append(
                f"[{filenames[idx]}] (score: {score:.3f}): {texts[idx][:200]}..."
            )

        if not results:
            results.append(f"No matches found above threshold {similarity_threshold}")
        return results
    except (
        FileNotFoundError,
        pickle.UnpicklingError,
        KeyError,
        ValueError,
    ) as e:
        print(f"Error during search: {e}")
        return [f"Search failed: {e}"]