import os

import PyPDF2
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

load_dotenv()


class RAGChain:
    def __init__(self, pdf_file_path, api_key=os.getenv("OPENAI_API_KEY")):
        pdf_text = self.extract_text_from_pdf_with_pypdf2(pdf_file_path)
        chunked_documents = self.create_chunks_for_pypdf2_parse(pdf_text)
        vectorstore = self.create_vectorstore_with_faiss(chunked_documents)
        # Create the LLM
        self.llm = ChatOpenAI(model="gpt-4o-mini", api_key=api_key)
        # Create a retriever from the vectorstore
        self.retriever = vectorstore.as_retriever()
        # Chat history to store the conversation
        self.chat_history = []

    def ask_question(self, question):
        # Build a transcript of the last 10 turns to give the LLM conversational context
        chat_history_text = "\n".join(
            f"User: {turn['user']}\nAssistant: {turn['assistant']}"
            for turn in self.chat_history[-10:]
        )
        # Prompt the LLM to condense the history and the new question into a single
        # standalone question suitable for retrieval
        single_question_prompt = (
            "You will be given a chat history and a user question. Your task is to reply "
            "with a single question that accurately represents the user question based on "
            f"the context of the chat history.\n\nChat history:\n{chat_history_text}\n\n"
            f"User question: {question}\n\nReply with the single question and nothing else."
            "\n\nSingle question:"
        )
        single_question = self.llm.invoke(single_question_prompt).content
        # Retrieve the relevant context from the vectorstore
        docs = self.retriever.invoke(single_question)
        context = [doc.page_content for doc in docs]
        context_text = "\n\n".join(context)
        # Prompt the LLM to answer the standalone question from the retrieved context
        answer_prompt = (
            "You will be given a context and a question. Your task is to answer the "
            f"question based on the context.\n\nContext:\n{context_text}\n\n"
            f"Question: {single_question}\n\nAnswer:"
        )
        answer = self.llm.invoke(answer_prompt).content
        # Update the chat history and return the answer with its supporting context
        self.chat_history.append({"user": question, "assistant": answer})
        return answer, context

    def clear_history(self):
        self.chat_history = []

    def get_chat_history(self):
        return self.chat_history

    def extract_text_from_pdf_with_pypdf2(self, file_path):
        pdf_reader = PyPDF2.PdfReader(file_path)
        full_text = ""
        for page in pdf_reader.pages:
            full_text += page.extract_text() + "\n"
        return full_text

    def create_chunks_for_pypdf2_parse(self, pdf_text):
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        return text_splitter.split_text(pdf_text)

    def create_vectorstore_with_faiss(self, chunked_documents):
        embedding_function = OpenAIEmbeddings()
        if isinstance(chunked_documents[0], str):
            vectorstore = FAISS.from_texts(chunked_documents, embedding_function)
        else:
            vectorstore = FAISS.from_documents(chunked_documents, embedding_function)
        # The index is kept in memory; it could also be persisted via vectorstore.save_local(path)
        return vectorstore
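

# A minimal usage sketch, not part of the original file: "example.pdf" is a hypothetical
# path, and OPENAI_API_KEY is assumed to be set in the environment or a .env file.
if __name__ == "__main__":
    rag = RAGChain("example.pdf")
    answer, context = rag.ask_question("What is this document about?")
    print(answer)
    # A follow-up question reuses the stored chat history for the standalone-question rewrite
    answer, _ = rag.ask_question("Can you summarize that in one sentence?")
    print(answer)
    rag.clear_history()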