Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from llama_index.core import SimpleDirectoryReader, VectorStoreIndex | |
| from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding | |
| from llama_index.llms.groq import Groq | |
| from llama_parse import LlamaParse | |
| # API keys | |
| llama_cloud_key = os.environ.get("LLAMA_CLOUD_API_KEY") | |
| groq_key = os.environ.get("GROQ_API_KEY") | |
| mxbai_key = os.environ.get("MXBAI_API_KEY") | |
| if not (llama_cloud_key and groq_key and mxbai_key): | |
| raise ValueError("API Keys not found! Ensure they are passed to the Docker container.") | |
| # Model names | |
| llm_model_name = "llama-3.1-70b-versatile" | |
| embed_model_name = "mixedbread-ai/mxbai-embed-large-v1" | |
| # Initialize the parser | |
| parser = LlamaParse(api_key=llama_cloud_key, result_type="markdown") | |
| # Define file extractor | |
| file_extractor = { | |
| ".pdf": parser, | |
| ".docx": parser, | |
| ".doc": parser, | |
| ".txt": parser, | |
| ".csv": parser, | |
| ".xlsx": parser, | |
| ".pptx": parser, | |
| ".html": parser, | |
| ".jpg": parser, | |
| ".jpeg": parser, | |
| ".png": parser, | |
| ".webp": parser, | |
| ".svg": parser, | |
| } | |
| # Initialize models with error handling | |
| try: | |
| embed_model = MixedbreadAIEmbedding(api_key=mxbai_key, model_name=embed_model_name) | |
| llm = Groq(model=llm_model_name, api_key=groq_key) | |
| except Exception as e: | |
| raise RuntimeError(f"Failed to initialize models: {str(e)}") | |
| # Global variable for vector index | |
| vector_index = None | |
| # File processing function | |
| def load_files(file_path: str): | |
| global vector_index | |
| if not file_path: | |
| return "No file path provided. Please upload a file." | |
| valid_extensions = ', '.join(file_extractor.keys()) | |
| if not any(file_path.endswith(ext) for ext in file_extractor): | |
| return f"The parser can only parse the following file types: {valid_extensions}" | |
| try: | |
| document = SimpleDirectoryReader( | |
| input_files=[file_path], | |
| file_extractor=file_extractor | |
| ).load_data() | |
| vector_index = VectorStoreIndex.from_documents( | |
| document, | |
| embed_model=embed_model | |
| ) | |
| filename = os.path.basename(file_path) | |
| return f"Ready to provide responses based on: {filename}" | |
| except Exception as e: | |
| return f"Error processing file: {str(e)}" | |
| # Respond function | |
| def respond(message, history): | |
| if not vector_index: | |
| return "Please upload a file first." | |
| try: | |
| query_engine = vector_index.as_query_engine(streaming=True, llm=llm) | |
| streaming_response = query_engine.query(message) | |
| partial_text = "" | |
| for new_text in streaming_response.response_gen: | |
| partial_text += new_text | |
| yield partial_text | |
| except Exception as e: | |
| yield f"Error processing query: {str(e)}" | |
| # Clear function | |
| def clear_state(): | |
| global vector_index | |
| vector_index = None | |
| return None, None, None | |
| # UI Setup | |
| with gr.Blocks( | |
| theme=gr.themes.Default( | |
| primary_hue="green", | |
| secondary_hue="blue", | |
| font=[gr.themes.GoogleFont("Poppins")], | |
| ), | |
| css="footer {visibility: hidden}", | |
| ) as demo: | |
| gr.Markdown("# DataCamp Doc Q&A π€π") | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| file_input = gr.File( | |
| file_count="single", | |
| type="filepath", | |
| label="Upload Document" | |
| ) | |
| with gr.Row(): | |
| btn = gr.Button("Submit", variant="primary") | |
| clear = gr.Button("Clear") | |
| output = gr.Textbox(label="Status") | |
| with gr.Column(scale=3): | |
| chatbot = gr.ChatInterface( | |
| fn=respond, | |
| chatbot=gr.Chatbot(height=300, type="messages"), # Fixed deprecated warning | |
| theme="soft", | |
| show_progress="full", | |
| textbox=gr.Textbox( | |
| placeholder="Ask questions about the uploaded document!", | |
| container=False, | |
| ), | |
| ) | |
| # Set up Gradio interactions | |
| btn.click(fn=load_files, inputs=file_input, outputs=output) | |
| clear.click( | |
| fn=clear_state, | |
| outputs=[file_input, output, chatbot], | |
| ) | |
| # Launch the demo | |
| if __name__ == "__main__": | |
| try: | |
| demo.launch() | |
| except Exception as e: | |
| print(f"Failed to launch application: {str(e)}") |