Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import gradio as gr | |
| from llama_index.core import SimpleDirectoryReader, VectorStoreIndex | |
| from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding | |
| from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
| from llama_index.llms.groq import Groq | |
| from llama_parse import LlamaParse | |
| import mixedbread_ai | |
| from mixedbread_ai.core.api_error import ApiError | |
# Credentials for LlamaCloud, Groq and Mixedbread AI come from the environment.
llama_cloud_key = os.getenv("LLAMA_CLOUD_API_KEY")
groq_key = os.getenv("GROQ_API_KEY")
mxbai_key = os.getenv("MXBAI_API_KEY")

# Abort startup unless every key is present and non-empty.
if not all((llama_cloud_key, groq_key, mxbai_key)):
    raise ValueError("API Keys not found! Ensure they are passed to the Docker container.")
# Model names
# "llama-3.1-70b-versatile" was retired by Groq and requests for it are
# rejected; "llama-3.3-70b-versatile" is the replacement Groq recommends.
# NOTE(review): confirm against Groq's deprecations page before deploying.
llm_model_name = "llama-3.3-70b-versatile"
embed_model_name = "mxbai-embed-large-v1"  # Mixedbread AI model
fallback_embed_model = "sentence-transformers/all-MiniLM-L6-v2"  # Fallback model
# Mixedbread AI SDK client that is handed to the embedding model below.
# NOTE(review): the Configuration/Client names and keyword arguments are kept
# exactly as written; confirm they match the installed mixedbread_ai release.
mixedbread_config = mixedbread_ai.Configuration(
    api_key=mxbai_key,
    timeout=30.0,  # request timeout
    max_retries=3,
    retry_delay=2.0,  # seconds to wait between retries
    retry_on=[503],  # retry only on 503 Service Unavailable
)
mixedbread_client = mixedbread_ai.Client(configuration=mixedbread_config)
# A single LlamaParse instance, configured for markdown output, parses every
# supported format.
parser = LlamaParse(api_key=llama_cloud_key, result_type="markdown")

# Map each accepted (lower-case) extension to that shared parser instance.
file_extractor = dict.fromkeys(
    (
        ".pdf",
        ".docx",
        ".doc",
        ".txt",
        ".csv",
        ".xlsx",
        ".pptx",
        ".html",
        ".jpg",
        ".jpeg",
        ".png",
        ".webp",
        ".svg",
    ),
    parser,
)
# Embedding model factory with a local fallback
def initialize_embed_model():
    """Return the embedding model, preferring Mixedbread AI.

    If constructing the Mixedbread AI embedding raises for any reason, the
    failure is printed and a local HuggingFace embedding model is returned
    instead. An error raised while building the fallback propagates to the
    caller.
    """
    try:
        remote_model = MixedbreadAIEmbedding(
            mxbai_client=mixedbread_client,  # reuse the configured SDK client
            model_name=embed_model_name,
            api_key=mxbai_key,
        )
    except Exception as exc:
        print(f"Failed to initialize Mixedbread AI embedding: {exc!s}")
        print("Falling back to local HuggingFace embedding model.")
        return HuggingFaceEmbedding(model_name=fallback_embed_model)
    return remote_model
# Build the embedding model and the LLM once, at import time. Any failure
# aborts startup with a RuntimeError.
try:
    embed_model = initialize_embed_model()
    llm = Groq(model=llm_model_name, api_key=groq_key)
except Exception as e:
    # Chain explicitly so the traceback marks the original exception as the
    # direct cause of the RuntimeError.
    raise RuntimeError(f"Failed to initialize models: {str(e)}") from e
# Global variable for vector index: None until load_files() has indexed a
# document, and None again after clear_state() resets it.
vector_index = None


# File processing function
def load_files(file_path: str):
    """Parse an uploaded document and build the vector index used for chat.

    Args:
        file_path: Path of the uploaded file. An empty value or None means
            nothing was uploaded.

    Returns:
        A status message for the UI: a confirmation naming the file on
        success, otherwise a description of what went wrong. Failures are
        reported through the returned string rather than raised.

    The module-level ``vector_index`` is replaced only when indexing
    succeeds; on every failure path it is left untouched.
    """
    global vector_index

    if not file_path:
        return "No file path provided. Please upload a file."

    # The keys of file_extractor are lower-case, so compare against the
    # lower-cased path: "REPORT.PDF" is as valid as "report.pdf".
    if not file_path.lower().endswith(tuple(file_extractor)):
        valid_extensions = ', '.join(file_extractor)
        return f"The parser can only parse the following file types: {valid_extensions}"

    filename = os.path.basename(file_path)

    try:
        documents = SimpleDirectoryReader(
            input_files=[file_path],
            file_extractor=file_extractor,
        ).load_data()
    except Exception as e:
        return f"Error loading file: {str(e)}"

    # An empty result would otherwise produce an empty index that is
    # reported as "Ready" yet cannot answer any question.
    if not documents:
        return f"Error loading file: no content could be extracted from {filename}"

    try:
        new_index = VectorStoreIndex.from_documents(
            documents,
            embed_model=embed_model,
        )
    except ApiError as e:
        return f"Error processing file with Mixedbread AI API: {str(e)}. Status code: {e.status_code}"
    except Exception as e:
        return f"Unexpected error processing file: {str(e)}"

    vector_index = new_index
    return f"Ready to provide responses based on: {filename}"
# Respond function
def respond(message, history):
    """Stream an answer to ``message`` from the currently indexed document.

    This is a generator: each yielded value is the full text accumulated so
    far, so the caller can redraw the reply as it grows.

    Args:
        message: The user's question.
        history: Previous chat turns. Accepted because the function is used
            as the chat callback; it is not read here.

    Yields:
        The cumulative response text, or a single message when no document
        has been indexed yet or when the query fails.
    """
    if vector_index is None:
        # Because this function contains `yield`, a `return "<text>"` would
        # only set the generator's StopIteration value and never reach the
        # chat window. The hint has to be yielded.
        yield "Please upload a file first."
        return

    try:
        query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
        streaming_response = query_engine.query(message)
        partial_text = ""
        for new_text in streaming_response.response_gen:
            partial_text += new_text
            yield partial_text
    except Exception as e:
        yield f"Error processing query: {str(e)}"
# Clear function
def clear_state():
    """Forget the indexed document and return three ``None`` values.

    The module-level ``vector_index`` is reset to ``None``. The returned
    3-tuple of ``None`` is what the Clear button's click handler assigns to
    its output components.
    """
    global vector_index
    vector_index = None
    return (None,) * 3
# UI Setup: upload/status controls in a narrow left column, the chat in a
# wider right column.
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility: hidden}",
) as demo:
    gr.Markdown("# DataCamp Doc Q&A 🤖📃")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                file_count="single",
                type="filepath",
                label="Upload Document",
            )
            with gr.Row():
                btn = gr.Button("Submit", variant="primary")
                clear = gr.Button("Clear")
            output = gr.Textbox(label="Status")
        with gr.Column(scale=3):
            chatbot = gr.ChatInterface(
                fn=respond,
                chatbot=gr.Chatbot(height=300, type="messages"),
                theme="soft",
                show_progress="full",
                textbox=gr.Textbox(
                    placeholder="Ask questions about the uploaded document!",
                    container=False,
                ),
            )

    # Set up Gradio interactions
    btn.click(fn=load_files, inputs=file_input, outputs=output)
    clear.click(
        fn=clear_state,
        # Event outputs must be components. gr.ChatInterface is a Blocks
        # container, so target the Chatbot component it wraps rather than
        # the ChatInterface itself.
        outputs=[file_input, output, chatbot.chatbot],
    )
# Launch the demo
if __name__ == "__main__":
    try:
        demo.launch()
    except Exception as e:
        # Printing and falling through would end the process with exit
        # status 0, hiding the failure from whatever supervises the
        # container. SystemExit with a string argument writes that string to
        # stderr and exits with status 1.
        raise SystemExit(f"Failed to launch application: {str(e)}") from e