Spaces:

Shreneek
/

chat-with-csv

Build error

App Files Files Community

Shreneek committed on Mar 1

Commit

28545e3

verified ·

1 Parent(s): fe6105e

Create app.py

Browse files

Files changed (1) hide show

app.py +231 -0

app.py ADDED Viewed

	@@ -0,0 +1,231 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from ydata_profiling import ProfileReport
+import json
+import os
+from langchain.llms import HuggingFaceHub
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain.tools.python.tool import PythonAstREPLTool
+from langchain.agents import AgentExecutor, create_react_agent
+from langchain_experimental.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
+from langchain.agents.agent_types import AgentType
+# Set page configuration
+st.set_page_config(page_title="Interactive Data Profiler & Chat", layout="wide", page_icon="📊")
+# Create session states for DataFrame and chat history if they don't exist
+if 'df' not in st.session_state:
+    st.session_state.df = None
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []
+if 'suggestions' not in st.session_state:
+    st.session_state.suggestions = []
+# Initialize Hugging Face API
+def get_llm():
+    # Using a small but capable open-source model
+    llm = HuggingFaceHub(
+        repo_id="google/flan-t5-large",
+        model_kwargs={"temperature": 0.1, "max_length": 512},
+        huggingfacehub_api_token=os.environ.get("HUGGINGFACE_API_TOKEN", "")
+    )
+    return llm
+# Function to generate report
+def generate_profile_report(df):
+    with st.spinner("Generating profile report..."):
+        profile = ProfileReport(df,
+                               title="Profiling Report",
+                               explorative=True,
+                               minimal=True)  # Minimal for faster processing
+        return profile
+# Function to generate query suggestions
+def generate_suggestions(df):
+    # Get basic info about the dataframe
+    num_rows = df.shape[0]
+    num_cols = df.shape[1]
+    column_names = df.columns.tolist()
+    data_types = df.dtypes.astype(str).tolist()
+    # Sample suggestions based on dataframe structure
+    suggestions = [
+        f"How many rows are in this dataset?",
+        f"What are all the column names?",
+        f"Show me the first 5 rows",
+        f"What is the average of {column_names[0] if len(column_names) > 0 else 'column'}"
+    ]
+    # Add column-specific suggestions
+    for col, dtype in zip(column_names[:min(3, len(column_names))], data_types[:min(3, len(data_types))]):
+        if 'int' in dtype or 'float' in dtype:
+            suggestions.append(f"What is the mean value of {col}?")
+            suggestions.append(f"What is the maximum value of {col}?")
+        elif 'object' in dtype or 'str' in dtype:
+            suggestions.append(f"What are the unique values in {col}?")
+            suggestions.append(f"How many missing values in {col}?")
+    return suggestions
+# Function to execute pandas operations safely
+def execute_pandas_query(df, query):
+    try:
+        # Create pandas agent
+        agent = create_pandas_dataframe_agent(
+            llm=get_llm(),
+            df=df,
+            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            verbose=True
+        )
+        # Execute query
+        result = agent.run(query)
+        return result
+    except Exception as e:
+        # Fallback to basic operations if agent fails
+        if "rows" in query.lower() and "how many" in query.lower():
+            return f"The dataset has {df.shape[0]} rows."
+        elif "columns" in query.lower() and "how many" in query.lower():
+            return f"The dataset has {df.shape[1]} columns."
+        elif "column names" in query.lower():
+            return f"The column names are: {', '.join(df.columns.tolist())}"
+        elif "first" in query.lower() and "rows" in query.lower():
+            num = 5  # Default
+            for word in query.split():
+                if word.isdigit():
+                    num = int(word)
+                    break
+            return df.head(num).to_string()
+        elif "describe" in query.lower():
+            return df.describe().to_string()
+        else:
+            return f"I couldn't process that query. Error: {str(e)}"
+# Main app header
+st.title("🔍 Interactive Data Profiler & Chat")
+st.markdown("""
+Upload your CSV file to get detailed profiling and ask questions about your data!
+This app combines interactive data profiling with a chat interface for data exploration.
+""")
+# File uploader
+uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
+# Process uploaded file
+if uploaded_file is not None:
+    try:
+        # Read CSV into DataFrame
+        df = pd.read_csv(uploaded_file)
+        st.session_state.df = df
+        st.success(f"✅ File uploaded successfully! Found {df.shape[0]} rows and {df.shape[1]} columns.")
+        # Generate suggestions when a new file is uploaded
+        if len(st.session_state.suggestions) == 0:
+            st.session_state.suggestions = generate_suggestions(df)
+        # Create tabs for different functionalities
+        tab1, tab2 = st.tabs(["📊 Data Profiling", "💬 Data Chat"])
+        # Tab 1: Data Profiling
+        with tab1:
+            st.header("Data Profiling")
+            # Basic info
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                st.metric("Rows", df.shape[0])
+            with col2:
+                st.metric("Columns", df.shape[1])
+            with col3:
+                st.metric("Missing Values", df.isna().sum().sum())
+            # Show raw data sample
+            with st.expander("Preview Data"):
+                st.dataframe(df.head(10))
+            # Generate the profile report
+            profile = generate_profile_report(df)
+            # Convert report to HTML and display
+            report_html = profile.to_html()
+            st.components.v1.html(report_html, height=1000, scrolling=True)
+            # Provide download button
+            st.write("### Download the Profiling Report")
+            report_bytes = report_html.encode('utf-8')
+            st.download_button(
+                label="Download Report (HTML)",
+                data=report_bytes,
+                file_name="profiling_report.html",
+                mime="text/html"
+            )
+        # Tab 2: Interactive Chat
+        with tab2:
+            st.header("Chat with Your Data")
+            st.info("Ask questions about your data and get instant answers!")
+            # Chat input and suggested questions
+            user_question = st.text_input("Your question:", key="question_input")
+            # Show suggestion chips
+            st.write("Suggested questions:")
+            cols = st.columns(2)
+            for i, suggestion in enumerate(st.session_state.suggestions):
+                col_idx = i % 2
+                with cols[col_idx]:
+                    if st.button(suggestion, key=f"suggestion_{i}"):
+                        user_question = suggestion
+                        st.session_state.question_input = suggestion
+                        st.experimental_rerun()
+            # Process question
+            if user_question:
+                st.session_state.chat_history.append({"role": "user", "content": user_question})
+                # Get answer
+                with st.spinner("Thinking..."):
+                    answer = execute_pandas_query(df, user_question)
+                # Add answer to chat history
+                st.session_state.chat_history.append({"role": "assistant", "content": answer})
+            # Display chat history
+            st.write("### Conversation History")
+            for message in st.session_state.chat_history:
+                if message["role"] == "user":
+                    st.markdown(f"**You:** {message['content']}")
+                else:
+                    st.markdown(f"**Assistant:** {message['content']}")
+                st.markdown("---")
+            # Clear chat button
+            if st.button("Clear Chat History"):
+                st.session_state.chat_history = []
+                st.experimental_rerun()
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+else:
+    st.info("👆 Please upload a CSV file to begin.")
+    # Placeholder visuals
+    st.markdown("### What you can do with this app:")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("**📊 Data Profiling**")
+        st.markdown("- Automatic data quality assessment")
+        st.markdown("- Column statistics and distributions")
+        st.markdown("- Correlation analysis")
+        st.markdown("- Missing values analysis")
+    with col2:
+        st.markdown("**💬 Interactive Data Chat**")
+        st.markdown("- Ask natural language questions")
+        st.markdown("- Get instant insights")
+        st.markdown("- Suggested questions for quick exploration")
+        st.markdown("- No coding required!")