Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from ydata_profiling import ProfileReport | |
| import json | |
| import os | |
| from langchain.llms import HuggingFaceHub | |
| from langchain.chains import LLMChain | |
| from langchain.prompts import PromptTemplate | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain.tools.python.tool import PythonAstREPLTool | |
| from langchain.agents import AgentExecutor, create_react_agent | |
| from langchain_experimental.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent | |
| from langchain.agents.agent_types import AgentType | |
# Set page configuration
st.set_page_config(
    page_title="Interactive Data Profiler & Chat",
    layout="wide",
    page_icon="📊",
)

# Create session states for DataFrame and chat history if they don't exist.
# Streamlit re-executes this script on every interaction, so each key is only
# seeded when it is missing; existing values are left alone.
if "df" not in st.session_state:
    st.session_state.df = None
for _list_key in ("chat_history", "suggestions"):
    if _list_key not in st.session_state:
        # A fresh list per key so the two entries never share one object.
        st.session_state[_list_key] = []
# Initialize Hugging Face API
def get_llm():
    """Build the Hugging Face Hub LLM client used to answer data questions.

    The API token is read from the ``HUGGINGFACE_API_TOKEN`` environment
    variable; an empty string is passed when the variable is not set.

    Returns:
        A ``HuggingFaceHub`` instance pointing at ``google/flan-t5-large``
        with a low temperature and a 512 ``max_length`` setting.
    """
    api_token = os.environ.get("HUGGINGFACE_API_TOKEN", "")
    # Using a small but capable open-source model
    generation_settings = {"temperature": 0.1, "max_length": 512}
    return HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs=generation_settings,
        huggingfacehub_api_token=api_token,
    )
# Function to generate report
def generate_profile_report(df):
    """Create a profiling report object for ``df`` while showing a spinner.

    Args:
        df: The DataFrame to profile.

    Returns:
        The ``ProfileReport`` built for ``df``, titled "Profiling Report",
        with both the ``explorative`` and ``minimal`` options switched on.
    """
    report_options = {
        "title": "Profiling Report",
        "explorative": True,
        "minimal": True,  # Minimal for faster processing
    }
    with st.spinner("Generating profile report..."):
        return ProfileReport(df, **report_options)
# Function to generate query suggestions
def generate_suggestions(df):
    """Build starter questions tailored to the structure of ``df``.

    Three generic questions are always offered. An "average of" question is
    added for the first numeric column, and is left out entirely when the
    frame has no numeric column. Each of the first three columns then
    contributes two questions: mean/maximum for numeric columns,
    unique/missing values for text columns. Columns of any other dtype
    (booleans, datetimes, categoricals, ...) contribute nothing.

    Args:
        df: The DataFrame the suggestions are generated for.

    Returns:
        A list of question strings, in display order.
    """
    dtype_checks = pd.api.types

    def is_numeric(dtype):
        # pandas treats booleans as numeric, but "mean of a flag column" is
        # not a helpful suggestion, so they are excluded here. Checking the
        # dtype object (instead of its name) also covers nullable dtypes
        # such as "Int64"/"Float64".
        return dtype_checks.is_numeric_dtype(dtype) and not dtype_checks.is_bool_dtype(dtype)

    def is_text(dtype):
        return dtype_checks.is_object_dtype(dtype) or dtype_checks.is_string_dtype(dtype)

    # (column label, dtype) pairs in column order; duplicate labels are fine
    # because the frame is never indexed by label here.
    column_dtypes = list(df.dtypes.items())

    # Sample suggestions based on dataframe structure
    suggestions = [
        "How many rows are in this dataset?",
        "What are all the column names?",
        "Show me the first 5 rows",
    ]
    numeric_columns = [col for col, dtype in column_dtypes if is_numeric(dtype)]
    if numeric_columns:
        suggestions.append(f"What is the average of {numeric_columns[0]}")

    # Add column-specific suggestions for the first three columns only, to
    # keep the list short.
    for col, dtype in column_dtypes[:3]:
        if is_numeric(dtype):
            suggestions.append(f"What is the mean value of {col}?")
            suggestions.append(f"What is the maximum value of {col}?")
        elif is_text(dtype):
            suggestions.append(f"What are the unique values in {col}?")
            suggestions.append(f"How many missing values in {col}?")
    return suggestions
# Function to execute pandas operations safely
def _fallback_answer(df, query, error):
    """Answer a few simple question shapes directly with pandas.

    Used when the LLM agent is unavailable or fails. Matching is plain
    substring search on the lower-cased query, checked in a fixed order.
    """
    text = query.lower()
    if "rows" in text and "how many" in text:
        return f"The dataset has {df.shape[0]} rows."
    if "columns" in text and "how many" in text:
        return f"The dataset has {df.shape[1]} columns."
    if "column names" in text:
        # Column labels are not guaranteed to be strings (e.g. integer
        # headers), so convert them before joining.
        return f"The column names are: {', '.join(map(str, df.columns))}"
    if "first" in text and "rows" in text:
        row_count = 5  # Default
        for word in query.split():
            # Drop trailing punctuation so "5?" or "10." still count.
            token = word.strip(".,;:!?")
            # isdecimal() only accepts characters int() can parse, unlike
            # isdigit(), which also accepts e.g. superscript digits.
            if token.isdecimal():
                row_count = int(token)
                break
        return df.head(row_count).to_string()
    if "describe" in text:
        return df.describe().to_string()
    return f"I couldn't process that query. Error: {error}"


def execute_pandas_query(df, query):
    """Answer a natural-language question about ``df``.

    The question is first handed to a LangChain pandas DataFrame agent. If
    building or running the agent raises, a small set of built-in answers
    (row/column counts, column names, first N rows, describe) is tried
    instead, and a message containing the original error is returned when
    none of them matches.

    Args:
        df: The DataFrame being queried.
        query: The user's question as plain text.

    Returns:
        The agent's result, or a string produced by the fallback rules.
    """
    try:
        # NOTE(review): the pandas agent presumably answers by executing
        # model-generated Python against ``df`` — confirm the sandboxing
        # story before exposing this to untrusted users.
        agent = create_pandas_dataframe_agent(
            llm=get_llm(),
            df=df,
            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )
        return agent.run(query)
    except Exception as e:
        # Deliberately broad: any agent failure (missing token, model error,
        # unparsable output, ...) degrades to the built-in answers.
        return _fallback_answer(df, query, e)
# Main app header
st.title("📊 Interactive Data Profiler & Chat")
st.markdown("""
Upload your CSV file to get detailed profiling and ask questions about your data!
This app combines interactive data profiling with a chat interface for data exploration.
""")


def _queue_typed_question():
    """Queue the text box's current value so it is answered exactly once."""
    st.session_state.pending_question = st.session_state.question_input


def _queue_suggestion(question):
    """Copy a clicked suggestion into the text box and queue it for answering.

    State that backs a widget can only be assigned before that widget is
    created in a script pass, so this runs as a button callback instead of
    inline after the text box has been rendered.
    """
    st.session_state.question_input = question
    st.session_state.pending_question = question


def _clear_chat_history():
    """Drop every stored chat message."""
    st.session_state.chat_history = []


# File uploader
uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

# Process uploaded file
if uploaded_file is not None:
    try:
        # Read CSV into DataFrame. Rewind first: the script re-runs on every
        # interaction and the buffer may already have been consumed.
        uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file)
        st.session_state.df = df
        st.success(f"✅ File uploaded successfully! Found {df.shape[0]} rows and {df.shape[1]} columns.")

        # Regenerate per-file artefacts only when a different file arrives,
        # so suggestions never go stale and the report is not rebuilt on
        # every chat interaction.
        file_signature = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get("file_signature") != file_signature:
            st.session_state.file_signature = file_signature
            st.session_state.suggestions = generate_suggestions(df)
            st.session_state.report_html = None
        elif not st.session_state.get("suggestions"):
            st.session_state.suggestions = generate_suggestions(df)

        # Create tabs for different functionalities
        tab1, tab2 = st.tabs(["📊 Data Profiling", "💬 Data Chat"])

        # Tab 1: Data Profiling
        with tab1:
            st.header("Data Profiling")

            # Basic info
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Rows", df.shape[0])
            with col2:
                st.metric("Columns", df.shape[1])
            with col3:
                # Cast to a plain int: the double sum yields a numpy scalar.
                st.metric("Missing Values", int(df.isna().sum().sum()))

            # Show raw data sample
            with st.expander("Preview Data"):
                st.dataframe(df.head(10))

            # Generate the profile report once per uploaded file and reuse
            # the rendered HTML on later reruns.
            report_html = st.session_state.get("report_html")
            if report_html is None:
                profile = generate_profile_report(df)
                report_html = profile.to_html()
                st.session_state.report_html = report_html
            st.components.v1.html(report_html, height=1000, scrolling=True)

            # Provide download button
            st.write("### Download the Profiling Report")
            st.download_button(
                label="Download Report (HTML)",
                data=report_html.encode("utf-8"),
                file_name="profiling_report.html",
                mime="text/html",
            )

        # Tab 2: Interactive Chat
        with tab2:
            st.header("Chat with Your Data")
            st.info("Ask questions about your data and get instant answers!")

            # Chat input and suggested questions. Both feed the same
            # "pending_question" slot through callbacks.
            st.text_input(
                "Your question:",
                key="question_input",
                on_change=_queue_typed_question,
            )

            # Show suggestion chips
            st.write("Suggested questions:")
            cols = st.columns(2)
            for i, suggestion in enumerate(st.session_state.suggestions):
                with cols[i % 2]:
                    st.button(
                        suggestion,
                        key=f"suggestion_{i}",
                        on_click=_queue_suggestion,
                        args=(suggestion,),
                    )

            # Process question: consume the queued question so unrelated
            # reruns do not answer it again.
            user_question = st.session_state.get("pending_question")
            if user_question is not None:
                del st.session_state["pending_question"]
            if user_question and user_question.strip():
                st.session_state.chat_history.append({"role": "user", "content": user_question})

                # Get answer
                with st.spinner("Thinking..."):
                    answer = execute_pandas_query(df, user_question)

                # Add answer to chat history
                st.session_state.chat_history.append({"role": "assistant", "content": answer})

            # Display chat history
            st.write("### Conversation History")
            for message in st.session_state.chat_history:
                if message["role"] == "user":
                    st.markdown(f"**You:** {message['content']}")
                else:
                    st.markdown(f"**Assistant:** {message['content']}")
                st.markdown("---")

            # Clear chat button. The callback runs before the next script
            # pass, so the history above is already empty when redrawn.
            st.button("Clear Chat History", on_click=_clear_chat_history)
    except Exception as e:
        # Top-level UI boundary: show the problem instead of a stack trace.
        st.error(f"An error occurred: {str(e)}")
else:
    st.info("👆 Please upload a CSV file to begin.")

    # Placeholder visuals
    st.markdown("### What you can do with this app:")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("**📊 Data Profiling**")
        st.markdown("- Automatic data quality assessment")
        st.markdown("- Column statistics and distributions")
        st.markdown("- Correlation analysis")
        st.markdown("- Missing values analysis")
    with col2:
        st.markdown("**💬 Interactive Data Chat**")
        st.markdown("- Ask natural language questions")
        st.markdown("- Get instant insights")
        st.markdown("- Suggested questions for quick exploration")
        st.markdown("- No coding required!")