Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Sleeping

App Files Files Community

awacke1 committed on Dec 19, 2024

Commit

90807c0

verified ·

1 Parent(s): 0108f8c

Create app.py

Browse files

Files changed (1) hide show

app.py +276 -0

app.py ADDED Viewed

	@@ -0,0 +1,276 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+import torch
+import json
+import os
+from pathlib import Path
+from datetime import datetime
+import edge_tts
+import asyncio
+import base64
+from openai import OpenAI
+import anthropic
+import streamlit.components.v1 as components
# Page configuration (kept as the first Streamlit call in the script).
st.set_page_config(page_title="Video Search with Speech", page_icon="🎥", layout="wide")

# Seed the session-state keys on the first run only; later reruns keep
# whatever values the session has accumulated.
for _state_key, _initial_value in (
    ('messages', []),
    ('search_history', []),
    ('last_voice_input', ""),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# API clients, constructed without explicit arguments.
# NOTE(review): presumably both SDKs pick their credentials up from the
# environment -- confirm the deployment sets them.
openai_client = OpenAI()
claude_client = anthropic.Anthropic()

# Custom speech-recognition component served from the local "mycomponent" path.
speech_component = components.declare_component("speech_recognition", path="mycomponent")
class VideoSearch:
    """Semantic video search over precomputed video and description embeddings.

    The query is embedded with a sentence-transformer model and compared, by
    cosine similarity, against two embedding matrices (``video_embeds`` and
    ``text_embeds``) that are aligned row-for-row with ``dataset``.
    """

    def __init__(self):
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.load_dataset()

    def load_dataset(self):
        """Load the dataset from the local ``paste.txt`` CSV file.

        If reading the file or decoding its embeddings fails for any reason,
        the error is shown in the UI and dummy data is used instead, so the
        app stays usable.
        """
        try:
            self.dataset = pd.read_csv("paste.txt")
            self.prepare_features()
        except Exception as e:
            st.error(f"Error loading dataset: {e}")
            self.create_dummy_data()

    @staticmethod
    def _parse_embeddings(column):
        """Decode a column of embeddings, JSON-parsing entries stored as strings."""
        return np.array([json.loads(e) if isinstance(e, str) else e
                         for e in column])

    @staticmethod
    def _to_native(value):
        """Convert a numpy scalar to the equivalent built-in Python value.

        Rows pulled out of a DataFrame carry numpy scalars (e.g. ``np.int64``),
        which ``json.dumps`` cannot serialize; other values pass through.
        """
        return value.item() if isinstance(value, np.generic) else value

    def prepare_features(self):
        """Build the embedding matrices from the dataset's embedding columns."""
        self.video_embeds = self._parse_embeddings(self.dataset.video_embed)
        self.text_embeds = self._parse_embeddings(self.dataset.description_embed)

    def create_dummy_data(self):
        """Create a 10-row placeholder dataset with random embeddings."""
        self.dataset = pd.DataFrame({
            'video_id': [f'video_{i}' for i in range(10)],
            'youtube_id': ['dQw4w9WgXcQ'] * 10,  # Example YouTube ID
            'description': ['Sample video description'] * 10,
            'views': [1000] * 10,
            'start_time': [0] * 10,
            'end_time': [60] * 10
        })
        # 384 columns so the dummy vectors line up with the query embedding.
        self.video_embeds = np.random.randn(10, 384)
        self.text_embeds = np.random.randn(10, 384)

    def _build_result(self, idx, score):
        """Assemble the result dict for dataset row ``idx`` with its score."""
        row = self.dataset.iloc[idx]  # one row lookup instead of one per field
        return {
            'video_id': self._to_native(row['video_id']),
            'youtube_id': self._to_native(row['youtube_id']),
            'description': self._to_native(row['description']),
            'start_time': self._to_native(row['start_time']),
            'end_time': self._to_native(row['end_time']),
            'relevance_score': float(score),
            'views': self._to_native(row['views'])
        }

    def search(self, query, top_k=5):
        """Return up to ``top_k`` results for ``query``, best match first.

        Args:
            query: Free-text search string.
            top_k: Maximum number of results; values <= 0 yield no results.

        Returns:
            A list of dicts with the keys ``video_id``, ``youtube_id``,
            ``description``, ``start_time``, ``end_time``, ``relevance_score``
            and ``views``. All values are built-in Python types so the list
            can be JSON-serialized.
        """
        # Guard first: a slice of [-0:] would return *every* row, and an
        # empty dataset cannot be scored at all.
        if top_k <= 0 or len(self.dataset) == 0:
            return []

        query_embedding = self.text_model.encode([query])[0]
        video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
        text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
        # Equal weighting of the video and the description similarity.
        combined_sims = 0.5 * video_sims + 0.5 * text_sims

        # argsort is ascending: take the last top_k and flip to best-first.
        top_indices = np.argsort(combined_sims)[-top_k:][::-1]
        return [self._build_result(idx, combined_sims[idx]) for idx in top_indices]
async def generate_speech(text, voice="en-US-AriaNeural"):
    """Synthesize ``text`` to an MP3 file using Edge TTS.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only text is skipped.
        voice: Edge TTS voice name.

    Returns:
        The name of the MP3 file written to the current directory, or ``None``
        when there was nothing to speak. The caller owns the file and is
        responsible for deleting it.
    """
    import uuid  # local import: only needed here

    if not text or not text.strip():
        return None
    communicate = edge_tts.Communicate(text, voice)
    # A timestamp alone has one-second resolution, so two requests in the same
    # second would overwrite (and then delete) each other's file; the random
    # suffix keeps every file name unique.
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    audio_file = f"speech_{stamp}_{uuid.uuid4().hex[:8]}.mp3"
    await communicate.save(audio_file)
    return audio_file
def process_with_gpt4(prompt, model="gpt-4"):
    """Send ``prompt`` to an OpenAI chat model and return the reply text.

    Args:
        prompt: User message to send. ``None`` or blank prompts are skipped
            without calling the API.
        model: Chat model name; defaults to the original ``"gpt-4"``.

    Returns:
        The content of the first choice, or ``None`` if the prompt was blank
        or the request failed (the failure is shown in the UI).
    """
    if not prompt or not prompt.strip():
        return None
    try:
        response = openai_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"Error with GPT-4: {e}")
        return None
def process_with_claude(prompt, model="claude-3-sonnet-20240229", max_tokens=1000):
    """Send ``prompt`` to an Anthropic Claude model and return the reply text.

    Args:
        prompt: User message to send. ``None`` or blank prompts are skipped
            without calling the API.
        model: Claude model name; defaults to the original value.
        max_tokens: Upper bound on the length of the generated reply.

    Returns:
        The text of the first content block, or ``None`` if the prompt was
        blank or the request failed (the failure is shown in the UI).
    """
    if not prompt or not prompt.strip():
        return None
    try:
        response = claude_client.messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.content[0].text
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"Error with Claude: {e}")
        return None
_DEFAULT_TTS_VOICE = "en-US-AriaNeural"


def _get_video_search():
    """Return this session's VideoSearch, building it only on the first run.

    Streamlit re-executes the script on every interaction; keeping the
    instance in session state avoids reloading the model each time.
    """
    if 'video_search' not in st.session_state:
        st.session_state['video_search'] = VideoSearch()
    return st.session_state['video_search']


def _record_search(query, results):
    """Append a query and its results to the session's search history."""
    if 'search_history' not in st.session_state:
        st.session_state['search_history'] = []
    st.session_state['search_history'].append({
        'query': query,
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'results': results
    })


def _show_video(result):
    """Embed the result's YouTube video, if it has an id."""
    if result['youtube_id']:
        st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")


def _play_audio_summary(result):
    """Speak a short summary of ``result`` with the voice chosen in the sidebar."""
    summary = f"Video summary: {str(result['description'])[:200]}"
    voice = st.session_state.get("tts_voice", _DEFAULT_TTS_VOICE)
    try:
        audio_file = asyncio.run(generate_speech(summary, voice))
    except Exception as e:
        st.error(f"Error generating speech: {e}")
        return
    if not audio_file:
        return
    try:
        # Hand Streamlit the bytes so the temporary file can be removed safely.
        st.audio(Path(audio_file).read_bytes(), format="audio/mp3")
    finally:
        if os.path.exists(audio_file):
            os.remove(audio_file)


def _json_default(value):
    """``json.dumps`` fallback for the numpy values found in search results."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def _rerun():
    """Rerun the script; ``st.experimental_rerun`` is gone from newer Streamlit."""
    rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun")
    rerun()


def _render_search_tab(search):
    """Render the text-search tab and return the chosen number of results."""
    st.subheader("Search Videos")
    query = st.text_input("Enter your search query:")
    col1, col2 = st.columns(2)
    with col1:
        search_button = st.button("🔍 Search")
    with col2:
        num_results = st.slider("Number of results:", 1, 10, 5)

    if search_button and query:
        results = search.search(query, num_results)
        _record_search(query, results)
        st.session_state['current_results'] = results

    # Render from session state, not from the click: a button is only True
    # for a single rerun, so widgets nested under it (the audio button below)
    # would vanish before their own click could be handled.
    for i, result in enumerate(st.session_state.get('current_results', []), 1):
        description = str(result['description'])
        with st.expander(f"Result {i}: {description[:100]}...", expanded=i == 1):
            cols = st.columns([2, 1])
            with cols[0]:
                st.markdown("**Full Description:**")
                st.write(result['description'])
                st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
                st.markdown(f"**Views:** {result['views']:,}")
            with cols[1]:
                st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
                _show_video(result)
                if st.button("🔊 Generate Audio Summary", key=f"audio_{i}"):
                    _play_audio_summary(result)
    return num_results


def _render_voice_tab(search, num_results):
    """Render the voice tab: transcript plus search / GPT-4 / Claude actions."""
    st.subheader("Voice Input")
    voice_input = speech_component()
    if voice_input and voice_input != st.session_state.get('last_voice_input', ""):
        st.session_state['last_voice_input'] = voice_input

    # Keep showing the latest transcript on every rerun; gating the buttons on
    # "input is new" made them disappear on the rerun their click triggers.
    transcript = st.session_state.get('last_voice_input', "")
    if not transcript:
        return

    st.markdown("**Transcribed Text:**")
    st.write(transcript)
    cols = st.columns(3)
    with cols[0]:
        if st.button("🔍 Search Videos"):
            results = search.search(transcript, num_results)
            _record_search(transcript, results)
            for i, result in enumerate(results, 1):
                description = str(result['description'])
                with st.expander(f"Result {i}: {description[:100]}...", expanded=i == 1):
                    st.write(result['description'])
                    _show_video(result)
    with cols[1]:
        if st.button("🤖 Process with GPT-4"):
            gpt_response = process_with_gpt4(transcript)
            if gpt_response:
                st.markdown("**GPT-4 Response:**")
                st.write(gpt_response)
    with cols[2]:
        if st.button("🧠 Process with Claude"):
            claude_response = process_with_claude(transcript)
            if claude_response:
                st.markdown("**Claude Response:**")
                st.write(claude_response)


def _render_history_tab():
    """Render the search history, newest first, with a clear button."""
    st.subheader("Search History")
    if st.button("🗑️ Clear History"):
        st.session_state['search_history'] = []
        _rerun()
    for entry in reversed(st.session_state.get('search_history', [])):
        with st.expander(f"Query: {entry['query']} ({entry['timestamp']})", expanded=False):
            st.markdown(f"**Original Query:** {entry['query']}")
            st.markdown(f"**Time:** {entry['timestamp']}")
            for j, result in enumerate(entry['results'], 1):
                st.markdown(f"**Result {j}:**")
                st.write(result['description'])
                _show_video(result)


def _render_sidebar():
    """Render the configuration widgets and the history download."""
    st.subheader("⚙️ Configuration")
    st.markdown("**Video Search Settings**")
    st.slider("Default Results:", 1, 10, 5, key="default_results")
    st.markdown("**Voice Settings**")
    st.selectbox("TTS Voice:",
                 [_DEFAULT_TTS_VOICE, "en-US-GuyNeural", "en-GB-SoniaNeural"],
                 key="tts_voice")
    st.markdown("**Model Settings**")
    st.selectbox("Text Embedding Model:",
                 ["all-MiniLM-L6-v2", "paraphrase-multilingual-MiniLM-L12-v2"],
                 key="embedding_model")
    # Results may contain numpy scalars, which plain json.dumps rejects.
    history_json = json.dumps(st.session_state.get('search_history', []),
                              indent=2, default=_json_default)
    st.download_button("📥 Download Search History",
                       data=history_json,
                       file_name="search_history.json",
                       mime="application/json")


def main():
    """Build the page: search, voice-input and history tabs plus the sidebar."""
    st.title("🎥 Video Search with Speech Recognition")
    search = _get_video_search()
    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "💾 History"])
    with tab1:
        num_results = _render_search_tab(search)
    with tab2:
        _render_voice_tab(search, num_results)
    with tab3:
        _render_history_tab()
    with st.sidebar:
        _render_sidebar()


if __name__ == "__main__":
    main()