Spaces:

Mostafa174
/

Topic_Modeling_AI

Sleeping

App Files Files Community

Mostafa174 committed on Oct 11

Commit

450a421

1 Parent(s): c1f9b9f

Initial commit

Browse files

Files changed (2) hide show

app.py +218 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,218 @@

+import gradio as gr
+import os
+import numpy as np
+from scipy.special import expit
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from PyPDF2 import PdfReader
+from docx import Document
+# Load Model and Tokenizer
+# Both objects are created once at import time and reused by analyze_topics
+# and analyze_document for every request.
+MODEL = "cardiffnlp/tweet-topic-21-multi"
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL)
+# Index -> label-name lookup; the analyzers use it to turn a position in the
+# score vector into a topic name.
+class_mapping = model.config.id2label
+# Text Analyzer
+def analyze_topics(text):
+    detected_topics = []
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    outputs = model(**inputs)
+    scores = outputs.logits[0].detach().numpy()
+    scores = expit(scores)
+    predictions = (scores >= 0.5).astype(int)
+    for i, pred in enumerate(predictions):
+        if pred:
+            topic_name = class_mapping[i]
+            confidence = scores[i]
+            detected_topics.append(f"• {topic_name} ({confidence:.2f})")
+    if detected_topics:
+        return "\n".join(detected_topics)
+    else:
+        return "No specific topics detected."
+# Document Analyzer Helpers
+def extract_text_from_file(file_path):
+    ext = os.path.splitext(file_path)[1].lower()
+    if ext == ".pdf":
+        reader = PdfReader(file_path)
+        text = " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
+    elif ext == ".docx":
+        doc = Document(file_path)
+        text = "\n".join([p.text for p in doc.paragraphs])
+    elif ext == ".txt":
+        with open(file_path, "r", encoding="utf-8") as f:
+            text = f.read()
+    else:
+        raise ValueError("Unsupported file format. Please upload a PDF, DOCX, or TXT file.")
+    return text.strip()
+def analyze_document(file):
+    if file is None:
+        return "Please upload a document first."
+    text = extract_text_from_file(file.name)
+    if not text:
+        return "No readable text found in document."
+    # Split into chunks for large docs
+    words = text.split()
+    chunks = [" ".join(words[i:i + 400]) for i in range(0, len(words), 400)]
+    all_detected_topics = {}
+    for chunk in chunks:
+        inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
+        outputs = model(**inputs)
+        scores = outputs.logits[0].detach().numpy()
+        scores = expit(scores)
+        predictions = (scores >= 0.5).astype(int)
+        for i, pred in enumerate(predictions):
+            if pred:
+                topic_name = class_mapping[i]
+                confidence = scores[i]
+                all_detected_topics.setdefault(topic_name, []).append(confidence)
+    if all_detected_topics:
+        summary = [
+            f"• {topic} (avg confidence: {np.mean(confs):.2f})"
+            for topic, confs in all_detected_topics.items()
+        ]
+        summary.sort(key=lambda x: float(x.split(': ')[-1].rstrip(')')), reverse=True)
+        return "\n".join(summary)
+    else:
+        return "No specific topics detected in document."
+# Custom stylesheet handed to gr.TabbedInterface via its `css` argument:
+# dark background (#1a1a1a) with orange (#ff9900) accents on headings,
+# labels, buttons and the selected tab.
+# NOTE(review): the `.svelte-1ipelgc` / `.svelte-1xdkkgx` selectors look like
+# build-generated class hashes - confirm they still match the installed Gradio.
+# NOTE(review): no component in this file is given the `output-textbox`
+# class, so that rule may never apply - verify.
+css = """
+/* --- Global Layout --- */
+body {
+    background-color: #1a1a1a !important;
+    color: #f5f5f5 !important;
+    font-family: 'Inter', sans-serif !important;
+    margin: 0 !important;
+    padding: 0 !important;
+}
+/* Full width */
+#root, .gradio-container, .main {
+    max-width: 100% !important;
+    width: 100% !important;
+    background-color: #1a1a1a !important;
+    margin: 0 !important;
+    padding: 0 !important;
+    border: none !important;
+    box-shadow: none !important;
+}
+/* Headings and Labels */
+h1, h2, h3, label {
+    color: #ff9900 !important;
+    font-weight: 600 !important;
+}
+/* Text Inputs */
+textarea, input {
+    background-color: #2a2a2a !important;
+    color: #f5f5f5 !important;
+    border: 1px solid #3a3a3a !important;
+    border-radius: 10px !important;
+    padding: 12px !important;
+}
+/* Buttons */
+button {
+    background-color: #ff9900 !important;
+    color: #1a1a1a !important;
+    font-weight: 600 !important;
+    border-radius: 8px !important;
+    border: none !important;
+    padding: 8px 16px !important;
+    transition: 0.25s ease-in-out;
+}
+button:hover {
+    background-color: #ffb84d !important;
+}
+/* Output textbox */
+.output-textbox {
+    background-color: #252525 !important;
+    color: #ffd480 !important;
+    border: 1px solid #3a3a3a !important;
+    border-radius: 10px !important;
+    box-shadow: inset 0 0 6px rgba(255,153,0,0.1);
+}
+/* Tabs */
+.tabitem.svelte-1ipelgc {
+    background-color: #1a1a1a !important;
+    color: #ffb84d !important;
+}
+.tabitem.svelte-1ipelgc.selected {
+    background-color: #ff9900 !important;
+    color: #1a1a1a !important;
+    font-weight: 700 !important;
+}
+/* Footer */
+.footer, .svelte-1xdkkgx, .wrap.svelte-1ipelgc {
+    background: none !important;
+    border: none !important;
+    box-shadow: none !important;
+    color: #888 !important;
+    text-align: center !important;
+}
+"""
+# -------------------------
+# Gradio Interface
+# -------------------------
+# Tab 1: multi-line text box routed to analyze_topics, with three example inputs.
+tweet_tab = gr.Interface(
+    fn=analyze_topics,
+    inputs=gr.Textbox(
+        label="📝 Enter Text",
+        placeholder="Type or paste text here...",
+        lines=4
+    ),
+    outputs=gr.Textbox(label="🎯 Detected Topics"),
+    examples=[
+        ["Just watched the new Marvel movie, it was amazing!"],
+        ["Bitcoin prices are going up again!"],
+        ["Climate change is affecting polar bears."],
+    ],
+    title="💬 Text Topic Analyzer",
+    description="Analyze short texts or tweets to detect underlying topics using CardiffNLP’s Tweet Topic model.",
+)
+# Tab 2: single file upload routed to analyze_document.
+document_tab = gr.Interface(
+    fn=analyze_document,
+    inputs=gr.File(label="📄 Upload Document (PDF, DOCX, or TXT)"),
+    outputs=gr.Textbox(label="📘 Detected Topics"),
+    title="📄 Document Topic Analyzer",
+    description="Upload a document and let the AI detect key topics discussed inside.",
+)
+# Combine both tabs into one app; the custom CSS and the orange Base theme
+# are applied at this level.
+app = gr.TabbedInterface(
+    [tweet_tab, document_tab],
+    ["💬 Text Analyzer", "📄 Document Analyzer"],
+    title="🧠 AI Topic Analyzer",
+    css=css,
+    theme=gr.themes.Base(primary_hue="orange", secondary_hue="orange"),
+)
+# Launch the server only when this file is executed as a script.
+if __name__ == "__main__":
+    app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio>=4.0.0
+transformers>=4.30.0
+torch>=2.0.0
+numpy>=1.21.0
+scipy>=1.7.0
+PyPDF2>=3.0.0
+python-docx>=0.8.11