Spaces:

jvillar02
/

news-classifier-demo

Running

App Files Files Community

Joaquin Villar committed on 8 days ago

Commit

d149d93

verified ·

1 Parent(s): 0f53989

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -52

app.py CHANGED Viewed

@@ -1,87 +1,137 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from peft import PeftModel
-# --- CONFIGURATION ---
-# Replace with your specific repo name
 ADAPTER_REPO = "jvillar-sheff/ag-news-distilbert-lora"
 BASE_MODEL_ID = "distilbert-base-uncased"
 CLASS_NAMES = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
 def load_model():
-    """Load the tokenizer and the LoRA-adapted DistilBERT classifier.
-
-    Returns:
-        A ``(model, tokenizer, device)`` tuple. The model has been moved to
-        the CPU device and switched to eval mode before it is returned.
-    """
     print("Loading Base Model...")
-    # 1. Load the Base Model (Generic DistilBERT)
     base_model = AutoModelForSequenceClassification.from_pretrained(
         BASE_MODEL_ID,
         num_labels=len(CLASS_NAMES),
-        id2label={k: v for k, v in CLASS_NAMES.items()},
         label2id={v: k for k, v in CLASS_NAMES.items()}
     )
-    # 2. Load the Tokenizer from YOUR repo (ensures consistency)
     tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO)
-    # 3. Load and Apply your LoRA Adapters
-    print(f"Loading LoRA Adapters from {ADAPTER_REPO}...")
     model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
-    # Optimize for CPU (Free Tier Spaces are CPU)
     device = torch.device("cpu")
     model.to(device)
     model.eval()
     return model, tokenizer, device
-# Load model once on startup
 model, tokenizer, device = load_model()
-def classify_news(text):
-    """Return per-class probabilities for a news snippet.
-
-    Args:
-        text: Article text to classify.
-
-    Returns:
-        A dict mapping each class name in CLASS_NAMES to its softmax
-        probability as a float, or None when ``text`` is empty or None.
-    """
-    if not text:
-        return None
-    # Preprocess
     inputs = tokenizer(
-        text,
-        return_tensors="pt",
-        truncation=True,
-        padding="max_length",
-        max_length=128
     ).to(device)
-    # Predict
     with torch.no_grad():
         outputs = model(**inputs)
-    # Get Probabilities
     logits = outputs.logits
-    probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()
-    # Format Output
-    results = {}
-    for i, prob in enumerate(probabilities):
-        results[CLASS_NAMES[i]] = float(prob)
-    return results
-# Create Interface
-# Single-textbox interface; the Label output is configured to show all four
-# class scores returned by classify_news.
-iface = gr.Interface(
-    fn=classify_news,
-    inputs=gr.Textbox(
-        lines=5,
-        placeholder="Paste a news article here...",
-        label="News Text"
-    ),
-    outputs=gr.Label(num_top_classes=4, label="Prediction"),
-    title="AI News Classifier (DistilBERT + LoRA)",
-    description="This model classifies news into World, Sports, Business, or Sci/Tech categories. Trained on AG News using Parameter-Efficient Fine-Tuning.",
-    examples=[
-        ["The stock market rallied today as tech companies reported record profits."],
-        ["The team scored a goal in the final minute to win the championship."],
-        ["New research shows that drinking coffee may increase life expectancy."],
-        ["Diplomats gathered in Geneva to discuss the peace treaty."]
-    ]
-)
-# Launched unconditionally at module level (no __main__ guard).
-iface.launch()

 import gradio as gr
 import torch
+import numpy as np
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from peft import PeftModel
+# --- 1. CONFIGURATION ---
+# Display-only metric strings interpolated into the banner at the top of the
+# UI; they are hard-coded here, not computed by this script.
+MODEL_METRICS = {
+    "Accuracy": "89.20%",
+    "F1_Score": "0.8931"
+}
+# Hub repo used for both the tokenizer and the LoRA adapter weights.
 ADAPTER_REPO = "jvillar-sheff/ag-news-distilbert-lora"
+# Base checkpoint that the adapters are applied on top of.
 BASE_MODEL_ID = "distilbert-base-uncased"
+# Maps the predicted class index to its display name.
 CLASS_NAMES = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
+# --- 2. MODEL LOADING ---
 def load_model():
+    """Load the tokenizer and the LoRA-adapted DistilBERT classifier.
+
+    Returns:
+        A ``(model, tokenizer, device)`` tuple. The model has been moved to
+        the CPU device and switched to eval mode before it is returned.
+    """
     print("Loading Base Model...")
     base_model = AutoModelForSequenceClassification.from_pretrained(
         BASE_MODEL_ID,
         num_labels=len(CLASS_NAMES),
+        # Yields the same mapping as CLASS_NAMES, whose keys are already 0..3 in order.
+        id2label={k: v for k, v in enumerate(CLASS_NAMES.values())},
         label2id={v: k for k, v in CLASS_NAMES.items()}
     )
+    print("Loading Tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO)
+    print("Loading Adapters...")
     model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
     device = torch.device("cpu")
     model.to(device)
     model.eval()
     return model, tokenizer, device
+# Loaded once at import time and shared by every prediction call.
 model, tokenizer, device = load_model()
+# --- 3. PREDICTION LOGIC ---
+def predict(text):
+    if not text.strip():
+        return None, None, None
     inputs = tokenizer(
+        text, return_tensors="pt", truncation=True, padding="max_length", max_length=128
     ).to(device)
     with torch.no_grad():
         outputs = model(**inputs)
     logits = outputs.logits
+    probs = torch.nn.functional.softmax(logits, dim=1).squeeze().cpu().numpy()
+    # 1. Get Top Label
+    pred_idx = np.argmax(probs)
+    pred_label = CLASS_NAMES[pred_idx]
+    conf = float(probs[pred_idx])
+    # 2. Create Probability Dict for the Chart
+    class_probs = {CLASS_NAMES[i]: float(probs[i]) for i in range(len(CLASS_NAMES))}
+    # 3. Create HTML for the "Confidence Badge" (Mimicking Streamlit)
+    if conf > 0.85:
+        bg_color, txt_color = "#d4edda", "#155724" # Green
+    elif conf > 0.60:
+        bg_color, txt_color = "#fff3cd", "#856404" # Yellow
+    else:
+        bg_color, txt_color = "#f8d7da", "#721c24" # Red
+    badge_html = f"""
+    <div style='background-color: {bg_color}; color: {txt_color};
+    padding: 8px 12px; border-radius: 5px; display: inline-block; font-weight: bold; font-size: 16px;'>
+    Confidence: {conf:.2%}
+    </div>
+    """
+    # Return: Label Text, Badge HTML, Chart Data
+    return f"# {pred_label}", badge_html, class_probs
+# --- 4. UI LAYOUT (gr.Blocks) ---
+# We use Soft theme to match Streamlit's clean look
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # Title
+    gr.Markdown("# 📰 NLP News Classifier")
+    gr.Markdown("Classify news articles into World, Sports, Business, or Sci/Tech using DistilBERT + LoRA.")
+    # -- The "Green Banner" (HTML) --
+    # Values come from the hard-coded MODEL_METRICS dict.
+    gr.HTML(f"""
+    <div style="background-color: #d1e7dd; color: #0f5132; padding: 15px; border-radius: 5px; border: 1px solid #badbcc; margin-bottom: 20px;">
+        ✅ <b>Model Performance:</b> Accuracy: {MODEL_METRICS['Accuracy']} | F1 Score: {MODEL_METRICS['F1_Score']}
+    </div>
+    """)
+    # Two columns with equal scale: input on the left, results on the right.
+    with gr.Row():
+        # Left Column: Input
+        with gr.Column(scale=1):
+            input_text = gr.Textbox(
+                lines=6,
+                placeholder="Paste a news snippet here...",
+                label="News Article"
+            )
+            btn = gr.Button("Classify Article", variant="primary")
+            gr.Markdown("### Examples")
+            # Sample snippets wired to the textbox above.
+            gr.Examples(
+                examples=[
+                    ["The stock market rallied today as tech companies reported record profits."],
+                    ["The local team won the championship after a stunning overtime goal."],
+                    ["NASA announces plans to launch a new rover to Mars next July."]
+                ],
+                inputs=input_text
+            )
+        # Right Column: Results
+        with gr.Column(scale=1):
+            gr.Markdown("### Prediction")
+            # Output 1: Big Label text
+            out_label = gr.Markdown()
+            # Output 2: The Colored Badge
+            out_badge = gr.HTML()
+            gr.Markdown("### Probability Breakdown")
+            # Output 3: Bar Chart (Label component handles this beautifully)
+            out_chart = gr.Label(num_top_classes=4, label="Confidence Scores")
+    # Wire up the button
+    # The outputs list order matches the tuple returned by predict.
+    btn.click(
+        fn=predict,
+        inputs=input_text,
+        outputs=[out_label, out_badge, out_chart]
+    )
+# Launch
+# Guarded so that importing this module does not start the server.
+if __name__ == "__main__":
+    demo.launch()