Spaces:

Rsnarsna
/

sentiment_analyses

Sleeping

App Files Files Community

Rsnarsna committed on Dec 10, 2024

Commit

acbdb77

verified ·

1 Parent(s): cfcbe55

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -26

app.py CHANGED Viewed

@@ -1,38 +1,75 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import pipeline
-import torch
-# Load the sentiment analysis pipeline
-pipe = pipeline(
-    "text-classification",
-    model="citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
-)
-# Define a request model for the input text
-class SentimentRequest(BaseModel):
-    text: str
-# Define a response model for the sentiment analysis output
-class SentimentResponse(BaseModel):
     text: str
-    analysis: dict  # Contains sentiment labels and their respective confidence scores
-# Initialize FastAPI app
-app = FastAPI()
 @app.get("/")
-async def read_root():
-    return {"message": "Welcome to the Sentiment Analysis API! Use '/predict' to analyze sentiment."}
 @app.post("/predict")
-async def analyze_sentiment(input: SentimentRequest):
-    text = input.text
-    analysis = pipe(text, top_k=None)  # top_k=None to get all possible labels
-    result = {entry['label']: entry['score'] for entry in analysis}
-    # Return the sentiment analysis result as a response
-    return SentimentResponse(text=text, analysis=result)
-# Run the application with Uvicorn (from the terminal/command line)
-# uvicorn app_name:app --reload

 from fastapi import FastAPI
 from pydantic import BaseModel
+from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, pipeline
+# Load the tokenizer and classification model for the checkpoint once, when the module is imported
+model_name = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
+tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
+model = XLMRobertaForSequenceClassification.from_pretrained(model_name)
+# Wrap the loaded model and tokenizer in a text-classification pipeline used for every request
+sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
+app = FastAPI()
+# Request payload accepted by predict_sentiment: a single string field, `text`
+class TextInput(BaseModel):
     text: str
+# Function to split text into chunks
+def split_text_into_chunks(text, max_tokens=500):
+    tokens = tokenizer(text, return_tensors="pt", truncation=False, padding=False)
+    input_ids = tokens['input_ids'][0].tolist()  # Flatten input_ids into a list
+    # Create chunks of max_tokens size
+    chunks = [input_ids[i:i+max_tokens] for i in range(0, len(input_ids), max_tokens)]
+    chunk_texts = [tokenizer.decode(chunk, skip_special_tokens=True) for chunk in chunks]
+    return chunks, chunk_texts, [len(chunk) for chunk in chunks]  # Return token count for each chunk
+# Function to analyze sentiment for a list of chunks
+def analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts):
+    results = []
+    total_token_count = 0
+    for i, chunk in enumerate(chunk_texts):
+        total_token_count += chunk_token_counts[i]  # Sum the token count of all chunks
+        analysis = sentiment_pipeline(chunk, top_k=None)
+        results.append({
+            "chunk": i + 1,
+            "text": chunk,
+            "token_count": chunk_token_counts[i],  # Include token count in the result
+            "analysis": analysis,
+        })
+    return results, total_token_count
 @app.get("/")
+def read_root():
+    return {"info": "This is a sentiment analysis API. Use the /predict endpoint to analyze text."}
 @app.post("/predict")
+def predict_sentiment(input_data: TextInput):
+    # Step 1: Split text into chunks
+    chunks, chunk_texts, chunk_token_counts = split_text_into_chunks(input_data.text)
+    # Step 2: Analyze sentiment for each chunk and calculate the total token count
+    results, total_token_count = analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts)
+    # Step 3: Return the aggregated sentiment analysis results along with total token count
+    total_neutral_score = 0
+    total_positive_score = 0
+    total_negative_score = 0
+    for result in results:
+        for sentiment in result['analysis']:
+            if sentiment['label'] == "Neutral":
+                total_neutral_score += sentiment['score']
+            elif sentiment['label'] == "Positive":
+                total_positive_score += sentiment['score']
+            elif sentiment['label'] == "Negative":
+                total_negative_score += sentiment['score']
+    return {
+        "total_chunks": len(results),
+        "total_token_count": total_token_count,  # Include total token count in the response
+        "total_neutral_score": total_neutral_score,
+        "total_positive_score": total_positive_score,
+        "total_negative_score": total_negative_score,
+        "results": results
+    }