Spaces:

Rsnarsna
/

sentiment_analyses

Sleeping

App Files Community

Rsnarsna committed on Dec 10, 2024

Commit

cfcbe55

verified ·

1 Parent(s): 1fc75a2

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -47

app.py CHANGED Viewed

@@ -1,15 +1,13 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import pipeline, XLMRobertaTokenizer, XLMRobertaForSequenceClassification
 import torch
-# Load the model and tokenizer
-model_name = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
-tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
-model = XLMRobertaForSequenceClassification.from_pretrained(model_name)
-# Define the sentiment analysis pipeline using the model and tokenizer
-pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
 # Define a request model for the input text
 class SentimentRequest(BaseModel):
@@ -23,14 +21,6 @@ class SentimentResponse(BaseModel):
 # Initialize FastAPI app
 app = FastAPI()
-# Function to split text into smaller chunks
-def chunk_text(text, max_length=512):
-    # Tokenize the text
-    tokens = tokenizer.encode(text, truncation=False)
-    # Split into chunks of max_length tokens each
-    return [tokens[i:i + max_length] for i in range(0, len(tokens), max_length)]
 @app.get("/")
 async def read_root():
     return {"message": "Welcome to the Sentiment Analysis API! Use '/predict' to analyze sentiment."}
@@ -38,38 +28,11 @@ async def read_root():
 @app.post("/predict")
 async def analyze_sentiment(input: SentimentRequest):
     text = input.text
-    # Split the input text into chunks if it exceeds the token limit
-    chunks = chunk_text(text, max_length=512)  # 512 tokens is the max for XLM-Roberta
-    # Run sentiment analysis for each chunk
-    analysis_results = []
-    for chunk in chunks:
-        # Convert chunk back to text
-        chunk_text = tokenizer.decode(chunk, skip_special_tokens=True)
-        # Tokenize the chunk text
-        inputs = tokenizer(chunk_text, padding=True, truncation=True, max_length=512, return_tensors="pt")
-        # Run sentiment analysis through the pipeline
-        with torch.no_grad():  # No need to compute gradients for inference
-            analysis = pipe(**inputs, top_k=None)  # Get all possible labels
-        # Extract the result as a dictionary of labels and confidence scores
-        result = {entry['label']: entry['score'] for entry in analysis}
-        analysis_results.append(result)
-    # Aggregate analysis results
-    combined_analysis = {}
-    for result in analysis_results:
-        for label, score in result.items():
-            if label in combined_analysis:
-                combined_analysis[label] += score  # Sum up scores for the same label
-            else:
-                combined_analysis[label] = score
     # Return the sentiment analysis result as a response
-    return SentimentResponse(text=text, analysis=combined_analysis)
 # Run the application with Uvicorn (from the terminal/command line)
-# uvicorn app_name:app --reload

 from fastapi import FastAPI
 from pydantic import BaseModel
+from transformers import pipeline
 import torch
+# Load the sentiment analysis pipeline
+pipe = pipeline(
+    "text-classification",
+    model="citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
+)
 # Define a request model for the input text
 class SentimentRequest(BaseModel):
 # Initialize FastAPI app
 app = FastAPI()
 @app.get("/")
 async def read_root():
     return {"message": "Welcome to the Sentiment Analysis API! Use '/predict' to analyze sentiment."}
 @app.post("/predict")
 async def analyze_sentiment(input: SentimentRequest):
     text = input.text
+    analysis = pipe(text, top_k=None)  # top_k=None to get all possible labels
+    result = {entry['label']: entry['score'] for entry in analysis}
     # Return the sentiment analysis result as a response
+    return SentimentResponse(text=text, analysis=result)
 # Run the application with Uvicorn (from the terminal/command line)
+# uvicorn app_name:app --reload