Rsnarsna committed on
Commit
acbdb77
·
verified ·
1 Parent(s): cfcbe55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -26
app.py CHANGED
@@ -1,38 +1,75 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- from transformers import pipeline
4
- import torch
5
 
6
# Sentiment classifier, created once when the module is imported and shared
# by every request.
pipe = pipeline(
    task="text-classification",
    model="citizenlab/twitter-xlm-roberta-base-sentiment-finetunned",
)
11
 
12
# Request body schema: carries the text to be classified.
class SentimentRequest(BaseModel):
    # Raw text whose sentiment should be analyzed.
    text: str
 
15
 
16
# Response body schema returned by the sentiment endpoint.
class SentimentResponse(BaseModel):
    # The text that was analyzed, echoed back to the caller.
    text: str
    # Sentiment labels mapped to their respective confidence scores.
    analysis: dict
20
 
21
# FastAPI application object that the route decorators register against.
app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  @app.get("/")
25
async def read_root():
    # Landing route: returns a static greeting that points callers at the
    # prediction endpoint.
    welcome = "Welcome to the Sentiment Analysis API! Use '/predict' to analyze sentiment."
    return {"message": welcome}
27
 
28
  @app.post("/predict")
29
async def analyze_sentiment(input: SentimentRequest):
    # Classify the submitted text and return every label with its score.
    #
    # `input` shadows the builtin of the same name; the parameter name is kept
    # unchanged so the handler's signature stays as it was.
    #
    # Imported locally so this handler does not depend on edits to the
    # module's import block.
    from fastapi.concurrency import run_in_threadpool

    text = input.text

    # Model inference is blocking work. Calling it directly inside an
    # ``async def`` handler would stall the event loop for all other requests,
    # so it is run in a worker thread instead.
    # top_k=None asks the pipeline for all possible labels, not just the best.
    analysis = await run_in_threadpool(pipe, text, top_k=None)

    # Flatten the list of {"label", "score"} entries into a label -> score map.
    result = {entry["label"]: entry["score"] for entry in analysis}

    return SentimentResponse(text=text, analysis=result)
36
-
37
- # Run the application with Uvicorn from the command line:
38
- # uvicorn app:app --reload
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
+ from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, pipeline
 
4
 
5
# Hugging Face checkpoint used for sentiment classification.
model_name = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"

# The tokenizer and model are loaded explicitly (rather than by name inside
# the pipeline) because the tokenizer is also used on its own to split long
# inputs into chunks.
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
model = XLMRobertaForSequenceClassification.from_pretrained(model_name)

# Text-classification pipeline assembled from the objects loaded above.
sentiment_pipeline = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

# FastAPI application object that the route decorators register against.
app = FastAPI()
14
 
15
# Request body schema for the /predict endpoint.
class TextInput(BaseModel):
    # Raw text whose sentiment should be analyzed.
    text: str
 
18
 
19
def split_text_into_chunks(text, max_tokens=500):
    """Split ``text`` into consecutive pieces of at most ``max_tokens`` tokens.

    The text is tokenized once with the module-level ``tokenizer``, the token
    ids are cut into fixed-size windows, and every window is decoded back
    into a string.

    Args:
        text: The input string to split.
        max_tokens: Maximum number of tokens per chunk. Must be positive.

    Returns:
        A tuple ``(chunks, chunk_texts, chunk_token_counts)`` of three lists
        of equal length: the token ids of each chunk, the decoded text of
        each chunk, and the number of tokens in each chunk. All three lists
        are empty when ``text`` produces no tokens.

    Raises:
        ValueError: If ``max_tokens`` is not positive.
    """
    if max_tokens <= 0:
        # Zero would make range() fail with an unrelated message and a
        # negative step would silently produce no chunks at all.
        raise ValueError("max_tokens must be a positive integer")

    # Tokenize without the tokenizer's special tokens: they are not part of
    # the text, would inflate the reported token counts, and are stripped
    # again on decode anyway. Plain Python lists are requested directly, so
    # no tensor round trip is needed.
    input_ids = tokenizer(
        text,
        add_special_tokens=False,
        truncation=False,
        padding=False,
    )["input_ids"]

    chunks = [
        input_ids[start:start + max_tokens]
        for start in range(0, len(input_ids), max_tokens)
    ]
    chunk_texts = [
        tokenizer.decode(chunk, skip_special_tokens=True) for chunk in chunks
    ]
    chunk_token_counts = [len(chunk) for chunk in chunks]
    return chunks, chunk_texts, chunk_token_counts
27
+
28
def analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts):
    """Run the sentiment pipeline on every chunk and collect the results.

    Args:
        chunks: Token ids of each chunk. Not used by this function; the
            parameter is kept so existing callers keep working.
        chunk_texts: Decoded text of each chunk; this is what gets classified.
        chunk_token_counts: Number of tokens in each chunk, in the same order
            as ``chunk_texts``.

    Returns:
        A tuple ``(results, total_token_count)``. ``results`` holds one dict
        per chunk with the keys ``"chunk"`` (1-based position), ``"text"``,
        ``"token_count"`` and ``"analysis"`` (the pipeline output for that
        chunk). ``total_token_count`` is the sum of the token counts of all
        analyzed chunks.
    """
    results = []
    numbered = enumerate(zip(chunk_texts, chunk_token_counts), start=1)
    for position, (chunk_text, token_count) in numbered:
        # Decoding a chunk and tokenizing it again inside the pipeline is not
        # guaranteed to give back the same number of tokens, so let the
        # tokenizer truncate rather than fail on an over-long sequence.
        # top_k=None requests the scores of all labels.
        analysis = sentiment_pipeline(chunk_text, top_k=None, truncation=True)
        results.append({
            "chunk": position,
            "text": chunk_text,
            "token_count": token_count,
            "analysis": analysis,
        })

    total_token_count = sum(entry["token_count"] for entry in results)
    return results, total_token_count
42
 
43
  @app.get("/")
44
def read_root():
    # Landing route: returns a static note that points callers at /predict.
    info_message = "This is a sentiment analysis API. Use the /predict endpoint to analyze text."
    return {"info": info_message}
46
 
47
  @app.post("/predict")
48
def predict_sentiment(input_data: TextInput):
    # Split the submitted text into chunks, classify each chunk, and return
    # the per-chunk results together with per-label score totals.

    # Step 1: cut the text into token-bounded chunks.
    chunks, chunk_texts, chunk_token_counts = split_text_into_chunks(input_data.text)

    # Step 2: classify every chunk and add up the token counts.
    results, total_token_count = analyze_sentiment_chunks(
        chunks, chunk_texts, chunk_token_counts
    )

    # Step 3: accumulate the score of each known label across all chunks.
    # Labels other than these three are ignored.
    score_totals = {"Neutral": 0, "Positive": 0, "Negative": 0}
    for chunk_result in results:
        for prediction in chunk_result["analysis"]:
            label = prediction["label"]
            if label in score_totals:
                score_totals[label] += prediction["score"]

    return {
        "total_chunks": len(results),
        "total_token_count": total_token_count,
        "total_neutral_score": score_totals["Neutral"],
        "total_positive_score": score_totals["Positive"],
        "total_negative_score": score_totals["Negative"],
        "results": results,
    }