from fastapi import FastAPI
from pydantic import BaseModel
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, pipeline
# Load the model and tokenizer
model_name = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
model = XLMRobertaForSequenceClassification.from_pretrained(model_name)
# Define the sentiment analysis pipeline
sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
app = FastAPI()
# Define a Pydantic model for the input text
class TextInput(BaseModel):
    text: str
# --- For /predict ---
# Function to split text into chunks
def split_text_into_chunks(text, max_tokens=500):
    # Tokenize the full text without truncation, then slice the token ids into
    # fixed-size windows that stay below the model's 512-token limit
    tokens = tokenizer(text, return_tensors="pt", truncation=False, padding=False)
    input_ids = tokens["input_ids"][0].tolist()
    chunks = [input_ids[i:i + max_tokens] for i in range(0, len(input_ids), max_tokens)]
    chunk_texts = [tokenizer.decode(chunk, skip_special_tokens=True) for chunk in chunks]
    return chunks, chunk_texts, [len(chunk) for chunk in chunks]
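# For illustration (hypothetical numbers): a 1,200-token input would be split
# into three chunks of 500, 500, and 200 tokens, each decoded back to text and
# scored independently by the pipeline below.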
# Function to analyze sentiment for a list of chunks
def analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts):
    results = []
    total_token_count = 0
    for i, chunk_text in enumerate(chunk_texts):
        total_token_count += chunk_token_counts[i]
        # top_k=None returns the scores for all labels, not just the top one
        analysis = sentiment_pipeline(chunk_text, top_k=None)
        results.append({
            "chunk": i + 1,
            "text": chunk_text,
            "token_count": chunk_token_counts[i],
            "analysis": analysis,
        })
    return results, total_token_count
@app.post("/predict")
def predict_sentiment(input_data: TextInput):
    chunks, chunk_texts, chunk_token_counts = split_text_into_chunks(input_data.text)
    results, total_token_count = analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts)
    total_neutral_score = total_positive_score = total_negative_score = 0
    for result in results:
        for sentiment in result["analysis"]:
            if sentiment["label"] == "Neutral":
                total_neutral_score += sentiment["score"]
            elif sentiment["label"] == "Positive":
                total_positive_score += sentiment["score"]
            elif sentiment["label"] == "Negative":
                total_negative_score += sentiment["score"]
    num_chunks = len(results)
    overall_neutral_score = total_neutral_score / num_chunks if num_chunks > 0 else 0
    overall_positive_score = total_positive_score / num_chunks if num_chunks > 0 else 0
    overall_negative_score = total_negative_score / num_chunks if num_chunks > 0 else 0
    # A single chunk needs no aggregation; return its analysis directly
    if num_chunks == 1:
        return {"results": results}
    return {
        "total_chunks": num_chunks,
        "total_token_count": total_token_count,
        "total_neutral_score": total_neutral_score,
        "total_positive_score": total_positive_score,
        "total_negative_score": total_negative_score,
        "overall_neutral_score": overall_neutral_score,
        "overall_positive_score": overall_positive_score,
        "overall_negative_score": overall_negative_score,
        "results": results,
    }
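# Example request against a locally running server (hypothetical host/port;
# a sketch, not part of the app):
#   curl -X POST http://localhost:8000/predict \
#        -H "Content-Type: application/json" \
#        -d '{"text": "The service was quick and the staff were friendly."}'
# Single-chunk inputs return only {"results": [...]}; longer inputs also carry
# the totals and per-chunk averages computed above.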
# --- For /analyse_text ---
# Function to split text into structured format
def split_conversation(conversation, default_user="You"):
    conversation_lines = conversation.strip().split("\n")
    split_lines = []
    for line in conversation_lines:
        if not line.strip():
            continue
        if ":" in line:
            user, text = line.split(":", 1)
            text = text.strip().strip('"')
            split_lines.append({"user": user.strip(), "text": text})
        else:
            # Lines without a "User:" prefix are attributed to the default user
            split_lines.append({"user": default_user, "text": line.strip().strip('"')})
    return split_lines
# Function to analyze sentiment for each text entry
def analyze_sentiment(conversation_list):
    overall_scores = {"Negative": 0, "Neutral": 0, "Positive": 0}
    total_entries = len(conversation_list)
    # Guard against empty input, which would otherwise divide by zero below
    if total_entries == 0:
        return []
    for entry in conversation_list:
        analysis = sentiment_pipeline(entry["text"], top_k=None)
        entry["analysis"] = analysis
        for sentiment in analysis:
            overall_scores[sentiment["label"]] += sentiment["score"]
    # Average each label's score across all entries
    overall_analysis = [
        {"label": label, "score": overall_scores[label] / total_entries}
        for label in overall_scores
    ]
    return overall_analysis
@app.post("/analyse_text")
def analyse_text(input_data: TextInput):
    conversation_list = split_conversation(input_data.text)
    overall_analysis = analyze_sentiment(conversation_list)
    return {
        "analyses": conversation_list,
        "overall_analysis": overall_analysis,
    }
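# Example request (hypothetical host/port) showing the "User: text" line format
# that split_conversation expects:
#   curl -X POST http://localhost:8000/analyse_text \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Alice: Great job on the release!\nBob: I ran into a few bugs."}'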
@app.get("/")
def read_root():
    return {
        "info": "This is a sentiment analysis API. Use /predict for chunk-wise analysis or /analyse_text for structured conversation analysis."
    }
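# Local entry point: a minimal sketch assuming uvicorn is installed. On
# Hugging Face Spaces the server is usually started by the Space's own
# runtime (e.g. a Dockerfile CMD), so this block is only for local runs.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)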