from fastapi import FastAPI
from pydantic import BaseModel
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, pipeline

# Load the model and tokenizer
model_name = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
model = XLMRobertaForSequenceClassification.from_pretrained(model_name)

# Define the sentiment analysis pipeline
sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)

app = FastAPI()

# Define a Pydantic model for the input text
class TextInput(BaseModel):
    text: str


# --- For /predict ---
# Function to split text into chunks
def split_text_into_chunks(text, max_tokens=500):
    # Encode without special tokens so chunk boundaries and token counts
    # reflect the text itself; the pipeline re-adds special tokens when it
    # re-tokenizes each decoded chunk.
    input_ids = tokenizer.encode(text, add_special_tokens=False)
    chunks = [input_ids[i:i + max_tokens] for i in range(0, len(input_ids), max_tokens)]
    chunk_texts = [tokenizer.decode(chunk, skip_special_tokens=True) for chunk in chunks]
    chunk_token_counts = [len(chunk) for chunk in chunks]
    return chunk_texts, chunk_token_counts
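# Note: max_tokens=500 leaves headroom under XLM-RoBERTa's 512-token limit for
# the special tokens the pipeline re-adds when it re-tokenizes each chunk.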


# Function to analyze sentiment for a list of chunks
def analyze_sentiment_chunks(chunk_texts, chunk_token_counts):
    results = []
    total_token_count = 0
    for i, chunk_text in enumerate(chunk_texts):
        total_token_count += chunk_token_counts[i]
        analysis = sentiment_pipeline(chunk_text, top_k=None)
        results.append({
            "chunk": i + 1,
            "text": chunk_text,
            "token_count": chunk_token_counts[i],
            "analysis": analysis,
        })
    return results, total_token_count


@app.post("/predict")
def predict_sentiment(input_data: TextInput):
    chunk_texts, chunk_token_counts = split_text_into_chunks(input_data.text)
    results, total_token_count = analyze_sentiment_chunks(chunk_texts, chunk_token_counts)

    total_neutral_score = total_positive_score = total_negative_score = 0
    for result in results:
        for sentiment in result['analysis']:
            if sentiment['label'] == "Neutral":
                total_neutral_score += sentiment['score']
            elif sentiment['label'] == "Positive":
                total_positive_score += sentiment['score']
            elif sentiment['label'] == "Negative":
                total_negative_score += sentiment['score']

    num_chunks = len(results)
    overall_neutral_score = total_neutral_score / num_chunks if num_chunks > 0 else 0
    overall_positive_score = total_positive_score / num_chunks if num_chunks > 0 else 0
    overall_negative_score = total_negative_score / num_chunks if num_chunks > 0 else 0

    # A single chunk needs no aggregation; return its analysis directly.
    if num_chunks == 1:
        return {"results": results}

    return {
        "total_chunks": num_chunks,
        "total_token_count": total_token_count,
        "total_neutral_score": total_neutral_score,
        "total_positive_score": total_positive_score,
        "total_negative_score": total_negative_score,
        "overall_neutral_score": overall_neutral_score,
        "overall_positive_score": overall_positive_score,
        "overall_negative_score": overall_negative_score,
        "results": results,
    }
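
# Example request against /predict (illustrative host, port, and text):
#   curl -X POST http://localhost:8000/predict \
#        -H "Content-Type: application/json" \
#        -d '{"text": "I love this product!"}'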


# --- For /analyse_text ---
# Function to split a conversation into structured (user, text) entries
def split_conversation(conversation, default_user="You"):
    conversation_lines = conversation.strip().split("\n")
    split_lines = []
    for line in conversation_lines:
        if ":" in line:
            user, text = line.split(":", 1)
            split_lines.append({"user": user.strip(), "text": text.strip().strip('"')})
        elif line.strip():
            # Lines without a "speaker:" prefix are attributed to default_user.
            split_lines.append({"user": default_user, "text": line.strip().strip('"')})
    return split_lines
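
# For illustration, the input 'Alice: "Hi there"\nHow are you?' parses to:
#   [{"user": "Alice", "text": "Hi there"},
#    {"user": "You", "text": "How are you?"}]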


# Function to analyze sentiment for each text entry
def analyze_sentiment(conversation_list):
    overall_scores = {"Negative": 0, "Neutral": 0, "Positive": 0}
    total_entries = len(conversation_list)
    if total_entries == 0:
        # Guard: empty input would cause a ZeroDivisionError when averaging.
        return []
    for entry in conversation_list:
        analysis = sentiment_pipeline(entry["text"], top_k=None)
        entry["analysis"] = analysis
        for sentiment in analysis:
            overall_scores[sentiment["label"]] += sentiment["score"]

    overall_analysis = [
        {"label": label, "score": overall_scores[label] / total_entries}
        for label in overall_scores
    ]
    return overall_analysis


@app.post("/analyse_text")
def analyse_text(input_data: TextInput):
    conversation_list = split_conversation(input_data.text)
    overall_analysis = analyze_sentiment(conversation_list)

    return {
        "analyses": conversation_list,
        "overall_analysis": overall_analysis,
    }
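
# Example request against /analyse_text, where each line of the text is
# "speaker: message" (illustrative host, port, and text):
#   curl -X POST http://localhost:8000/analyse_text \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Alice: Great work!\nBob: I am not so sure."}'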


@app.get("/")
def read_root():
    return {
        "info": "This is a sentiment analysis API. Use /predict for chunk-wise analysis or /analyse_text for structured conversation analysis."
    }
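

# Minimal sketch of a local entry point, assuming uvicorn is installed
# (pip install uvicorn); the host and port below are illustrative.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)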