Spaces:
Sleeping
Sleeping
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, pipeline

# Load the model and tokenizer.
# NOTE(review): weights are fetched at import time, so the app blocks until the
# download/cache lookup completes — presumably acceptable for a HF Space; confirm.
model_name = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
model = XLMRobertaForSequenceClassification.from_pretrained(model_name)

# Define the sentiment analysis pipeline shared by all endpoints below.
sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)

app = FastAPI()
# Define a Pydantic model for the input text
class TextInput(BaseModel):
    """Request body: the raw text to analyze."""

    text: str
# --- For /predict ---
def split_text_into_chunks(text, max_tokens=500):
    """Tokenize *text* and slice its token ids into fixed-size chunks.

    Returns a 3-tuple: (token-id chunks, decoded chunk texts, per-chunk
    token counts). Boundaries are purely positional — every *max_tokens*
    ids — so a word may be split across two chunks; decoding drops
    special tokens.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=False, padding=False)
    ids = encoded["input_ids"][0].tolist()
    id_chunks = [ids[start:start + max_tokens] for start in range(0, len(ids), max_tokens)]
    decoded = [tokenizer.decode(chunk_ids, skip_special_tokens=True) for chunk_ids in id_chunks]
    counts = [len(chunk_ids) for chunk_ids in id_chunks]
    return id_chunks, decoded, counts
def analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts):
    """Run the sentiment pipeline on every chunk text.

    Returns (results, total_token_count): one result dict per chunk
    holding its 1-based index, text, token count, and raw pipeline
    output. The token-id *chunks* argument is accepted for interface
    parity but is not consulted here.
    """
    results = []
    running_total = 0
    for index, (text, token_count) in enumerate(zip(chunk_texts, chunk_token_counts), start=1):
        running_total += token_count
        results.append(
            {
                "chunk": index,
                "text": text,
                "token_count": token_count,
                "analysis": sentiment_pipeline(text, top_k=None),
            }
        )
    return results, running_total
def predict_sentiment(input_data: TextInput):
    """Run chunk-wise sentiment analysis over arbitrarily long input text.

    The text is split into <=500-token chunks, each chunk is scored by the
    pipeline, and per-label scores are summed and averaged across chunks.

    Returns only the per-chunk results when the text fits in one chunk;
    otherwise also returns the per-label totals and averages.
    """
    chunks, chunk_texts, chunk_token_counts = split_text_into_chunks(input_data.text)
    results, total_token_count = analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts)

    # Single-chunk input: return early. (Fix: the original computed all the
    # aggregates below first and then discarded them on this path.)
    if len(results) == 1:
        return {"results": results}

    # Accumulate per-label totals with a dict, mirroring analyze_sentiment().
    # Labels outside the known set are silently ignored, exactly as the
    # original if/elif chain did.
    totals = {"Neutral": 0.0, "Positive": 0.0, "Negative": 0.0}
    for result in results:
        for sentiment in result["analysis"]:
            label = sentiment["label"]
            if label in totals:
                totals[label] += sentiment["score"]

    num_chunks = len(results)
    # num_chunks >= 2 on this path, but keep the zero guard from the original.
    averages = {
        label: (score / num_chunks if num_chunks > 0 else 0)
        for label, score in totals.items()
    }
    return {
        "total_chunks": num_chunks,
        "total_token_count": total_token_count,
        "total_neutral_score": totals["Neutral"],
        "total_positive_score": totals["Positive"],
        "total_negative_score": totals["Negative"],
        "overall_neutral_score": averages["Neutral"],
        "overall_positive_score": averages["Positive"],
        "overall_negative_score": averages["Negative"],
        "results": results,
    }
# --- For /analyse_text ---
def split_conversation(conversation, default_user="You"):
    """Parse a conversation transcript into [{"user", "text"}, ...] entries.

    Each line of the form "Name: message" becomes one entry. Lines without
    a ":" separator are attributed to *default_user* (fix: the parameter
    existed but was never used, so such lines were silently dropped).
    Surrounding whitespace and wrapping double quotes are stripped from the
    message; blank lines are skipped.

    NOTE(review): the first colon anywhere in a line is treated as the
    speaker separator, so e.g. a bare URL line would be mis-split — confirm
    the expected transcript format with callers.
    """
    entries = []
    for line in conversation.strip().split("\n"):
        stripped = line.strip()
        if not stripped:
            continue  # blank line
        if ":" in stripped:
            user, text = stripped.split(":", 1)
            user = user.strip()
        else:
            user, text = default_user, stripped
        entries.append({"user": user, "text": text.strip().strip('"')})
    return entries
# Function to analyze sentiment for each text entry
def analyze_sentiment(conversation_list):
    """Score each conversation entry in place and return averaged totals.

    Each entry dict gains an "analysis" key holding the raw pipeline output
    (a list of {"label", "score"} dicts). Returns a list of {"label",
    "score"} dicts where each score is the mean over all entries.

    Fixes: an empty conversation previously raised ZeroDivisionError — it
    now returns all-zero scores without invoking the pipeline; an
    unexpected label previously raised KeyError — it is now ignored.
    """
    overall_scores = {"Negative": 0.0, "Neutral": 0.0, "Positive": 0.0}
    total_entries = len(conversation_list)
    if total_entries == 0:
        return [{"label": label, "score": 0.0} for label in overall_scores]
    for entry in conversation_list:
        analysis = sentiment_pipeline(entry["text"], top_k=None)
        entry["analysis"] = analysis
        for sentiment in analysis:
            label = sentiment["label"]
            # Defensive: skip labels outside the expected set rather than
            # raising KeyError (the model is expected to emit only these).
            if label in overall_scores:
                overall_scores[label] += sentiment["score"]
    return [
        {"label": label, "score": score / total_entries}
        for label, score in overall_scores.items()
    ]
def analyse_text(input_data: TextInput):
    """Analyze a '"User: message"'-style conversation, per entry and overall.

    Note: analyze_sentiment() annotates each parsed entry in place, so the
    "analyses" list already carries per-entry pipeline output.
    """
    parsed = split_conversation(input_data.text)
    return {
        "analyses": parsed,
        "overall_analysis": analyze_sentiment(parsed),
    }
def read_root():
    """Root handler: return a short usage hint for the API."""
    # NOTE(review): no @app.get decorator is visible here (nor on the other
    # handlers) — presumably routes are registered elsewhere; confirm.
    usage = {
        "info": "This is a sentiment analysis API. Use /predict for chunk-wise analysis or /analyse_text for structured conversation analysis."
    }
    return usage