Rsnarsna committed on
Commit
76db442
·
verified ·
1 Parent(s): 2f78846

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -63
app.py CHANGED
@@ -16,75 +16,56 @@ app = FastAPI()
16
class TextInput(BaseModel):
    # Request payload schema: the raw text/conversation to analyze.
    text: str
18
 
19
# Function to split text into chunks
def split_text_into_chunks(text, max_tokens=500):
    """Tokenize `text` and split the token ids into chunks of at most `max_tokens`.

    Returns a 3-tuple: (chunks, chunk_texts, chunk_token_counts) — the raw
    token-id chunks, their decoded text, and the token count per chunk.
    NOTE(review): relies on a module-level `tokenizer` defined elsewhere in the file.
    """
    # truncation/padding disabled so the full input is tokenized as-is
    tokens = tokenizer(text, return_tensors="pt", truncation=False, padding=False)
    input_ids = tokens['input_ids'][0].tolist()  # Flatten input_ids into a list
    # Create chunks of max_tokens size
    chunks = [input_ids[i:i+max_tokens] for i in range(0, len(input_ids), max_tokens)]
    chunk_texts = [tokenizer.decode(chunk, skip_special_tokens=True) for chunk in chunks]
    return chunks, chunk_texts, [len(chunk) for chunk in chunks]  # Return token count for each chunk
27
 
28
# Function to analyze sentiment for a list of chunks
def analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts):
    """Run the sentiment pipeline over each decoded chunk.

    Returns (results, total_token_count): a per-chunk list of dicts with the
    chunk index, text, token count and pipeline analysis, plus the summed
    token count across all chunks.
    NOTE(review): relies on a module-level `sentiment_pipeline`; `chunks` is
    accepted but unused here — kept for interface parity with the splitter.
    """
    results = []
    total_token_count = 0
    for i, chunk in enumerate(chunk_texts):
        total_token_count += chunk_token_counts[i]  # Sum the token count of all chunks
        analysis = sentiment_pipeline(chunk, top_k=None)
        results.append({
            "chunk": i + 1,
            "text": chunk,
            "token_count": chunk_token_counts[i],  # Include token count in the result
            "analysis": analysis,
        })
    return results, total_token_count
42
 
43
@app.get("/")
def read_root():
    """Landing endpoint: tells clients which endpoint performs the analysis."""
    return {"info": "This is a sentiment analysis API. Use the /predict endpoint to analyze text."}
 
 
 
 
 
 
 
 
 
 
 
46
 
47
@app.post("/predict")
def predict_sentiment(input_data: TextInput):
    """Split the input text into token chunks, score each chunk's sentiment,
    and return per-chunk results plus (for multi-chunk input) aggregates.
    """
    # Step 1: Split text into chunks
    chunks, chunk_texts, chunk_token_counts = split_text_into_chunks(input_data.text)

    # Step 2: Analyze sentiment for each chunk and calculate the total token count
    results, total_token_count = analyze_sentiment_chunks(chunks, chunk_texts, chunk_token_counts)

    # Step 3: Calculate total and overall sentiment scores
    total_neutral_score = 0
    total_positive_score = 0
    total_negative_score = 0
    for result in results:
        # each analysis is a list of {"label", "score"} dicts from the pipeline
        for sentiment in result['analysis']:
            if sentiment['label'] == "Neutral":
                total_neutral_score += sentiment['score']
            elif sentiment['label'] == "Positive":
                total_positive_score += sentiment['score']
            elif sentiment['label'] == "Negative":
                total_negative_score += sentiment['score']

    # Calculate overall scores (average per chunk)
    num_chunks = len(results)
    overall_neutral_score = total_neutral_score / num_chunks if num_chunks > 0 else 0
    overall_positive_score = total_positive_score / num_chunks if num_chunks > 0 else 0
    overall_negative_score = total_negative_score / num_chunks if num_chunks > 0 else 0

    # Step 4: Return the aggregated sentiment analysis results
    # Single-chunk input: aggregates would duplicate the chunk's own scores,
    # so only the per-chunk results are returned.
    if len(chunks) == 1:
        return {
            "results": results
        }

    return {
        "total_chunks": len(results),
        "total_token_count": total_token_count,  # Include total token count in the response
        "total_neutral_score": total_neutral_score,
        "total_positive_score": total_positive_score,
        "total_negative_score": total_negative_score,
        "overall_neutral_score": overall_neutral_score,
        "overall_positive_score": overall_positive_score,
        "overall_negative_score": overall_negative_score,
        "results": results
    }
 
 
 
16
class TextInput(BaseModel):
    # Request payload schema: the raw conversation text to analyze.
    text: str
18
 
19
# Function to split text into structured format
def split_conversation(conversation, default_user="You"):
    """Parse a newline-separated conversation into {"user", "text"} entries.

    Lines of the form `Name: message` are attributed to `Name`. Lines with
    no colon are attributed to `default_user` — previously the parameter
    was accepted but never used and such lines were silently dropped.
    Blank lines are skipped. Surrounding whitespace and double quotes are
    stripped from the message text.
    """
    conversation_lines = conversation.strip().split("\n")  # Split lines
    split_lines = []  # List to store split conversation

    for line in conversation_lines:
        line = line.strip()
        if not line:  # Skip blank lines
            continue
        if ":" in line:  # Split based on the first colon
            user, text = line.split(":", 1)
            text = text.strip().strip('"')  # Remove extra whitespace and quotes
            split_lines.append({"user": user.strip(), "text": text})
        else:
            # No speaker marker: attribute the line to the default user
            # instead of dropping it.
            split_lines.append({"user": default_user, "text": line.strip('"')})

    return split_lines
31
+
32
# Function to analyze sentiment for each text entry
def analyze_sentiment(conversation_list):
    """Score each conversation entry and return averaged overall scores.

    Each entry dict is mutated in place: an "analysis" key is added with the
    pipeline output. Returns a list of {"label", "score"} dicts whose scores
    are averaged over all entries.

    Fixes: an empty `conversation_list` now yields zero scores instead of
    raising ZeroDivisionError, and labels outside the expected three are
    ignored instead of raising KeyError.
    NOTE(review): relies on a module-level `sentiment_pipeline`.
    """
    overall_scores = {"Negative": 0, "Neutral": 0, "Positive": 0}
    total_entries = len(conversation_list)

    for entry in conversation_list:
        analysis = sentiment_pipeline(entry["text"], top_k=None)  # Analyze text
        entry["analysis"] = analysis  # Add analysis to the entry

        # Aggregate scores for overall analysis
        for sentiment in analysis:
            # Skip unexpected labels rather than crashing on KeyError.
            if sentiment["label"] in overall_scores:
                overall_scores[sentiment["label"]] += sentiment["score"]

    # Calculate overall averages; guard against division by zero on empty input.
    divisor = total_entries if total_entries > 0 else 1
    overall_analysis = [
        {"label": label, "score": overall_scores[label] / divisor}
        for label in overall_scores
    ]

    return overall_analysis
 
52
 
53
@app.get("/")
def read_root():
    """Landing endpoint: points clients at the conversation-analysis route."""
    info_message = "This is a sentiment analysis API. Use the /analyse_text endpoint to analyze conversation text."
    return {"info": info_message}
 
 
 
 
 
 
 
 
 
56
 
57
@app.post("/analyse_text")
def analyse_text(input_data: TextInput):
    """Analyze a conversation: per-entry sentiment plus an averaged overall view.

    Pipeline: parse the raw text into {"user", "text"} entries, annotate each
    entry with its sentiment analysis, then return both the annotated entries
    and the aggregate scores.
    """
    # Parse the raw text into structured speaker/text entries.
    conversation_list = split_conversation(input_data.text)

    # Annotates each entry in place and computes the averaged overall scores.
    overall_analysis = analyze_sentiment(conversation_list)

    # Combine both views into the response payload.
    return {
        "analyses": conversation_list,
        "overall_analysis": overall_analysis,
    }