import gradio as gr
import json
import random
import os
from typing import Any, Dict, List
# -----------------------------
# Available JSON files (persona datasets)
# -----------------------------
available_files = [
"persona_annotator_sample.json"
]
data = []
index = 0
current_file = None
ICONS = {
"header": "πŸ‘€",
"categories": "🏷️",
"presenting": "🚩",
"clinical": "🩺",
"history": "πŸ“œ",
"functioning": "πŸ”§",
"summary": "🧾",
"context": "🧩",
"metadata": "πŸ”–",
"other": "πŸ—‚οΈ",
}
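# Field names rendered by each section below; any key not listed here is
# surfaced by md_other_fields() under "Other Fields".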
SECTION_FIELDS = {
"header": [
"name", "archetype", "age", "sex", "location",
"education_level", "bachelors_field", "ethnic_background", "marital_status",
"version"
],
"categories": ["appearance_category", "behavior_category"],
"presenting": ["presenting_problems"],
"clinical": ["appearance", "behavior", "mood_affect", "speech",
"thought_content", "insight_judgment", "cognition"],
"history": ["medical_developmental_history", "family_history", "educational_vocational_history"],
"functioning": ["emotional_behavioral_functioning", "social_functioning"],
"summary": ["summary_of_psychological_profile"],
"context": ["archetype_description", "memoir", "memoir_summary", "memoir_narrative"],
"metadata": ["uid"],
}
# -----------------------------
# Persistent storage path
# -----------------------------
PERSISTENT_DIR = "/home/user/app/storage"
if os.path.exists(PERSISTENT_DIR):
STORAGE_DIR = PERSISTENT_DIR
else:
STORAGE_DIR = "."
os.makedirs(STORAGE_DIR, exist_ok=True)
ANNOTATION_FILE = os.path.join(STORAGE_DIR, "persona_annotations.jsonl")
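# Annotations are appended as JSON Lines, one object per saved rubric, e.g.:
#   {"file_name": "...", "persona_uuid": "...", "annotations": {"clarity": "4", ...}}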
# -----------------------------
# Core functions
# -----------------------------
def _get(entry: Dict[str, Any], key: str, default: str = "β€”") -> str:
v = entry.get(key, default)
if v is None:
return default
if isinstance(v, (list, dict)):
try:
return json.dumps(v, ensure_ascii=False)
except Exception:
return str(v)
return str(v).strip()
def _truncate(s: str, limit: int = 2000) -> str:
s = s or ""
return (s[:limit] + " …") if len(s) > limit else s
def load_file(file_name):
    """Load the selected JSON file and show a random entry."""
    global data, index, current_file
    current_file = file_name
    with open(file_name, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Guard against an empty file so random.randint(0, -1) is never called.
    index = random.randint(0, len(data) - 1) if data else 0
    return show_entry()
def save_annotation(p_uuid, *scores_and_comments):
"""Save annotations to persistent storage as JSONL (with file name)"""
ann = {
"file_name": current_file,
"persona_uuid": p_uuid,
"annotations": {}
}
rubric_fields = [
"clarity", "originality", "coherence", "diversity", "realism",
"psychological_depth", "consistency", "informativeness",
"ethical_considerations", "demographic_fidelity", "overall_score"
]
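    # NOTE: this order must match the order of the rubric dropdowns wired into
    # save_btn.click below.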
for field, value in zip(rubric_fields, scores_and_comments):
ann["annotations"][field] = value
with open(ANNOTATION_FILE, "a", encoding="utf-8") as f:
f.write(json.dumps(ann, ensure_ascii=False) + "\n")
return f"βœ… Saved annotation for {p_uuid} (from {current_file}) β†’ {ANNOTATION_FILE}"
def export_annotations():
    """Return the path to the annotations file for download."""
    if not os.path.exists(ANNOTATION_FILE):
        # Create an empty file so the download component always has a target.
        open(ANNOTATION_FILE, "a", encoding="utf-8").close()
    return ANNOTATION_FILE
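# Convenience helper (not wired into the UI): read the saved annotations back
# as a list of dicts, e.g. for offline analysis of the JSONL export.
def read_annotations() -> List[Dict[str, Any]]:
    records: List[Dict[str, Any]] = []
    if os.path.exists(ANNOTATION_FILE):
        with open(ANNOTATION_FILE, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:
                    records.append(json.loads(line))
    return records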
def md_header(entry: Dict[str, Any]) -> str:
name = _get(entry, "name")
archetype = _get(entry, "archetype")
age = _get(entry, "age")
sex = _get(entry, "sex")
location = _get(entry, "location")
education_level = _get(entry, "education_level")
bachelors_field = _get(entry, "bachelors_field")
ethnic_background = _get(entry, "ethnic_background")
marital_status = _get(entry, "marital_status")
version = _get(entry, "version")
return (
f"## {ICONS['header']} Persona\n"
f"**Name:** {name} \n"
f"**Archetype:** {archetype} \n"
f"**Age:** {age} \n"
f"**Sex:** {sex} \n"
f"**Location:** {location} \n"
f"**Education Level:** {education_level} \n"
f"**Bachelor’s Field:** {bachelors_field} \n"
f"**Ethnic Background:** {ethnic_background} \n"
f"**Marital Status:** {marital_status} \n"
f"**Version:** {version}"
)
def md_categories(entry: Dict[str, Any]) -> str:
app_cat = _get(entry, "appearance_category")
beh_cat = _get(entry, "behavior_category")
return (
f"## {ICONS['categories']} Categories\n"
f"**Appearance Category:** {app_cat} \n"
f"**Behavior Category:** {beh_cat}"
)
def md_presenting(entry: Dict[str, Any]) -> str:
raw = entry.get("presenting_problems")
items: List[str] = []
if isinstance(raw, list):
items = [str(x).strip() for x in raw if str(x).strip()]
elif isinstance(raw, str) and raw.strip():
try:
parsed = json.loads(raw)
if isinstance(parsed, list):
items = [str(x).strip() for x in parsed if str(x).strip()]
else:
items = [x.strip() for x in raw.split(";") if x.strip()]
except Exception:
items = [x.strip() for x in raw.split(";") if x.strip()]
bullets = "\n".join(f"- {x}" for x in items) if items else "β€”"
return f"## {ICONS['presenting']} Presenting Problems\n{bullets}"
def md_clinical(entry: Dict[str, Any]) -> str:
blocks = []
mapping = [
("appearance", "Appearance"),
("behavior", "Behavior"),
("mood_affect", "Mood / Affect"),
("speech", "Speech"),
("thought_content", "Thought Content"),
("insight_judgment", "Insight & Judgment"),
("cognition", "Cognition"),
]
for k, label in mapping:
v = entry.get(k)
if isinstance(v, str) and v.strip():
blocks.append(f"**{label}**\n{_truncate(v)}")
return f"## {ICONS['clinical']} Clinical Observations\n" + ("\n\n".join(blocks) if blocks else "β€”")
def md_history(entry: Dict[str, Any]) -> str:
blocks = []
mapping = [
("medical_developmental_history", "Medical / Developmental History"),
("family_history", "Family History"),
("educational_vocational_history", "Educational / Vocational History"),
]
for k, label in mapping:
v = entry.get(k)
if isinstance(v, str) and v.strip():
blocks.append(f"**{label}**\n{_truncate(v)}")
return f"## {ICONS['history']} Life History\n" + ("\n\n".join(blocks) if blocks else "β€”")
def md_functioning(entry: Dict[str, Any]) -> str:
blocks = []
mapping = [
("emotional_behavioral_functioning", "Emotional / Behavioral Functioning"),
("social_functioning", "Social Functioning"),
]
for k, label in mapping:
v = entry.get(k)
if isinstance(v, str) and v.strip():
blocks.append(f"**{label}**\n{_truncate(v)}")
return f"## {ICONS['functioning']} Functioning\n" + ("\n\n".join(blocks) if blocks else "β€”")
def md_summary(entry: Dict[str, Any]) -> str:
v = entry.get("summary_of_psychological_profile")
body = _truncate(v) if isinstance(v, str) and v.strip() else "β€”"
return f"## {ICONS['summary']} Summary\n{body}"
def md_context(entry: Dict[str, Any]) -> str:
arch_desc = entry.get("archetype_description") or entry.get("archetype_summary") or "β€”"
memoir_title = entry.get("memoir")
memoir_summary = entry.get("memoir_summary")
memoir_narr = entry.get("memoir_narrative")
title_line = f"**Memoir:** {memoir_title}\n\n" if isinstance(memoir_title, str) and memoir_title.strip() else ""
sum_line = f"**Memoir Summary**\n{_truncate(memoir_summary)}\n\n" if isinstance(memoir_summary, str) and memoir_summary.strip() else ""
narr_line = f"**Memoir Narrative**\n{_truncate(memoir_narr)}" if isinstance(memoir_narr, str) and memoir_narr.strip() else "β€”"
return (
f"## {ICONS['context']} Context\n"
f"**Archetype Description**\n{_truncate(str(arch_desc)) if isinstance(arch_desc, str) else 'β€”'}\n\n"
f"{title_line}{sum_line}{narr_line}"
)
def md_metadata(entry: Dict[str, Any]) -> str:
uid = _get(entry, "uid")
return f"## {ICONS['metadata']} Metadata\n**UID:** {uid}"
def md_other_fields(entry: Dict[str, Any]) -> str:
# Show any extra keys (e.g., concat_field, concat_embedding) not covered elsewhere
known = set().union(*SECTION_FIELDS.values())
other_keys = [k for k in entry.keys() if k not in known]
if not other_keys:
return f"## {ICONS['other']} Other Fields\nβ€”"
pairs = []
for k in sorted(other_keys):
v = entry.get(k)
if isinstance(v, (dict, list)):
try:
s = json.dumps(v, ensure_ascii=False)
except Exception:
s = str(v)
else:
s = str(v) if v is not None else ""
pairs.append(f"- **{k}:** {_truncate(s)}")
return f"## {ICONS['other']} Other Fields\n" + ("\n".join(pairs) if pairs else "β€”")
def show_entry(step=None):
    """Navigate entries and render the current persona."""
    global index, data
    if not data:
        # 11 persona outputs + 11 rubric dropdown resets
        return [""] * 11 + [None] * 11
    if step == "Next":
        index = (index + 1) % len(data)
    elif step == "Previous":
        index = (index - 1) % len(data)
    elif step == "Random Shuffle":
        index = random.randint(0, len(data) - 1)
entry = data[index]
    # Prefer "uuid", falling back to the "uid" metadata field if absent.
    p_uuid = entry.get("uuid") or entry.get("uid") or f"persona_{index}"
    if not entry:
        empty = "_No data_"
        # Keep the output count in sync with all_outputs (11 panels + 11 dropdowns).
        return [p_uuid] + [empty] * 10 + [None] * 11
persona_out = [
p_uuid,
md_header(entry),
md_categories(entry),
md_presenting(entry),
md_clinical(entry),
md_history(entry),
md_functioning(entry),
md_summary(entry),
md_context(entry),
md_metadata(entry),
md_other_fields(entry),
]
# Reset rubric dropdowns to None
resets = [None] * 11
return persona_out + resets
# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as demo:
gr.Markdown("## Persona Annotation Tool")
# File selection dropdown
file_dropdown = gr.Dropdown(
choices=available_files,
value=available_files[0],
label="Select Persona JSON File"
)
with gr.Row():
prev_btn = gr.Button("Previous")
next_btn = gr.Button("Next")
shuffle_btn = gr.Button("Random Shuffle")
phash_out = gr.Textbox(label="Persona Hash ID", interactive=False)
# persona_out = gr.Markdown(label="Persona Description")
md_header_out = gr.Markdown()
md_cats_out = gr.Markdown()
md_present_out = gr.Markdown()
md_clinical_out = gr.Markdown()
md_history_out = gr.Markdown()
md_function_out = gr.Markdown()
md_summary_out = gr.Markdown()
md_context_out = gr.Markdown()
md_meta_out = gr.Markdown()
md_other_out = gr.Markdown()
gr.Markdown("### Evaluation Rubric (0 = Worst, 5 = Best)")
choices = [str(i) for i in range(6)]
clarity = gr.Dropdown(choices=choices, label="Clarity", value=None)
originality = gr.Dropdown(choices=choices, label="Originality", value=None)
coherence = gr.Dropdown(choices=choices, label="Coherence", value=None)
diversity = gr.Dropdown(choices=choices, label="Diversity", value=None)
realism = gr.Dropdown(choices=choices, label="Realism", value=None)
psychological_depth = gr.Dropdown(choices=choices, label="Psychological Depth (focus metric)", value=None)
consistency = gr.Dropdown(choices=choices, label="Consistency", value=None)
informativeness = gr.Dropdown(choices=choices, label="Informativeness", value=None)
    ethical_considerations = gr.Dropdown(choices=choices, label="Ethical Considerations", value=None)
demographic_fidelity = gr.Dropdown(choices=choices, label="Demographic Fidelity", value=None)
overall_score = gr.Dropdown(choices=choices, label="Overall Score", value=None)
save_btn = gr.Button("Save Annotation")
save_status = gr.Textbox(label="Status", interactive=False)
all_outputs = [
phash_out, md_header_out, md_cats_out, md_present_out, md_clinical_out,
md_history_out, md_function_out, md_summary_out, md_context_out,
md_meta_out, md_other_out,
clarity, originality, coherence, diversity, realism,
psychological_depth, consistency, informativeness,
ethical_considerations, demographic_fidelity, overall_score
]
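    # NOTE: the order of all_outputs must match show_entry()'s return order:
    # 11 persona panels followed by 11 rubric dropdown resets.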
with gr.Row():
export_btn = gr.Button("Download All Annotations")
export_file = gr.File(label="Exported Annotations", type="filepath")
# Wiring
file_dropdown.change(load_file, inputs=file_dropdown, outputs=all_outputs)
prev_btn.click(show_entry, inputs=gr.State("Previous"), outputs=all_outputs)
next_btn.click(show_entry, inputs=gr.State("Next"), outputs=all_outputs)
shuffle_btn.click(show_entry, inputs=gr.State("Random Shuffle"), outputs=all_outputs)
save_btn.click(
save_annotation,
inputs=[phash_out, clarity, originality, coherence, diversity, realism,
psychological_depth, consistency, informativeness,
ethical_considerations, demographic_fidelity, overall_score],
outputs=save_status
)
export_btn.click(export_annotations, inputs=None, outputs=export_file)
demo.load(load_file, inputs=gr.State(available_files[0]), outputs=all_outputs)
demo.launch()