import gradio as gr
import json
import random
import os
from typing import Any, Dict, List
# -----------------------------
# Available JSON files (persona datasets)
# -----------------------------
available_files = [
"persona_annotator_sample.json"
]
data = []
index = 0
current_file = None
ICONS = {
"header": "πŸ‘€",
"categories": "🏷️",
"presenting": "🚩",
"clinical": "🩺",
"history": "πŸ“œ",
"functioning": "πŸ”§",
"summary": "🧾",
"context": "🧩",
"metadata": "πŸ”–",
"other": "πŸ—‚οΈ",
}
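# Field names rendered by each section below; any key not listed here is
# surfaced by md_other_fields() under "Other Fields".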
SECTION_FIELDS = {
"header": [
"name", "archetype", "age", "sex", "location",
"education_level", "bachelors_field", "ethnic_background", "marital_status",
"version"
],
"categories": ["appearance_category", "behavior_category"],
"presenting": ["presenting_problems"],
"clinical": ["appearance", "behavior", "mood_affect", "speech",
"thought_content", "insight_judgment", "cognition"],
"history": ["medical_developmental_history", "family_history", "educational_vocational_history"],
"functioning": ["emotional_behavioral_functioning", "social_functioning"],
"summary": ["summary_of_psychological_profile"],
"context": ["archetype_description", "memoir", "memoir_summary", "memoir_narrative"],
"metadata": ["uid"],
}
# -----------------------------
# Persistent storage path
# -----------------------------
PERSISTENT_DIR = "/home/user/app/storage"
if os.path.exists(PERSISTENT_DIR):
STORAGE_DIR = PERSISTENT_DIR
else:
STORAGE_DIR = "."
os.makedirs(STORAGE_DIR, exist_ok=True)
ANNOTATION_FILE = os.path.join(STORAGE_DIR, "persona_annotations.jsonl")
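# Annotations are appended as JSON Lines, one object per saved rubric, e.g.:
#   {"file_name": "...", "persona_uuid": "...", "annotations": {"clarity": "4", ...}}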
# -----------------------------
# Core functions
# -----------------------------
def _get(entry: Dict[str, Any], key: str, default: str = "β€”") -> str:
v = entry.get(key, default)
if v is None:
return default
if isinstance(v, (list, dict)):
try:
return json.dumps(v, ensure_ascii=False)
except Exception:
return str(v)
return str(v).strip()
def _truncate(s: str, limit: int = 2000) -> str:
s = s or ""
return (s[:limit] + " …") if len(s) > limit else s
def load_file(file_name):
    """Load the selected JSON file and show a random entry."""
    global data, index, current_file
    current_file = file_name
    with open(file_name, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Guard against an empty file so random.randint(0, -1) is never called.
    index = random.randint(0, len(data) - 1) if data else 0
    return show_entry()
def save_annotation(p_uuid, *scores_and_comments):
"""Save annotations to persistent storage as JSONL (with file name)"""
ann = {
"file_name": current_file,
"persona_uuid": p_uuid,
"annotations": {}
}
rubric_fields = [
"clarity", "originality", "coherence", "diversity", "realism",
"psychological_depth", "consistency", "informativeness",
"ethical_considerations", "demographic_fidelity", "overall_score"
]
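    # NOTE: this order must match the order of the rubric dropdowns wired into
    # save_btn.click below.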
for field, value in zip(rubric_fields, scores_and_comments):
ann["annotations"][field] = value
with open(ANNOTATION_FILE, "a", encoding="utf-8") as f:
f.write(json.dumps(ann, ensure_ascii=False) + "\n")
return f"βœ… Saved annotation for {p_uuid} (from {current_file}) β†’ {ANNOTATION_FILE}"
def export_annotations():
    """Return the path to the annotations file for download."""
    if not os.path.exists(ANNOTATION_FILE):
        # Create an empty file so the download component always has a target.
        open(ANNOTATION_FILE, "a", encoding="utf-8").close()
    return ANNOTATION_FILE
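# Convenience helper (not wired into the UI): read the saved annotations back
# as a list of dicts, e.g. for offline analysis of the JSONL export.
def read_annotations() -> List[Dict[str, Any]]:
    records: List[Dict[str, Any]] = []
    if os.path.exists(ANNOTATION_FILE):
        with open(ANNOTATION_FILE, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:
                    records.append(json.loads(line))
    return records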
def md_header(entry: Dict[str, Any]) -> str:
name = _get(entry, "name")
archetype = _get(entry, "archetype")
age = _get(entry, "age")
sex = _get(entry, "sex")
location = _get(entry, "location")
education_level = _get(entry, "education_level")
bachelors_field = _get(entry, "bachelors_field")
ethnic_background = _get(entry, "ethnic_background")
marital_status = _get(entry, "marital_status")
version = _get(entry, "version")
return (
f"## {ICONS['header']} Persona\n"
f"**Name:** {name} \n"
f"**Archetype:** {archetype} \n"
f"**Age:** {age} \n"
f"**Sex:** {sex} \n"
f"**Location:** {location} \n"
f"**Education Level:** {education_level} \n"
f"**Bachelor’s Field:** {bachelors_field} \n"
f"**Ethnic Background:** {ethnic_background} \n"
f"**Marital Status:** {marital_status} \n"
f"**Version:** {version}"
)
def md_categories(entry: Dict[str, Any]) -> str:
app_cat = _get(entry, "appearance_category")
beh_cat = _get(entry, "behavior_category")
return (
f"## {ICONS['categories']} Categories\n"
f"**Appearance Category:** {app_cat} \n"
f"**Behavior Category:** {beh_cat}"
)
def md_presenting(entry: Dict[str, Any]) -> str:
raw = entry.get("presenting_problems")
items: List[str] = []
if isinstance(raw, list):
items = [str(x).strip() for x in raw if str(x).strip()]
elif isinstance(raw, str) and raw.strip():
try:
parsed = json.loads(raw)
if isinstance(parsed, list):
items = [str(x).strip() for x in parsed if str(x).strip()]
else:
items = [x.strip() for x in raw.split(";") if x.strip()]
except Exception:
items = [x.strip() for x in raw.split(";") if x.strip()]
bullets = "\n".join(f"- {x}" for x in items) if items else "β€”"
return f"## {ICONS['presenting']} Presenting Problems\n{bullets}"
def md_clinical(entry: Dict[str, Any]) -> str:
blocks = []
mapping = [
("appearance", "Appearance"),
("behavior", "Behavior"),
("mood_affect", "Mood / Affect"),
("speech", "Speech"),
("thought_content", "Thought Content"),
("insight_judgment", "Insight & Judgment"),
("cognition", "Cognition"),
]
for k, label in mapping:
v = entry.get(k)
if isinstance(v, str) and v.strip():
blocks.append(f"**{label}**\n{_truncate(v)}")
return f"## {ICONS['clinical']} Clinical Observations\n" + ("\n\n".join(blocks) if blocks else "β€”")
def md_history(entry: Dict[str, Any]) -> str:
blocks = []
mapping = [
("medical_developmental_history", "Medical / Developmental History"),
("family_history", "Family History"),
("educational_vocational_history", "Educational / Vocational History"),
]
for k, label in mapping:
v = entry.get(k)
if isinstance(v, str) and v.strip():
blocks.append(f"**{label}**\n{_truncate(v)}")
return f"## {ICONS['history']} Life History\n" + ("\n\n".join(blocks) if blocks else "β€”")
def md_functioning(entry: Dict[str, Any]) -> str:
blocks = []
mapping = [
("emotional_behavioral_functioning", "Emotional / Behavioral Functioning"),
("social_functioning", "Social Functioning"),
]
for k, label in mapping:
v = entry.get(k)
if isinstance(v, str) and v.strip():
blocks.append(f"**{label}**\n{_truncate(v)}")
return f"## {ICONS['functioning']} Functioning\n" + ("\n\n".join(blocks) if blocks else "β€”")
def md_summary(entry: Dict[str, Any]) -> str:
v = entry.get("summary_of_psychological_profile")
body = _truncate(v) if isinstance(v, str) and v.strip() else "β€”"
return f"## {ICONS['summary']} Summary\n{body}"
def md_context(entry: Dict[str, Any]) -> str:
arch_desc = entry.get("archetype_description") or entry.get("archetype_summary") or "β€”"
memoir_title = entry.get("memoir")
memoir_summary = entry.get("memoir_summary")
memoir_narr = entry.get("memoir_narrative")
title_line = f"**Memoir:** {memoir_title}\n\n" if isinstance(memoir_title, str) and memoir_title.strip() else ""
sum_line = f"**Memoir Summary**\n{_truncate(memoir_summary)}\n\n" if isinstance(memoir_summary, str) and memoir_summary.strip() else ""
narr_line = f"**Memoir Narrative**\n{_truncate(memoir_narr)}" if isinstance(memoir_narr, str) and memoir_narr.strip() else "β€”"
return (
f"## {ICONS['context']} Context\n"
f"**Archetype Description**\n{_truncate(str(arch_desc)) if isinstance(arch_desc, str) else 'β€”'}\n\n"
f"{title_line}{sum_line}{narr_line}"
)
def md_metadata(entry: Dict[str, Any]) -> str:
uid = _get(entry, "uid")
return f"## {ICONS['metadata']} Metadata\n**UID:** {uid}"
def md_other_fields(entry: Dict[str, Any]) -> str:
# Show any extra keys (e.g., concat_field, concat_embedding) not covered elsewhere
known = set().union(*SECTION_FIELDS.values())
other_keys = [k for k in entry.keys() if k not in known]
if not other_keys:
return f"## {ICONS['other']} Other Fields\nβ€”"
pairs = []
for k in sorted(other_keys):
v = entry.get(k)
if isinstance(v, (dict, list)):
try:
s = json.dumps(v, ensure_ascii=False)
except Exception:
s = str(v)
else:
s = str(v) if v is not None else ""
pairs.append(f"- **{k}:** {_truncate(s)}")
return f"## {ICONS['other']} Other Fields\n" + ("\n".join(pairs) if pairs else "β€”")
def show_entry(step=None):
    """Navigate entries and render the current persona."""
    global index, data
    if not data:
        # 11 persona outputs + 11 rubric dropdown resets
        return [""] * 11 + [None] * 11
    if step == "Next":
        index = (index + 1) % len(data)
    elif step == "Previous":
        index = (index - 1) % len(data)
    elif step == "Random Shuffle":
        index = random.randint(0, len(data) - 1)
entry = data[index]
    # Prefer "uuid", falling back to the "uid" metadata field if absent.
    p_uuid = entry.get("uuid") or entry.get("uid") or f"persona_{index}"
    if not entry:
        empty = "_No data_"
        # Keep the output count in sync with all_outputs (11 panels + 11 dropdowns).
        return [p_uuid] + [empty] * 10 + [None] * 11
persona_out = [
p_uuid,
md_header(entry),
md_categories(entry),
md_presenting(entry),
md_clinical(entry),
md_history(entry),
md_functioning(entry),
md_summary(entry),
md_context(entry),
md_metadata(entry),
md_other_fields(entry),
]
# Reset rubric dropdowns to None
resets = [None] * 11
return persona_out + resets
# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as demo:
gr.Markdown("## Persona Annotation Tool")
# File selection dropdown
file_dropdown = gr.Dropdown(
choices=available_files,
value=available_files[0],
label="Select Persona JSON File"
)
with gr.Row():
prev_btn = gr.Button("Previous")
next_btn = gr.Button("Next")
shuffle_btn = gr.Button("Random Shuffle")
phash_out = gr.Textbox(label="Persona Hash ID", interactive=False)
# persona_out = gr.Markdown(label="Persona Description")
md_header_out = gr.Markdown()
md_cats_out = gr.Markdown()
md_present_out = gr.Markdown()
md_clinical_out = gr.Markdown()
md_history_out = gr.Markdown()
md_function_out = gr.Markdown()
md_summary_out = gr.Markdown()
md_context_out = gr.Markdown()
md_meta_out = gr.Markdown()
md_other_out = gr.Markdown()
gr.Markdown("### Evaluation Rubric (0 = Worst, 5 = Best)")
choices = [str(i) for i in range(6)]
clarity = gr.Dropdown(choices=choices, label="Clarity", value=None)
originality = gr.Dropdown(choices=choices, label="Originality", value=None)
coherence = gr.Dropdown(choices=choices, label="Coherence", value=None)
diversity = gr.Dropdown(choices=choices, label="Diversity", value=None)
realism = gr.Dropdown(choices=choices, label="Realism", value=None)
psychological_depth = gr.Dropdown(choices=choices, label="Psychological Depth (focus metric)", value=None)
consistency = gr.Dropdown(choices=choices, label="Consistency", value=None)
informativeness = gr.Dropdown(choices=choices, label="Informativeness", value=None)
    ethical_considerations = gr.Dropdown(choices=choices, label="Ethical Considerations", value=None)
demographic_fidelity = gr.Dropdown(choices=choices, label="Demographic Fidelity", value=None)
overall_score = gr.Dropdown(choices=choices, label="Overall Score", value=None)
save_btn = gr.Button("Save Annotation")
save_status = gr.Textbox(label="Status", interactive=False)
all_outputs = [
phash_out, md_header_out, md_cats_out, md_present_out, md_clinical_out,
md_history_out, md_function_out, md_summary_out, md_context_out,
md_meta_out, md_other_out,
clarity, originality, coherence, diversity, realism,
psychological_depth, consistency, informativeness,
ethical_considerations, demographic_fidelity, overall_score
]
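    # NOTE: the order of all_outputs must match show_entry()'s return order:
    # 11 persona panels followed by 11 rubric dropdown resets.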
with gr.Row():
export_btn = gr.Button("Download All Annotations")
export_file = gr.File(label="Exported Annotations", type="filepath")
# Wiring
file_dropdown.change(load_file, inputs=file_dropdown, outputs=all_outputs)
prev_btn.click(show_entry, inputs=gr.State("Previous"), outputs=all_outputs)
next_btn.click(show_entry, inputs=gr.State("Next"), outputs=all_outputs)
shuffle_btn.click(show_entry, inputs=gr.State("Random Shuffle"), outputs=all_outputs)
save_btn.click(
save_annotation,
inputs=[phash_out, clarity, originality, coherence, diversity, realism,
psychological_depth, consistency, informativeness,
ethical_considerations, demographic_fidelity, overall_score],
outputs=save_status
)
export_btn.click(export_annotations, inputs=None, outputs=export_file)
demo.load(load_file, inputs=gr.State(available_files[0]), outputs=all_outputs)
demo.launch()