# Source: Hugging Face Space "TailsResearch/PersonaAnnotator" (Space status: Sleeping)
# File size: 14,200 Bytes

import gradio as gr
import json
import random
import os
from typing import List, Dict, Any, Optional

# -----------------------------
# Available JSON files (persona datasets)
# -----------------------------
# Paths offered in the file-selection dropdown; the first one is loaded by default.
available_files = [
    "persona_annotator_sample.json"
]

# Mutable module-level state, rebound by load_file() and show_entry().
# NOTE(review): being module globals, these are presumably shared by every
# browser session served by this process — confirm that is acceptable.
data = []            # entries parsed from the currently loaded JSON file
index = 0            # position in `data` of the entry being displayed
current_file = None  # name of the loaded file, recorded with each saved annotation

# Emoji prefixed to the heading of each rendered Markdown section.
ICONS = {
    "header": "👤",
    "categories": "🏷️",
    "presenting": "🚩",
    "clinical": "🩺",
    "history": "📜",
    "functioning": "🔧",
    "summary": "🧾",
    "context": "🧩",
    "metadata": "🔖",
    "other": "🗂️",
}

# Entry keys grouped by the section that renders them. md_other_fields() treats
# every key listed here as already covered and lists the remaining keys under
# "Other Fields".
# NOTE(review): "archetype_summary" (read by md_context) and "uuid" (read by
# show_entry) are not listed, so they also show up under "Other Fields".
SECTION_FIELDS = {
    "header": [
        "name", "archetype", "age", "sex", "location",
        "education_level", "bachelors_field", "ethnic_background", "marital_status",
        "version"
    ],
    "categories": ["appearance_category", "behavior_category"],
    "presenting": ["presenting_problems"],
    "clinical": ["appearance", "behavior", "mood_affect", "speech",
                 "thought_content", "insight_judgment", "cognition"],
    "history": ["medical_developmental_history", "family_history", "educational_vocational_history"],
    "functioning": ["emotional_behavioral_functioning", "social_functioning"],
    "summary": ["summary_of_psychological_profile"],
    "context": ["archetype_description", "memoir", "memoir_summary", "memoir_narrative"],
    "metadata": ["uid"],
}

# -----------------------------
# Persistent storage path
# -----------------------------
# Use the dedicated storage directory when it exists; otherwise fall back to
# the current working directory.
PERSISTENT_DIR = "/home/user/app/storage"
STORAGE_DIR = PERSISTENT_DIR if os.path.exists(PERSISTENT_DIR) else "."
os.makedirs(STORAGE_DIR, exist_ok=True)
# Annotations are appended to this JSON Lines file.
ANNOTATION_FILE = os.path.join(STORAGE_DIR, "persona_annotations.jsonl")

# -----------------------------
# Core functions
# -----------------------------

def _get(entry: Dict[str, Any], key: str, default: str = "—") -> str:
    v = entry.get(key, default)
    if v is None:
        return default
    if isinstance(v, (list, dict)):
        try:
            return json.dumps(v, ensure_ascii=False)
        except Exception:
            return str(v)
    return str(v).strip()

def _truncate(s: str, limit: int = 2000) -> str:
    s = s or ""
    return (s[:limit] + " …") if len(s) > limit else s
    
    
def load_file(file_name):
    """Load a persona JSON file and display a randomly chosen entry.

    Args:
        file_name: Path of the JSON file to read. Its parsed content replaces
            the module-level ``data``.

    Returns:
        The list produced by ``show_entry()`` for the selected entry.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    global data, index, current_file
    # Parse into a local first: if reading fails, the previously loaded dataset
    # and its file name stay in sync (save_annotation records current_file
    # alongside every annotation).
    with open(file_name, "r", encoding="utf-8") as f:
        loaded = json.load(f)
    data = loaded
    current_file = file_name
    # random.randint(0, -1) raises ValueError on an empty dataset; start at 0
    # and let show_entry() handle the "no data" case.
    index = random.randint(0, len(data) - 1) if data else 0
    return show_entry()


def save_annotation(p_uuid, *scores_and_comments):
    """Append one annotation record to the JSONL annotations file.

    Args:
        p_uuid: Identifier of the persona being rated.
        *scores_and_comments: Rubric values, in the order of ``rubric_fields``
            below. Extra values are ignored; fields without a value are left
            out of the record.

    Returns:
        A status message. When ``p_uuid`` is empty nothing is written and a
        warning message is returned instead.
    """
    # show_entry() yields an empty identifier when no dataset is loaded; a
    # record without a persona id could not be matched to anything later.
    if p_uuid is None or not str(p_uuid).strip():
        return "⚠️ No persona loaded — nothing was saved."

    rubric_fields = [
        "clarity", "originality", "coherence", "diversity", "realism",
        "psychological_depth", "consistency", "informativeness",
        "ethical_considerations", "demographic_fidelity", "overall_score"
    ]

    ann = {
        "file_name": current_file,
        "persona_uuid": p_uuid,
        # zip() stops at the shorter sequence, pairing each supplied value
        # with its rubric field by position.
        "annotations": dict(zip(rubric_fields, scores_and_comments)),
    }

    # Append mode keeps earlier annotations; one JSON object per line.
    with open(ANNOTATION_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(ann, ensure_ascii=False) + "\n")

    return f"✅ Saved annotation for {p_uuid} (from {current_file}) → {ANNOTATION_FILE}"

def export_annotations():
    """Return the annotations file path, creating an empty file first if none exists."""
    if not os.path.exists(ANNOTATION_FILE):
        # Create the file so the returned path always points at something.
        with open(ANNOTATION_FILE, "w", encoding="utf-8"):
            pass
    return ANNOTATION_FILE
    
    
def md_header(entry: Dict[str, Any]) -> str:
    """Render the persona's header fields as a Markdown section.

    Each field is read with ``_get`` and emitted as a ``**Label:** value`` row;
    rows are separated by two trailing spaces plus a newline.
    """
    labelled_keys = (
        ("Name", "name"),
        ("Archetype", "archetype"),
        ("Age", "age"),
        ("Sex", "sex"),
        ("Location", "location"),
        ("Education Level", "education_level"),
        ("Bachelor’s Field", "bachelors_field"),
        ("Ethnic Background", "ethnic_background"),
        ("Marital Status", "marital_status"),
        ("Version", "version"),
    )
    rows = [f"**{label}:** {_get(entry, key)}" for label, key in labelled_keys]
    return f"## {ICONS['header']} Persona\n" + "  \n".join(rows)

def md_categories(entry: Dict[str, Any]) -> str:
    """Render the appearance and behavior categories as a Markdown section."""
    appearance = _get(entry, "appearance_category")
    behavior = _get(entry, "behavior_category")
    lines = [
        f"## {ICONS['categories']} Categories",
        f"**Appearance Category:** {appearance}  ",
        f"**Behavior Category:** {behavior}",
    ]
    return "\n".join(lines)

def md_presenting(entry: Dict[str, Any]) -> str:
    """Render ``presenting_problems`` as a Markdown bullet list.

    The field may be a list, a string holding a JSON list, or a
    ``;``-separated string. Blank items are dropped; when nothing remains the
    section body is ``—``.
    """
    raw = entry.get("presenting_problems")

    candidates: List[Any] = []
    if isinstance(raw, list):
        candidates = raw
    elif isinstance(raw, str) and raw.strip():
        try:
            decoded = json.loads(raw)
        except Exception:
            decoded = None
        # Anything other than a JSON list is treated as ';'-separated text.
        candidates = decoded if isinstance(decoded, list) else raw.split(";")

    items = [text for text in (str(c).strip() for c in candidates) if text]
    if items:
        bullets = "\n".join(f"- {item}" for item in items)
    else:
        bullets = "—"
    return f"## {ICONS['presenting']} Presenting Problems\n{bullets}"

def md_clinical(entry: Dict[str, Any]) -> str:
    """Render the clinical observation fields as a Markdown section.

    Only non-blank string values are shown, each truncated with ``_truncate``
    under a bold label; the body is ``—`` when none qualify.
    """
    labelled_keys = (
        ("appearance", "Appearance"),
        ("behavior", "Behavior"),
        ("mood_affect", "Mood / Affect"),
        ("speech", "Speech"),
        ("thought_content", "Thought Content"),
        ("insight_judgment", "Insight & Judgment"),
        ("cognition", "Cognition"),
    )
    looked_up = ((title, entry.get(key)) for key, title in labelled_keys)
    observations = [
        f"**{title}**\n{_truncate(text)}"
        for title, text in looked_up
        if isinstance(text, str) and text.strip()
    ]
    body = "\n\n".join(observations) if observations else "—"
    return f"## {ICONS['clinical']} Clinical Observations\n" + body

def md_history(entry: Dict[str, Any]) -> str:
    """Render the medical, family and educational history fields as Markdown.

    Fields that are not non-blank strings are skipped; the body is ``—`` when
    nothing is left to show.
    """
    titles_by_key = {
        "medical_developmental_history": "Medical / Developmental History",
        "family_history": "Family History",
        "educational_vocational_history": "Educational / Vocational History",
    }
    paragraphs = []
    for key, title in titles_by_key.items():
        text = entry.get(key)
        if not isinstance(text, str) or not text.strip():
            continue
        paragraphs.append(f"**{title}**\n{_truncate(text)}")

    heading = f"## {ICONS['history']} Life History\n"
    if not paragraphs:
        return heading + "—"
    return heading + "\n\n".join(paragraphs)

def md_functioning(entry: Dict[str, Any]) -> str:
    """Render the emotional/behavioral and social functioning fields as Markdown.

    Only non-blank string values are included; the body is ``—`` otherwise.
    """
    def _render(key: str, title: str) -> Optional[str]:
        # Returns the formatted paragraph, or None when the field is unusable.
        text = entry.get(key)
        if isinstance(text, str) and text.strip():
            return f"**{title}**\n{_truncate(text)}"
        return None

    rendered = (
        _render("emotional_behavioral_functioning", "Emotional / Behavioral Functioning"),
        _render("social_functioning", "Social Functioning"),
    )
    paragraphs = [p for p in rendered if p is not None]
    body = "\n\n".join(paragraphs) if paragraphs else "—"
    return f"## {ICONS['functioning']} Functioning\n" + body

def md_summary(entry: Dict[str, Any]) -> str:
    """Render ``summary_of_psychological_profile`` as a Markdown section.

    The text is truncated with ``_truncate``; a missing, blank or non-string
    value is shown as ``—``.
    """
    profile = entry.get("summary_of_psychological_profile")
    if isinstance(profile, str) and profile.strip():
        body = _truncate(profile)
    else:
        body = "—"
    return f"## {ICONS['summary']} Summary\n{body}"

def md_context(entry: Dict[str, Any]) -> str:
    """Render the archetype description and memoir fields as a Markdown section.

    The description falls back from ``archetype_description`` to
    ``archetype_summary`` to ``—``. Memoir title and summary are included only
    when they are non-blank strings; a missing narrative is shown as ``—``.
    """
    def _has_text(value: Any) -> bool:
        return isinstance(value, str) and bool(value.strip())

    description = entry.get("archetype_description") or entry.get("archetype_summary") or "—"
    description_text = _truncate(str(description)) if isinstance(description, str) else "—"

    pieces = [
        f"## {ICONS['context']} Context\n",
        f"**Archetype Description**\n{description_text}\n\n",
    ]

    title = entry.get("memoir")
    if _has_text(title):
        pieces.append(f"**Memoir:** {title}\n\n")

    summary = entry.get("memoir_summary")
    if _has_text(summary):
        pieces.append(f"**Memoir Summary**\n{_truncate(summary)}\n\n")

    narrative = entry.get("memoir_narrative")
    if _has_text(narrative):
        pieces.append(f"**Memoir Narrative**\n{_truncate(narrative)}")
    else:
        pieces.append("—")

    return "".join(pieces)

def md_metadata(entry: Dict[str, Any]) -> str:
    """Render the entry's ``uid`` field as a Markdown metadata section."""
    heading = f"## {ICONS['metadata']} Metadata"
    uid_line = f"**UID:** {_get(entry, 'uid')}"
    return "\n".join((heading, uid_line))

def md_other_fields(entry: Dict[str, Any]) -> str:
    """List every entry key not named in ``SECTION_FIELDS`` as Markdown bullets.

    Keys are sorted; dict and list values are shown as JSON, ``None`` as an
    empty string, everything else via ``str``. Each value is truncated with
    ``_truncate``. The body is ``—`` when there are no extra keys.
    """
    heading = f"## {ICONS['other']} Other Fields\n"

    # Extra keys are those no section claims (e.g. concat_field, concat_embedding).
    covered = {name for names in SECTION_FIELDS.values() for name in names}
    extras = [key for key in entry if key not in covered]
    if not extras:
        return heading + "—"

    def _as_text(value: Any) -> str:
        if value is None:
            return ""
        if not isinstance(value, (dict, list)):
            return str(value)
        try:
            return json.dumps(value, ensure_ascii=False)
        except Exception:
            return str(value)

    bullets = [
        f"- **{key}:** {_truncate(_as_text(entry.get(key)))}"
        for key in sorted(extras)
    ]
    return heading + "\n".join(bullets)

def show_entry(step=None):
    """Optionally move to another entry, then render the current one.

    Args:
        step: ``"Next"`` or ``"Previous"`` to move by one entry (wrapping
            around), ``"Random Shuffle"`` to jump to a random entry; any other
            value re-renders the current entry.

    Returns:
        A list of 22 values in a fixed order: the persona identifier, ten
        Markdown section strings, then eleven ``None`` values that clear the
        rubric dropdowns. The length is the same on every path so it always
        matches the number of output components wired to this function.
    """
    global index, data
    n_sections = 10  # Markdown panels following the identifier
    n_rubric = 11    # rubric dropdowns reset after every navigation

    if not data:
        # None (not "") for the dropdowns: "" is not one of their choices.
        return [""] * (1 + n_sections) + [None] * n_rubric

    if step == "Next":
        index = (index + 1) % len(data)
    elif step == "Previous":
        index = (index - 1) % len(data)
    elif step == "Random Shuffle":
        index = random.randint(0, len(data) - 1)

    entry = data[index]
    # Check the entry before calling .get() on it, and return the full set of
    # outputs so an empty record produces placeholders rather than an error.
    if not entry or not isinstance(entry, dict):
        placeholder = "_No data_"
        return [""] + [placeholder] * n_sections + [None] * n_rubric

    # NOTE(review): the other renderers in this file read "uid", so fall back
    # to it when "uuid" is absent — confirm which key the datasets carry.
    p_uuid = entry.get("uuid") or entry.get("uid") or f"persona_{index}"

    persona_out = [
        p_uuid,
        md_header(entry),
        md_categories(entry),
        md_presenting(entry),
        md_clinical(entry),
        md_history(entry),
        md_functioning(entry),
        md_summary(entry),
        md_context(entry),
        md_metadata(entry),
        md_other_fields(entry),
    ]

    # Reset rubric dropdowns to None
    return persona_out + [None] * n_rubric

# -----------------------------
# Gradio UI
# -----------------------------
# Builds the annotation page: file picker, navigation buttons, persona panels,
# rubric dropdowns, and save/export controls. Components are created in the
# order they are written here.
with gr.Blocks() as demo:
    gr.Markdown("## Persona Annotation Tool")

    # File selection dropdown
    file_dropdown = gr.Dropdown(
        choices=available_files,
        value=available_files[0],
        label="Select Persona JSON File"
    )

    with gr.Row():
        prev_btn = gr.Button("Previous")
        next_btn = gr.Button("Next")
        shuffle_btn = gr.Button("Random Shuffle")

    # Read-only identifier of the displayed persona; also passed to save_annotation.
    phash_out = gr.Textbox(label="Persona Hash ID", interactive=False)
    # persona_out = gr.Markdown(label="Persona Description")
    # One Markdown panel per md_* renderer, in the order show_entry() returns them.
    md_header_out = gr.Markdown()
    md_cats_out = gr.Markdown()
    md_present_out = gr.Markdown()
    md_clinical_out = gr.Markdown()
    md_history_out = gr.Markdown()
    md_function_out = gr.Markdown()
    md_summary_out = gr.Markdown()
    md_context_out = gr.Markdown()
    md_meta_out = gr.Markdown()
    md_other_out = gr.Markdown()
    
    

    gr.Markdown("### Evaluation Rubric (0 = Worst, 5 = Best)")
    
    # Scores are offered as the strings "0" through "5".
    choices = [str(i) for i in range(6)]

    # Rubric dropdowns start unselected (value=None).
    clarity = gr.Dropdown(choices=choices, label="Clarity", value=None)
    originality = gr.Dropdown(choices=choices, label="Originality", value=None)
    coherence = gr.Dropdown(choices=choices, label="Coherence", value=None)
    diversity = gr.Dropdown(choices=choices, label="Diversity", value=None)
    realism = gr.Dropdown(choices=choices, label="Realism", value=None)
    psychological_depth = gr.Dropdown(choices=choices, label="Psychological Depth (focus metric)", value=None)
    consistency = gr.Dropdown(choices=choices, label="Consistency", value=None)
    informativeness = gr.Dropdown(choices=choices, label="Informativeness", value=None)
    ethical_considerations = gr.Dropdown(choices=choices, label="Ethical Considerations (0–5)", value=None)
    demographic_fidelity = gr.Dropdown(choices=choices, label="Demographic Fidelity", value=None)
    overall_score = gr.Dropdown(choices=choices, label="Overall Score", value=None)

    save_btn = gr.Button("Save Annotation")
    save_status = gr.Textbox(label="Status", interactive=False)
    
    # Order must match the list returned by show_entry(): the identifier, the
    # ten Markdown panels, then the eleven rubric dropdowns (which it resets).
    all_outputs = [
                    phash_out, md_header_out, md_cats_out, md_present_out, md_clinical_out,
                    md_history_out, md_function_out, md_summary_out, md_context_out,
                    md_meta_out, md_other_out,
                    clarity, originality, coherence, diversity, realism,
                    psychological_depth, consistency, informativeness,
                    ethical_considerations, demographic_fidelity, overall_score
                ]

    with gr.Row():
        export_btn = gr.Button("Download All Annotations")
        export_file = gr.File(label="Exported Annotations", type="filepath")

    # Wiring
    # Each navigation button passes a constant step name to show_entry via gr.State.
    file_dropdown.change(load_file, inputs=file_dropdown, outputs=all_outputs)
    prev_btn.click(show_entry, inputs=gr.State("Previous"), outputs=all_outputs)
    next_btn.click(show_entry, inputs=gr.State("Next"), outputs=all_outputs)
    shuffle_btn.click(show_entry, inputs=gr.State("Random Shuffle"), outputs=all_outputs)

    # Inputs follow the rubric_fields order used inside save_annotation.
    save_btn.click(
        save_annotation,
        inputs=[phash_out, clarity, originality, coherence, diversity, realism,
                psychological_depth, consistency, informativeness,
                ethical_considerations, demographic_fidelity, overall_score],
        outputs=save_status
    )

    export_btn.click(export_annotations, inputs=None, outputs=export_file)

    # Load the default file through the app's load event so the page starts populated.
    demo.load(load_file, inputs=gr.State(available_files[0]), outputs=all_outputs)

# NOTE(review): launch() runs at import time (no __main__ guard) — confirm this
# module is only ever executed as a script.
demo.launch()