Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| import random | |
| import os | |
| from typing import List, Dict, Any, Optional | |
# -----------------------------
# Available JSON files (persona datasets)
# -----------------------------
# File names offered in the "Select Persona JSON File" dropdown. load_file()
# opens the selected name exactly as written here.
available_files = [
    "persona_annotator_sample.json"
]
# Module-level navigation state, rebound by load_file() and show_entry().
data = []  # content parsed from the currently loaded JSON file
index = 0  # position in `data` of the entry being displayed
current_file = None  # name of the loaded file; stored in every saved annotation
# Icon text placed in front of each section heading by the md_* renderers,
# keyed by section name.
ICONS = {
    "header": "π€",
    "categories": "π·οΈ",
    "presenting": "π©",
    "clinical": "π©Ί",
    "history": "π",
    "functioning": "π§",
    "summary": "π§Ύ",
    "context": "π§©",
    "metadata": "π",
    "other": "ποΈ",
}
# Entry keys that belong to each dedicated section. md_other_fields() treats
# the union of these lists as "known" and lists every remaining key of an
# entry under "Other Fields". The md_* renderers hard-code their own keys and
# do not read this table.
# NOTE(review): "metadata" lists "uid", while show_entry() reads the persona
# identifier from "uuid" - confirm which key the dataset actually uses.
SECTION_FIELDS = {
    "header": [
        "name", "archetype", "age", "sex", "location",
        "education_level", "bachelors_field", "ethnic_background", "marital_status",
        "version"
    ],
    "categories": ["appearance_category", "behavior_category"],
    "presenting": ["presenting_problems"],
    "clinical": ["appearance", "behavior", "mood_affect", "speech",
                 "thought_content", "insight_judgment", "cognition"],
    "history": ["medical_developmental_history", "family_history", "educational_vocational_history"],
    "functioning": ["emotional_behavioral_functioning", "social_functioning"],
    "summary": ["summary_of_psychological_profile"],
    "context": ["archetype_description", "memoir", "memoir_summary", "memoir_narrative"],
    "metadata": ["uid"],
}
# -----------------------------
# Persistent storage path
# -----------------------------
# Annotations are written under the persistent directory when it already
# exists; otherwise they fall back to the current working directory.
PERSISTENT_DIR = "/home/user/app/storage"
STORAGE_DIR = PERSISTENT_DIR if os.path.exists(PERSISTENT_DIR) else "."
os.makedirs(STORAGE_DIR, exist_ok=True)
# save_annotation() appends one JSON object per line to this file.
ANNOTATION_FILE = os.path.join(STORAGE_DIR, "persona_annotations.jsonl")
| # ----------------------------- | |
| # Core functions | |
| # ----------------------------- | |
| def _get(entry: Dict[str, Any], key: str, default: str = "β") -> str: | |
| v = entry.get(key, default) | |
| if v is None: | |
| return default | |
| if isinstance(v, (list, dict)): | |
| try: | |
| return json.dumps(v, ensure_ascii=False) | |
| except Exception: | |
| return str(v) | |
| return str(v).strip() | |
| def _truncate(s: str, limit: int = 2000) -> str: | |
| s = s or "" | |
| return (s[:limit] + " β¦") if len(s) > limit else s | |
def load_file(file_name):
    """Load a persona JSON file and display a randomly chosen entry.

    Rebinds the module-level ``data``, ``index`` and ``current_file``.

    Args:
        file_name: Path of the UTF-8 JSON file to read.

    Returns:
        Whatever ``show_entry()`` returns for the selected entry.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    global data, index, current_file
    with open(file_name, "r", encoding="utf-8") as f:
        loaded = json.load(f)
    # Rebind the state only after a successful read, so a failed load cannot
    # leave `current_file` pointing at a file whose entries are not in `data`.
    data = loaded
    current_file = file_name
    # random.randint(0, -1) raises ValueError on an empty dataset; start at 0
    # and let show_entry() render its empty state instead.
    index = random.randrange(len(data)) if data else 0
    return show_entry()
def save_annotation(p_uuid, *scores_and_comments):
    """Append one annotation record to the JSONL annotations file.

    The record stores the current file name, the persona identifier and the
    rubric values keyed by rubric field name.

    Args:
        p_uuid: Identifier of the persona being rated.
        *scores_and_comments: Rubric values, positionally matched to clarity,
            originality, coherence, diversity, realism, psychological_depth,
            consistency, informativeness, ethical_considerations,
            demographic_fidelity and overall_score. Surplus values are
            ignored; rubric fields without a value are omitted.

    Returns:
        A status message for the UI.
    """
    # Without an identifier (e.g. nothing is loaded) the record could not be
    # tied back to a persona, so refuse to write it.
    if p_uuid is None or not str(p_uuid).strip():
        return "No persona is loaded; nothing was saved."
    rubric_fields = (
        "clarity", "originality", "coherence", "diversity", "realism",
        "psychological_depth", "consistency", "informativeness",
        "ethical_considerations", "demographic_fidelity", "overall_score",
    )
    ann = {
        "file_name": current_file,
        "persona_uuid": p_uuid,
        "annotations": dict(zip(rubric_fields, scores_and_comments)),
    }
    with open(ANNOTATION_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(ann, ensure_ascii=False) + "\n")
    return f"β Saved annotation for {p_uuid} (from {current_file}) β {ANNOTATION_FILE}"
def export_annotations():
    """Return the annotations file path for download.

    An empty file is created first when no annotations file exists yet.
    """
    if not os.path.exists(ANNOTATION_FILE):
        # Nothing has been saved so far: create an empty file to hand out.
        with open(ANNOTATION_FILE, "w", encoding="utf-8"):
            pass
    return ANNOTATION_FILE
def md_header(entry: Dict[str, Any]) -> str:
    """Render the persona's demographic header as Markdown.

    Args:
        entry: Persona record; each field is read through ``_get``, so
            missing or ``None`` values show the placeholder.

    Returns:
        A level-2 heading followed by one ``**Label:** value`` line per field.
    """
    fields = [
        ("Name", "name"),
        ("Archetype", "archetype"),
        ("Age", "age"),
        ("Sex", "sex"),
        ("Location", "location"),
        ("Education Level", "education_level"),
        ("Bachelorβs Field", "bachelors_field"),
        ("Ethnic Background", "ethnic_background"),
        ("Marital Status", "marital_status"),
        ("Version", "version"),
    ]
    lines = [f"**{label}:** {_get(entry, key)}" for label, key in fields]
    # Markdown needs two trailing spaces before the newline for a hard line
    # break; with a single space all fields run together in one paragraph.
    return f"## {ICONS['header']} Persona\n" + "  \n".join(lines)
def md_categories(entry: Dict[str, Any]) -> str:
    """Render the appearance and behavior categories as Markdown.

    Args:
        entry: Persona record; both fields are read through ``_get``.

    Returns:
        A level-2 heading followed by one ``**Label:** value`` line per
        category.
    """
    lines = [
        f"**Appearance Category:** {_get(entry, 'appearance_category')}",
        f"**Behavior Category:** {_get(entry, 'behavior_category')}",
    ]
    # Two trailing spaces before the newline form a Markdown hard line break;
    # a single space would merge both categories onto one rendered line.
    return f"## {ICONS['categories']} Categories\n" + "  \n".join(lines)
def md_presenting(entry: Dict[str, Any]) -> str:
    """Render ``presenting_problems`` as a Markdown bullet list.

    The field may be a list, a string holding a JSON list, or a
    semicolon-separated string. Blank items are dropped; when nothing remains
    the placeholder is shown instead of bullets.
    """
    raw = entry.get("presenting_problems")
    if isinstance(raw, list):
        candidates = raw
    elif isinstance(raw, str) and raw.strip():
        try:
            decoded = json.loads(raw)
        except Exception:
            decoded = None
        # Anything that is not a JSON list is treated as ";"-separated text.
        candidates = decoded if isinstance(decoded, list) else raw.split(";")
    else:
        candidates = []
    items: List[str] = [text for text in (str(c).strip() for c in candidates) if text]
    if items:
        bullets = "\n".join(f"- {item}" for item in items)
    else:
        bullets = "β"
    return f"## {ICONS['presenting']} Presenting Problems\n{bullets}"
def md_clinical(entry: Dict[str, Any]) -> str:
    """Render the clinical observation fields as Markdown.

    Only fields holding a non-blank string are shown, each as a bold label
    followed by the (truncated) text; the placeholder is shown when none
    qualify.
    """
    labelled_fields = (
        ("appearance", "Appearance"),
        ("behavior", "Behavior"),
        ("mood_affect", "Mood / Affect"),
        ("speech", "Speech"),
        ("thought_content", "Thought Content"),
        ("insight_judgment", "Insight & Judgment"),
        ("cognition", "Cognition"),
    )
    sections = [
        f"**{title}**\n{_truncate(entry[field])}"
        for field, title in labelled_fields
        if isinstance(entry.get(field), str) and entry[field].strip()
    ]
    body = "\n\n".join(sections) if sections else "β"
    return f"## {ICONS['clinical']} Clinical Observations\n" + body
def md_history(entry: Dict[str, Any]) -> str:
    """Render the life-history fields as Markdown.

    Only fields holding a non-blank string are shown, each as a bold label
    followed by the (truncated) text; the placeholder is shown when none
    qualify.
    """
    labelled_fields = (
        ("medical_developmental_history", "Medical / Developmental History"),
        ("family_history", "Family History"),
        ("educational_vocational_history", "Educational / Vocational History"),
    )
    sections = [
        f"**{title}**\n{_truncate(entry[field])}"
        for field, title in labelled_fields
        if isinstance(entry.get(field), str) and entry[field].strip()
    ]
    body = "\n\n".join(sections) if sections else "β"
    return f"## {ICONS['history']} Life History\n" + body
def md_functioning(entry: Dict[str, Any]) -> str:
    """Render the functioning fields as Markdown.

    Only fields holding a non-blank string are shown, each as a bold label
    followed by the (truncated) text; the placeholder is shown when none
    qualify.
    """
    labelled_fields = (
        ("emotional_behavioral_functioning", "Emotional / Behavioral Functioning"),
        ("social_functioning", "Social Functioning"),
    )
    sections = [
        f"**{title}**\n{_truncate(entry[field])}"
        for field, title in labelled_fields
        if isinstance(entry.get(field), str) and entry[field].strip()
    ]
    body = "\n\n".join(sections) if sections else "β"
    return f"## {ICONS['functioning']} Functioning\n" + body
def md_summary(entry: Dict[str, Any]) -> str:
    """Render ``summary_of_psychological_profile`` as Markdown.

    A non-blank string is shown truncated; anything else becomes the
    placeholder.
    """
    text = entry.get("summary_of_psychological_profile")
    if isinstance(text, str) and text.strip():
        body = _truncate(text)
    else:
        body = "β"
    return f"## {ICONS['summary']} Summary\n{body}"
def md_context(entry: Dict[str, Any]) -> str:
    """Render the archetype description and memoir fields as Markdown.

    The description comes from ``archetype_description``, falling back to
    ``archetype_summary``. The memoir title and summary are included only
    when they are non-blank strings; a missing narrative is replaced by the
    placeholder.
    """
    def has_text(value: Any) -> bool:
        # True only for strings containing something other than whitespace.
        return isinstance(value, str) and bool(value.strip())

    description = entry.get("archetype_description") or entry.get("archetype_summary") or "β"
    description_text = _truncate(str(description)) if isinstance(description, str) else "β"

    title = entry.get("memoir")
    summary = entry.get("memoir_summary")
    narrative = entry.get("memoir_narrative")

    pieces = [
        f"## {ICONS['context']} Context\n",
        f"**Archetype Description**\n{description_text}\n\n",
    ]
    if has_text(title):
        pieces.append(f"**Memoir:** {title}\n\n")
    if has_text(summary):
        pieces.append(f"**Memoir Summary**\n{_truncate(summary)}\n\n")
    if has_text(narrative):
        pieces.append(f"**Memoir Narrative**\n{_truncate(narrative)}")
    else:
        pieces.append("β")
    return "".join(pieces)
def md_metadata(entry: Dict[str, Any]) -> str:
    """Render the entry's ``uid`` under a Metadata heading."""
    heading = f"## {ICONS['metadata']} Metadata\n"
    return heading + f"**UID:** {_get(entry, 'uid')}"
def md_other_fields(entry: Dict[str, Any]) -> str:
    """List every entry key that no dedicated section covers.

    Keys named in ``SECTION_FIELDS`` are skipped. The rest are shown in
    sorted order as ``- **key:** value`` bullets, with dicts and lists
    rendered as JSON, ``None`` as empty text, and every value truncated. The
    placeholder is shown when there are no extra keys.
    """
    heading = f"## {ICONS['other']} Other Fields\n"
    covered = {name for names in SECTION_FIELDS.values() for name in names}
    extras = [key for key in entry if key not in covered]
    if not extras:
        return heading + "β"
    bullets = []
    for key in sorted(extras):
        value = entry.get(key)
        if value is None:
            text = ""
        elif isinstance(value, (dict, list)):
            try:
                text = json.dumps(value, ensure_ascii=False)
            except Exception:
                text = str(value)
        else:
            text = str(value)
        bullets.append(f"- **{key}:** {_truncate(text)}")
    return heading + "\n".join(bullets)
def show_entry(step: Optional[str] = None):
    """Move to another entry (optionally) and render it for the UI.

    Args:
        step: ``"Next"`` or ``"Previous"`` move one entry with wrap-around,
            ``"Random Shuffle"`` jumps to a random entry, and any other value
            (including ``None``) re-renders the current entry.

    Returns:
        A list of 22 values: the persona identifier, ten Markdown sections,
        then eleven ``None`` values that clear the rubric dropdowns.
    """
    global index, data
    # The rubric dropdowns are always cleared with None, the same value they
    # are created with.
    resets = [None] * 11
    if not data:
        return [""] * 11 + resets
    if step == "Next":
        index = (index + 1) % len(data)
    elif step == "Previous":
        index = (index - 1) % len(data)
    elif step == "Random Shuffle":
        index = random.randrange(len(data))
    entry = data[index]
    # Check for an empty entry before reading from it, and still return one
    # value per output component (22), not a shorter list.
    if not entry:
        return [""] + ["_No data_"] * 10 + resets
    # Prefer "uuid"; fall back to "uid" (the identifier key used by the
    # Metadata section) before resorting to a positional placeholder.
    p_uuid = entry.get("uuid") or entry.get("uid") or f"persona_{index}"
    persona_out = [
        p_uuid,
        md_header(entry),
        md_categories(entry),
        md_presenting(entry),
        md_clinical(entry),
        md_history(entry),
        md_functioning(entry),
        md_summary(entry),
        md_context(entry),
        md_metadata(entry),
        md_other_fields(entry),
    ]
    return persona_out + resets
# -----------------------------
# Gradio UI
# -----------------------------
# NOTE(review): load_file()/show_entry() keep the cursor in module globals, so
# presumably all browser sessions share one position - confirm that is intended.
with gr.Blocks() as demo:
    gr.Markdown("## Persona Annotation Tool")

    # File selection dropdown
    file_dropdown = gr.Dropdown(
        choices=available_files,
        value=available_files[0],
        label="Select Persona JSON File",
    )

    # NOTE(review): assumes this row holds only the three navigation buttons
    # and the hash textbox sits below it - confirm against the deployed layout.
    with gr.Row():
        prev_btn = gr.Button("Previous")
        next_btn = gr.Button("Next")
        shuffle_btn = gr.Button("Random Shuffle")
    phash_out = gr.Textbox(label="Persona Hash ID", interactive=False)

    # One Markdown panel per persona section, in display order.
    md_header_out = gr.Markdown()
    md_cats_out = gr.Markdown()
    md_present_out = gr.Markdown()
    md_clinical_out = gr.Markdown()
    md_history_out = gr.Markdown()
    md_function_out = gr.Markdown()
    md_summary_out = gr.Markdown()
    md_context_out = gr.Markdown()
    md_meta_out = gr.Markdown()
    md_other_out = gr.Markdown()

    gr.Markdown("### Evaluation Rubric (0 = Worst, 5 = Best)")
    choices = [str(i) for i in range(6)]
    clarity = gr.Dropdown(choices=choices, label="Clarity", value=None)
    originality = gr.Dropdown(choices=choices, label="Originality", value=None)
    coherence = gr.Dropdown(choices=choices, label="Coherence", value=None)
    diversity = gr.Dropdown(choices=choices, label="Diversity", value=None)
    realism = gr.Dropdown(choices=choices, label="Realism", value=None)
    psychological_depth = gr.Dropdown(choices=choices, label="Psychological Depth (focus metric)", value=None)
    consistency = gr.Dropdown(choices=choices, label="Consistency", value=None)
    informativeness = gr.Dropdown(choices=choices, label="Informativeness", value=None)
    ethical_considerations = gr.Dropdown(choices=choices, label="Ethical Considerations (0β5)", value=None)
    demographic_fidelity = gr.Dropdown(choices=choices, label="Demographic Fidelity", value=None)
    overall_score = gr.Dropdown(choices=choices, label="Overall Score", value=None)

    save_btn = gr.Button("Save Annotation")
    save_status = gr.Textbox(label="Status", interactive=False)

    # The order of these lists must match what show_entry() returns and what
    # save_annotation() expects positionally.
    persona_outputs = [
        phash_out, md_header_out, md_cats_out, md_present_out, md_clinical_out,
        md_history_out, md_function_out, md_summary_out, md_context_out,
        md_meta_out, md_other_out,
    ]
    rubric_inputs = [
        clarity, originality, coherence, diversity, realism,
        psychological_depth, consistency, informativeness,
        ethical_considerations, demographic_fidelity, overall_score,
    ]
    all_outputs = persona_outputs + rubric_inputs

    # NOTE(review): assumes the export button and file widget share this row - confirm.
    with gr.Row():
        export_btn = gr.Button("Download All Annotations")
        export_file = gr.File(label="Exported Annotations", type="filepath")

    # Wiring
    file_dropdown.change(load_file, inputs=file_dropdown, outputs=all_outputs)
    for nav_button, nav_step in (
        (prev_btn, "Previous"),
        (next_btn, "Next"),
        (shuffle_btn, "Random Shuffle"),
    ):
        # Each button passes its step name to show_entry through a State.
        nav_button.click(show_entry, inputs=gr.State(nav_step), outputs=all_outputs)
    save_btn.click(
        save_annotation,
        inputs=[phash_out] + rubric_inputs,
        outputs=save_status,
    )
    export_btn.click(export_annotations, inputs=None, outputs=export_file)
    # Load the first available file as soon as the page opens.
    demo.load(load_file, inputs=gr.State(available_files[0]), outputs=all_outputs)

demo.launch()