# sjt_diff_viewer.py
# Gradio viewer: show ONLY indices where two personas give different answers.
# - Loads case_study_answers.json
# - Extracts ordered list of selected options for each name
# - Compares by index, displays mismatches with Name A (green) and Name B (red)
# - For each mismatch, shows the full question text AND all option texts in HEXACO order,
#   highlighting Name A's selection in green and Name B's in red.

import json
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr

DATA_PATH = Path("case_study_answers.json")

# ---------- Normalization & Labels ----------

# Canonical trait keys, in the order options are displayed.
CANON = [
    "honesty_humility",
    "emotionality",
    "extraversion",
    "agreeableness",
    "conscientiousness",
    "openness",
]

LABELS = {
    "honesty_humility": "Honesty–Humility",
    "emotionality": "Emotionality",
    "extraversion": "Extraversion",
    "agreeableness": "Agreeableness",
    "conscientiousness": "Conscientiousness",
    "openness": "Openness",
}

# alias map for selected values
ALIAS_TO_CANON = {
    "hh": "honesty_humility",
    "honesty_humility": "honesty_humility",
    "honesty-humility": "honesty_humility",
    "honesty–humility": "honesty_humility",
    "honesty humility": "honesty_humility",
    "honesty": "honesty_humility",
    "emotionality": "emotionality",
    "extraversion": "extraversion",
    "agreeableness": "agreeableness",
    "conscientiousness": "conscientiousness",
    "openness": "openness",
    "openness to experience": "openness",
    "openness_to_experience": "openness",
}

# for extracting option texts, try these aliases as prefixes
CANON_TO_ALIASES = {
    "honesty_humility": ["honesty_humility", "hh"],
    "emotionality": ["emotionality"],
    "extraversion": ["extraversion"],
    "agreeableness": ["agreeableness"],
    "conscientiousness": ["conscientiousness"],
    "openness": ["openness"],
}

# Highlight colors used when rendering the two personas' selections.
COLOR_A = "green"
COLOR_B = "red"

_OPTION_SUFFIX = "_option"


def norm_trait(s: Optional[str]) -> Optional[str]:
    """Map a raw selected-option value to its canonical trait key.

    The value is lower-cased, hyphens and spaces are turned into
    underscores, and a trailing ``_option`` suffix is removed before the
    alias lookup. Returns ``None`` when the value is empty or unknown.
    """
    key = str(s or "").strip().lower()
    # Normalize separators BEFORE stripping the suffix so that spellings
    # such as "Honesty-Humility option" are recognized as well.
    key = key.replace("-", "_").replace(" ", "_")
    if key.endswith(_OPTION_SUFFIX):
        key = key[: -len(_OPTION_SUFFIX)]
    return ALIAS_TO_CANON.get(key, key if key in CANON else None)


def disp_label(canon: str) -> str:
    """Return the human-readable label for a canonical trait key."""
    return LABELS.get(canon, canon.capitalize())


# ---------- Data Loading ----------

def load_data(path: Path) -> Dict[str, List[Dict[str, Any]]]:
    """Load the answers JSON from *path*.

    Returns an empty dict when the file does not exist. Malformed JSON
    propagates as ``json.JSONDecodeError``.

    Raises:
        ValueError: if the top-level JSON value is not an object, since the
            rest of the module looks names up as dictionary keys.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return {}
    if not isinstance(data, dict):
        raise ValueError(
            f"{path}: expected a JSON object mapping names to answer lists, "
            f"got {type(data).__name__}"
        )
    return data


RAW = load_data(DATA_PATH)


def names_list() -> List[str]:
    """Return all names present in the loaded data, sorted."""
    return sorted(RAW)


# ---------- Extract question text + options per item ----------

def get_question_text(it: Dict[str, Any]) -> str:
    """Return the question text of one answer item, or ``""`` if absent.

    Looks under ``it["question"]``: the ``corrected_sjt`` block is preferred
    over ``original_sjt``; a ``question`` key directly on the question dict
    is the fallback.
    """
    q = it.get("question") or {}
    if not isinstance(q, dict):
        return ""
    block = q.get("corrected_sjt") or q.get("original_sjt") or {}
    if isinstance(block, dict):
        text = block.get("question") or q.get("question") or ""
    else:
        # The block itself is a non-dict value; use it as the text.
        text = block or q.get("question") or ""
    return str(text).strip()


def get_options_map(it: Dict[str, Any]) -> Dict[str, str]:
    """
    Returns dict canonical_trait -> option text, trying both canonical and alias keys
    (e.g., 'honesty_humility_option' and 'hh_option').

    For each alias the SJT block (corrected, else original) is checked first,
    then the question dict itself. Traits without a non-empty option text are
    omitted from the result.
    """
    result: Dict[str, str] = {}
    q = it.get("question") or {}
    if not isinstance(q, dict):
        return result
    block = q.get("corrected_sjt") or q.get("original_sjt")
    # check block first (if dict), then q-level
    sources = [src for src in (block, q) if isinstance(src, dict)]

    for canon in CANON:
        found = None
        for ali in CANON_TO_ALIASES.get(canon, [canon]):
            key = f"{ali}_option"
            for src in sources:
                if src.get(key):
                    found = str(src[key]).strip()
                    break
            if found is not None:
                break
        if found:
            result[canon] = found
    return result


# ---------- Build ordered entries per name ----------

def build_entries(name: str) -> List[Dict[str, Any]]:
    """Build the ordered list of usable answer entries for *name*.

    Each entry is ``{"question": str, "selected": canonical trait,
    "options": {canonical trait: text}}``. Items that are not dicts, or that
    lack a recognizable selection, question text, or options, are skipped.
    """
    items = RAW.get(name) or []
    out: List[Dict[str, Any]] = []
    for it in items:
        if not isinstance(it, dict):
            continue
        selected = norm_trait(it.get("option"))
        q_text = get_question_text(it)
        options = get_options_map(it)
        if selected and q_text and options:
            out.append({"question": q_text, "selected": selected, "options": options})
    return out


# ---------- Diff Logic (Index-aligned) ----------

def mismatches_by_index(name_a: str, name_b: str) -> Tuple[List[Dict[str, Any]], int, int]:
    """Compare two personas' entries position by position.

    Returns ``(diffs, len_a, len_b)`` where ``diffs`` holds one dict per index
    at which the selected traits differ, and ``len_a`` / ``len_b`` are the
    numbers of usable entries for each name. Only the first
    ``min(len_a, len_b)`` positions are compared.

    NOTE(review): ``build_entries`` drops unusable items, so if one persona
    has a skipped item the remaining positions no longer line up with the
    other persona's. Confirm the data never contains such items, or align on
    the raw item index instead.
    """
    seq_a = build_entries(name_a)
    seq_b = build_entries(name_b)
    diffs: List[Dict[str, Any]] = []
    for i, (ent_a, ent_b) in enumerate(zip(seq_a, seq_b)):
        if ent_a["selected"] == ent_b["selected"]:
            continue
        # prefer a's options; fallback to b's where missing
        opts: Dict[str, str] = {}
        for c in CANON:
            text = ent_a["options"].get(c) or ent_b["options"].get(c)
            if text:
                opts[c] = text
        diffs.append({
            "idx": i,
            "question": ent_a["question"] or ent_b["question"],
            "a_sel": ent_a["selected"],
            "b_sel": ent_b["selected"],
            "options": opts,
        })
    return diffs, len(seq_a), len(seq_b)


def _colored(text: str, color: str) -> str:
    """Wrap *text* in an inline HTML span with the given CSS color."""
    return f"<span style='color:{color}'>{text}</span>"


def render_options_md(options: Dict[str, str], a_sel: str, b_sel: str, name_a: str, name_b: str) -> str:
    """Render all option texts as a numbered Markdown list in ``CANON`` order.

    The option chosen by ``name_a`` is colored green and the one chosen by
    ``name_b`` red, each tagged with the persona's name. Numbers follow the
    trait's position in ``CANON``, so a missing option leaves a gap.
    """
    lines: List[str] = []
    for i, canon in enumerate(CANON, start=1):
        txt = options.get(canon)
        if not txt:
            continue
        label = disp_label(canon)
        # <b> rather than ** inside the colored span so the bold label does
        # not depend on Markdown being parsed inside inline HTML.
        body = f"<b>{label}:</b> {txt}"

        # highlight selections
        if canon == a_sel and canon == b_sel:
            # unlikely here (only mismatches), but handle gracefully
            line = (
                f"{i}. "
                + _colored(f"{body} ( {name_a} )", COLOR_A)
                + " "
                + _colored(f"( {name_b} )", COLOR_B)
            )
        elif canon == a_sel:
            line = f"{i}. " + _colored(f"{body} ( {name_a} )", COLOR_A)
        elif canon == b_sel:
            line = f"{i}. " + _colored(f"{body} ( {name_b} )", COLOR_B)
        else:
            line = f"{i}. **{label}:** {txt}"
        lines.append(line)
    return "\n\n".join(lines)


def render_diffs(name_a: str, name_b: str) -> str:
    """Return the Markdown report of all differing answers for two names.

    When either name is empty (no data loaded, or a dropdown was cleared) a
    short prompt is returned instead of a report with a blank header.
    """
    if not name_a or not name_b:
        return "_Select two names to compare._"

    diffs, len_a, len_b = mismatches_by_index(name_a, name_b)
    header = f"**{len(diffs)} differences** (of {min(len_a, len_b)} compared) for **{name_a}** vs **{name_b}**"
    if not diffs:
        return header + "\n\n_No differences._"

    blocks: List[str] = [header, ""]
    for d in diffs:
        q = d["question"]
        blocks.append(f"### {d['idx']:02d}. Question")
        blocks.append(q if q else "_(no question text found)_")
        blocks.append("")
        blocks.append(render_options_md(d["options"], d["a_sel"], d["b_sel"], name_a, name_b))
        blocks.append("")  # spacing between blocks
    return "\n".join(blocks)


# ---------- Gradio App ----------

with gr.Blocks(title="Differences by Index — Two Personas") as demo:
    gr.Markdown("# Differences by Index — Two Personas")
    gr.Markdown(
        "Extracts the **ordered list of selected options** per name, compares two names **by index**, and shows only where they differ.\n"
        "**Name A** is highlighted in **green**, **Name B** in **red**. Full question text and all option texts are shown for each mismatch."
    )

    all_names = names_list()
    default_a = "Eleanor Hagedorn" if "Eleanor Hagedorn" in all_names else (all_names[0] if all_names else "")
    default_b = "Hung Wong" if "Hung Wong" in all_names else (all_names[1] if len(all_names) > 1 else default_a)

    with gr.Row():
        # Pass None rather than "" when there is no data, so the initial
        # value is never a string missing from `choices`.
        name_a_dd = gr.Dropdown(choices=all_names, value=default_a or None, label="Name A (green)", interactive=True)
        name_b_dd = gr.Dropdown(choices=all_names, value=default_b or None, label="Name B (red)", interactive=True)

    out_md = gr.Markdown()

    def on_change(a: Optional[str], b: Optional[str]) -> str:
        """Re-render the report whenever either dropdown changes."""
        return render_diffs(a or "", b or "")

    name_a_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])
    name_b_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])

    demo.load(lambda: render_diffs(default_a, default_b), inputs=None, outputs=[out_md])

if __name__ == "__main__":
    demo.launch()