# sjt_diff_viewer.py
# Gradio viewer: show ONLY indices where two personas give different answers.
# - Loads case_study_answers.json
# - Extracts ordered list of selected options for each name
# - Compares by index, displays mismatches with Name A (green) and Name B (red)
# - For each mismatch, shows the full question text AND all option texts in HEXACO order,
# highlighting Name A's selection in green and Name B's in red.
import json
from pathlib import Path
from typing import Dict, List, Any, Optional
import gradio as gr
# JSON file with the per-persona answers, resolved relative to the working directory.
DATA_PATH = Path("case_study_answers.json")

# ---------- Normalization & Labels ----------
# Canonical HEXACO trait keys; this order is also the display order of options.
CANON = [
    "honesty_humility",
    "emotionality",
    "extraversion",
    "agreeableness",
    "conscientiousness",
    "openness",
]

# Human-readable label for each canonical trait key.
LABELS = {
    "honesty_humility": "Honesty–Humility",
    "emotionality": "Emotionality",
    "extraversion": "Extraversion",
    "agreeableness": "Agreeableness",
    "conscientiousness": "Conscientiousness",
    "openness": "Openness",
}

# Accepted (lower-cased) spellings of a selected value -> canonical trait key.
ALIAS_TO_CANON = {
    # Honesty-Humility variants
    "hh": "honesty_humility",
    "honesty_humility": "honesty_humility",
    "honesty-humility": "honesty_humility",
    "honesty–humility": "honesty_humility",
    "honesty humility": "honesty_humility",
    "honesty": "honesty_humility",
    # traits that only appear under their own name
    "emotionality": "emotionality",
    "extraversion": "extraversion",
    "agreeableness": "agreeableness",
    "conscientiousness": "conscientiousness",
    # Openness variants
    "openness": "openness",
    "openness to experience": "openness",
    "openness_to_experience": "openness",
}

# Key prefixes tried, in order, when looking up "<prefix>_option" texts.
# Every trait is looked up under its own canonical key; Honesty-Humility is
# additionally looked up under the short "hh" prefix.
CANON_TO_ALIASES = {trait: [trait] for trait in CANON}
CANON_TO_ALIASES["honesty_humility"].append("hh")
def norm_trait(s: Optional[str]) -> Optional[str]:
    """Map a raw selected-option value to its canonical trait key.

    The value is trimmed and lower-cased, hyphens and spaces are turned into
    underscores, and a trailing ``_option`` suffix is dropped before the
    lookup in ``ALIAS_TO_CANON``.

    Args:
        s: Raw value of an item's ``"option"`` field. May be ``None`` or,
            since it comes from JSON, something other than a string.

    Returns:
        The canonical trait key, or ``None`` when the value is missing, is
        not a string, or does not name a known trait.
    """
    if not isinstance(s, str):
        # None, numbers, lists, ... cannot name a trait; report "unknown"
        # instead of failing on .strip().
        return None

    key = s.strip().lower()
    # Normalise separators first so "hh option" / "hh-option" are treated
    # exactly like "hh_option" by the suffix check below.
    key = key.replace("-", "_").replace(" ", "_")
    if key.endswith("_option"):
        key = key[: -len("_option")]
    return ALIAS_TO_CANON.get(key, key if key in CANON else None)
def disp_label(canon: str) -> str:
    """Return the display label for a trait key.

    Keys present in ``LABELS`` use their configured label; any other key is
    shown as the key itself with its first letter capitalised.
    """
    fallback = canon.capitalize()
    return LABELS[canon] if canon in LABELS else fallback
# ---------- Data Loading ----------
def load_data(path: Path) -> Dict[str, List[Dict[str, Any]]]:
    """Read and parse the JSON answers file.

    Args:
        path: Location of the JSON file.

    Returns:
        The parsed JSON document, or an empty dict when ``path`` does not
        exist. The content is not validated against the annotated shape.
    """
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return {}
# Parsed contents of DATA_PATH, loaded once when the module is imported.
# This is an empty dict when the file does not exist (see load_data).
RAW = load_data(DATA_PATH)
def names_list() -> List[str]:
    """Return the top-level keys of ``RAW`` (the persona names), sorted."""
    return sorted(RAW)
# ---------- Extract question text + options per item ----------
def get_question_text(it: Dict[str, Any]) -> str:
    """Extract the question text of one answer item.

    Looks inside ``it["question"]``, preferring its ``"corrected_sjt"`` entry
    over ``"original_sjt"``. If that entry is a dict, its ``"question"`` field
    is used; otherwise the entry itself is used as the text. When neither
    yields a value, the ``"question"`` field of ``it["question"]`` is used.

    Returns:
        The stripped text, or ``""`` when ``it["question"]`` is missing, is
        not a dict, or holds no text.
    """
    q = it.get("question") or {}
    if not isinstance(q, dict):
        return ""

    nested = q.get("corrected_sjt") or q.get("original_sjt") or {}
    if isinstance(nested, dict):
        primary = nested.get("question")
    else:
        # A non-dict block (e.g. a plain string) is used as the text itself.
        primary = nested

    return str(primary or q.get("question") or "").strip()
def get_options_map(it: Dict[str, Any]) -> Dict[str, str]:
    """Collect the option text for each canonical trait of one answer item.

    For every trait in ``CANON`` the keys ``"<alias>_option"`` are tried for
    each alias in ``CANON_TO_ALIASES`` (e.g. ``honesty_humility_option`` and
    then ``hh_option``). Each key is looked up first in the nested
    ``corrected_sjt`` / ``original_sjt`` dict and then in ``it["question"]``
    itself. The first non-empty value wins.

    Returns:
        Dict of canonical trait -> stripped option text. Traits with no text
        are left out; the dict is empty when ``it["question"]`` is not a dict.
    """
    question = it.get("question") or {}

    # Dicts to search, in priority order: nested SJT block first, then the
    # question dict itself.
    holders: List[Dict[str, Any]] = []
    if isinstance(question, dict):
        nested = question.get("corrected_sjt") or question.get("original_sjt")
        if isinstance(nested, dict):
            holders.append(nested)
        holders.append(question)

    texts: Dict[str, str] = {}
    for trait in CANON:
        keys = [f"{alias}_option" for alias in CANON_TO_ALIASES.get(trait, [trait])]
        # First truthy value, scanning alias by alias and, within an alias,
        # holder by holder.
        raw = next(
            (holder[key] for key in keys for holder in holders if holder.get(key)),
            None,
        )
        if raw is None:
            continue
        cleaned = str(raw).strip()
        if cleaned:
            texts[trait] = cleaned
    return texts
# ---------- Build ordered entries per name ----------
def build_entries(name: str) -> List[Dict[str, Any]]:
    """Build the ordered list of usable answers for one persona.

    Each item of ``RAW[name]`` becomes an entry with keys ``"question"``
    (text), ``"selected"`` (canonical trait) and ``"options"`` (trait ->
    option text).

    Items that are not dicts, or for which the selected trait, the question
    text or the options cannot be determined, are skipped. Positions in the
    returned list therefore refer to the filtered sequence, not to the raw
    item index.

    Returns:
        The entries in source order; empty when ``name`` is not in ``RAW``.
    """
    entries: List[Dict[str, Any]] = []
    for item in RAW.get(name, []):
        if not isinstance(item, dict):
            continue

        chosen = norm_trait(item.get("option"))
        text = get_question_text(item)
        option_texts = get_options_map(item)

        # An entry is only usable when all three parts are present.
        if not (chosen and text and option_texts):
            continue
        entries.append({"question": text, "selected": chosen, "options": option_texts})
    return entries
# ---------- Diff Logic (Index-aligned) ----------
def mismatches_by_index(name_a: str, name_b: str):
    """Compare two personas' entries position by position.

    Only the first ``min(len_a, len_b)`` positions are compared, where the
    entries come from ``build_entries``.

    Returns:
        A tuple ``(diffs, len_a, len_b)``. ``diffs`` holds one dict per
        position where the selected traits differ, with keys ``"idx"``
        (0-based position), ``"question"``, ``"a_sel"``, ``"b_sel"`` and
        ``"options"``. ``len_a`` and ``len_b`` are the entry counts of the
        two personas.
    """
    entries_a = build_entries(name_a)
    entries_b = build_entries(name_b)

    diffs = []
    # zip() stops at the shorter sequence, so surplus entries are ignored.
    for pos, (ent_a, ent_b) in enumerate(zip(entries_a, entries_b)):
        if ent_a["selected"] == ent_b["selected"]:
            continue

        # Option texts in CANON order: A's text wins, B's fills the gaps.
        merged: Dict[str, str] = {}
        for trait in CANON:
            text = ent_a["options"].get(trait) or ent_b["options"].get(trait)
            if text:
                merged[trait] = text

        diffs.append({
            "idx": pos,
            "question": ent_a["question"] or ent_b["question"],
            "a_sel": ent_a["selected"],
            "b_sel": ent_b["selected"],
            "options": merged,
        })
    return diffs, len(entries_a), len(entries_b)
def render_options_md(options: Dict[str, str], a_sel: str, b_sel: str, name_a: str, name_b: str) -> str:
    """Render the options of one question as Markdown, colouring the picks.

    Options are listed in ``CANON`` order and numbered by their 1-based
    position in ``CANON``; traits without text are skipped. The option chosen
    by persona A is wrapped in a green ``<span>`` and tagged with ``name_a``,
    the one chosen by persona B in a red ``<span>`` tagged with ``name_b``.
    Unselected options are plain text with a bold label.

    Args:
        options: Canonical trait -> option text.
        a_sel: Canonical trait selected by persona A.
        b_sel: Canonical trait selected by persona B.
        name_a: Display name of persona A.
        name_b: Display name of persona B.

    Returns:
        The rendered lines joined by blank lines; ``""`` when no option has
        text.
    """
    color_a, color_b = "green", "red"

    lines: List[str] = []
    for i, canon in enumerate(CANON, start=1):
        txt = options.get(canon)
        if not txt:
            continue
        label = disp_label(canon)
        chosen_by_a = canon == a_sel
        chosen_by_b = canon == b_sel

        # Inline HTML is used for colour because Markdown has no colour
        # syntax; <strong> replaces ** so the bold label survives in the span.
        if chosen_by_a:
            line = (
                f'{i}. <span style="color: {color_a}">'
                f"<strong>{label}:</strong> {txt} ( {name_a} )</span>"
            )
            if chosen_by_b:
                # Not expected for mismatches, but show both names if it occurs.
                line += f' <span style="color: {color_b}">( {name_b} )</span>'
        elif chosen_by_b:
            line = (
                f'{i}. <span style="color: {color_b}">'
                f"<strong>{label}:</strong> {txt} ( {name_b} )</span>"
            )
        else:
            line = f"{i}. **{label}:** {txt}"
        lines.append(line)
    return "\n\n".join(lines)
def render_diffs(name_a: str, name_b: str) -> str:
    """Render all differing answers of two personas as one Markdown document.

    The document starts with a summary line (number of differences and number
    of positions compared). Each difference follows as a heading carrying its
    0-based position, the question text and the option list produced by
    ``render_options_md``.

    Returns:
        The Markdown text; when nothing differs, the summary line followed by
        ``_No differences._``.
    """
    diffs, len_a, len_b = mismatches_by_index(name_a, name_b)
    compared = min(len_a, len_b)
    header = f"**{len(diffs)} differences** (of {compared} compared) for **{name_a}** vs **{name_b}**"
    if not diffs:
        return header + "\n\n_No differences._"

    parts: List[str] = [header, ""]
    for diff in diffs:
        parts.extend([
            f"### {diff['idx']:02d}. Question",
            diff["question"] or "_(no question text found)_",
            "",
            render_options_md(diff["options"], diff["a_sel"], diff["b_sel"], name_a, name_b),
            "",  # spacing between blocks
        ])
    return "\n".join(parts)
# ---------- Gradio App ----------
with gr.Blocks(title="Differences by Index — Two Personas") as demo:
    gr.Markdown("# Differences by Index — Two Personas")
    gr.Markdown(
        "Extracts the **ordered list of selected options** per name, compares two names **by index**, and shows only where they differ.\n"
        "**Name A** is highlighted in **green**, **Name B** in **red**. Full question text and all option texts are shown for each mismatch."
    )

    all_names = names_list()

    # Initial selection for A: the preferred persona if present, otherwise
    # the first available name, otherwise an empty value.
    if "Eleanor Hagedorn" in all_names:
        default_a = "Eleanor Hagedorn"
    elif all_names:
        default_a = all_names[0]
    else:
        default_a = ""

    # Initial selection for B: the preferred persona if present, otherwise
    # the second available name, otherwise the same value as A.
    if "Hung Wong" in all_names:
        default_b = "Hung Wong"
    elif len(all_names) > 1:
        default_b = all_names[1]
    else:
        default_b = default_a

    with gr.Row():
        name_a_dd = gr.Dropdown(choices=all_names, value=default_a, label="Name A (green)", interactive=True)
        name_b_dd = gr.Dropdown(choices=all_names, value=default_b, label="Name B (red)", interactive=True)

    out_md = gr.Markdown()

    def on_change(a: str, b: str):
        """Re-render the diff for the currently selected pair of names."""
        return render_diffs(a, b)

    # Changing either dropdown re-renders the output from both current values.
    name_a_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])
    name_b_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])

    # Initial render on page load, using the default pair.
    demo.load(lambda: render_diffs(default_a, default_b), inputs=None, outputs=[out_md])

if __name__ == "__main__":
    demo.launch()