# Space: TailsResearch/psychometrics_case_study_with_questions
# Status: Sleeping
# File size: 8,675 Bytes


# sjt_diff_viewer.py
# Gradio viewer: show ONLY indices where two personas give different answers.
# - Loads case_study_answers.json
# - Extracts ordered list of selected options for each name
# - Compares by index, displays mismatches with Name A (green) and Name B (red)
# - For each mismatch, shows the full question text AND all option texts in HEXACO order,
#   highlighting Name A's selection in green and Name B's in red.

import json
from pathlib import Path
from typing import Dict, List, Any, Optional
import gradio as gr

# Answers file that is read once at import time; being a relative path, it is
# resolved against the current working directory.
DATA_PATH = Path("case_study_answers.json")

# ---------- Normalization & Labels ----------

# Canonical trait keys. The list order is also the order in which option texts
# are rendered for each question.
CANON = ["honesty_humility", "emotionality", "extraversion", "agreeableness", "conscientiousness", "openness"]

# Display label shown to the user for each canonical trait key.
LABELS = {
    "honesty_humility": "Honesty–Humility",
    "emotionality": "Emotionality",
    "extraversion": "Extraversion",
    "agreeableness": "Agreeableness",
    "conscientiousness": "Conscientiousness",
    "openness": "Openness",
}

# Alias map for selected values: spelling variant -> canonical trait key.
# norm_trait() lower-cases its input and turns "-" and " " into "_" before
# looking it up here.
ALIAS_TO_CANON = {
    "hh": "honesty_humility",
    "honesty_humility": "honesty_humility",
    "honesty-humility": "honesty_humility",
    "honesty–humility": "honesty_humility",
    "honesty humility": "honesty_humility",
    "honesty": "honesty_humility",
    "emotionality": "emotionality",
    "extraversion": "extraversion",
    "agreeableness": "agreeableness",
    "conscientiousness": "conscientiousness",
    "openness": "openness",
    "openness to experience": "openness",
    "openness_to_experience": "openness",
}

# For extracting option texts: each alias is tried, in list order, as the prefix
# of an "<alias>_option" key (see get_options_map).
CANON_TO_ALIASES = {
    "honesty_humility": ["honesty_humility", "hh"],
    "emotionality": ["emotionality"],
    "extraversion": ["extraversion"],
    "agreeableness": ["agreeableness"],
    "conscientiousness": ["conscientiousness"],
    "openness": ["openness"],
}

def norm_trait(s: Optional[str]) -> Optional[str]:
    """
    Map a raw trait name to its canonical key, or None if it is not recognized.

    The input is trimmed and lower-cased, and hyphens, en/em dashes and spaces
    are turned into underscores. A trailing "_option" is then dropped, so
    key-style values such as "hh_option" or "Honesty-Humility option" resolve
    as well. The result is looked up in ALIAS_TO_CANON first and is otherwise
    accepted only if it is already one of CANON.

    Non-string input (None, numbers, containers) yields None instead of raising.
    """
    if not isinstance(s, str):
        # A non-string value cannot name a trait; treat it as "no selection".
        return None

    key = s.strip().lower()

    # Normalize separators BEFORE removing the suffix: otherwise "hh option"
    # or "hh-option" would keep its suffix and fail the lookup below.
    for separator in ("-", "–", "—", " "):
        key = key.replace(separator, "_")

    suffix = "_option"
    if key.endswith(suffix):
        key = key[: -len(suffix)]

    if key in ALIAS_TO_CANON:
        return ALIAS_TO_CANON[key]
    return key if key in CANON else None

def disp_label(canon: str) -> str:
    """
    Return the display label for a canonical trait key.

    Keys that have no entry in LABELS fall back to the key itself passed
    through ``str.capitalize``.
    """
    fallback = canon.capitalize()
    return LABELS.get(canon, fallback)

# ---------- Data Loading ----------

def load_data(path: Path) -> Dict[str, List[Dict[str, Any]]]:
    """
    Read the UTF-8 JSON file at ``path`` and return the parsed value.

    A path that does not exist yields an empty dict rather than an error.
    The parsed value is returned as-is; nothing here checks that it matches
    the annotated shape.
    """
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return {}

# Parsed answers file, loaded once at import time; an empty dict when DATA_PATH
# does not exist.
RAW = load_data(DATA_PATH)

def names_list() -> List[str]:
    """Return the top-level keys of RAW (the persona names) in sorted order."""
    return sorted(RAW)

# ---------- Extract question text + options per item ----------

def get_question_text(it: Dict[str, Any]) -> str:
    """
    Return the question text of one item, stripped, or "" if none is found.

    ``it["question"]`` may be:
      * a plain string, which is used directly;
      * a dict, in which case the text is taken from the nested
        ``corrected_sjt`` block (or ``original_sjt`` when the former is
        missing/empty), falling back to the dict's own ``question`` field.
        A nested block that is not a dict is itself treated as the text.

    Candidates that are empty or whitespace-only are skipped, so a blank
    nested question no longer hides a usable top-level one.
    """
    q = it.get("question")
    if isinstance(q, str):
        return q.strip()
    if not isinstance(q, dict):
        return ""

    block = q.get("corrected_sjt") or q.get("original_sjt")
    nested = block.get("question") if isinstance(block, dict) else block

    # Test emptiness AFTER stripping: a value like "   " is truthy but carries
    # no text, and must not stop the fallback chain.
    for candidate in (nested, q.get("question")):
        if not candidate:
            continue
        text = str(candidate).strip()
        if text:
            return text
    return ""

def get_options_map(it: Dict[str, Any]) -> Dict[str, str]:
    """
    Collect the option text for each canonical trait of one item.

    For every trait in CANON, each alias from CANON_TO_ALIASES is tried in turn
    as an ``<alias>_option`` key (e.g. 'honesty_humility_option', then
    'hh_option'). For each alias, the nested ``corrected_sjt`` (or, failing
    that, ``original_sjt``) dict is consulted before the ``question`` dict
    itself. The first truthy value wins and is stored stripped; traits whose
    text is missing or blank are left out of the result.
    """
    question = it.get("question") or {}

    # Lookup sources in priority order: nested SJT block first, then the
    # question dict. Stays empty when "question" is not a dict.
    sources = []
    if isinstance(question, dict):
        nested = question.get("corrected_sjt") or question.get("original_sjt")
        if isinstance(nested, dict):
            sources.append(nested)
        sources.append(question)

    texts: Dict[str, str] = {}
    for trait in CANON:
        candidates = (
            source.get(f"{prefix}_option")
            for prefix in CANON_TO_ALIASES.get(trait, [trait])
            for source in sources
        )
        raw = next((value for value in candidates if value), None)
        if raw is None:
            continue
        cleaned = str(raw).strip()
        if cleaned:
            texts[trait] = cleaned
    return texts

# ---------- Build ordered entries per name ----------

def build_entries(name: str) -> List[Dict[str, Any]]:
    """
    Build the ordered list of usable answers for ``name``.

    Each returned entry has the keys "question" (text), "selected" (canonical
    trait) and "options" (trait -> option text). Items that are not dicts, or
    for which any of the three parts comes back empty, are dropped, so the
    positions in the result can differ from the positions in RAW.
    An unknown name yields an empty list.
    """
    entries: List[Dict[str, Any]] = []
    for record in RAW.get(name, []):
        if isinstance(record, dict):
            choice = norm_trait(record.get("option"))
            prompt = get_question_text(record)
            option_texts = get_options_map(record)
            if not (choice and prompt and option_texts):
                continue
            entries.append(dict(question=prompt, selected=choice, options=option_texts))
    return entries

# ---------- Diff Logic (Index-aligned) ----------

def mismatches_by_index(name_a: str, name_b: str):
    """
    Compare two personas' entries position by position.

    Only the first ``min(len_a, len_b)`` positions are compared. Returns a
    tuple ``(diffs, len_a, len_b)`` where ``diffs`` holds one dict per position
    whose selections differ, with keys "idx", "question", "a_sel", "b_sel" and
    "options". Option texts prefer A's entry and fall back to B's for traits A
    lacks; they are stored in CANON order.
    """
    entries_a = build_entries(name_a)
    entries_b = build_entries(name_b)

    found = []
    # zip() stops at the shorter sequence, which limits the comparison to the
    # positions both personas have.
    for position, (entry_a, entry_b) in enumerate(zip(entries_a, entries_b)):
        if entry_a["selected"] == entry_b["selected"]:
            continue

        merged = {}
        for trait in CANON:
            text = entry_a["options"].get(trait) or entry_b["options"].get(trait)
            if text:
                merged[trait] = text

        found.append({
            "idx": position,
            "question": entry_a["question"] or entry_b["question"],
            "a_sel": entry_a["selected"],
            "b_sel": entry_b["selected"],
            "options": merged,
        })
    return found, len(entries_a), len(entries_b)

def render_options_md(options: Dict[str, str], a_sel: str, b_sel: str, name_a: str, name_b: str) -> str:
    """
    Render the option texts of one question as Markdown, one entry per trait.

    Traits are walked in CANON order and numbered by their 1-based position in
    CANON; traits without text are skipped, so numbers may have gaps. The
    option chosen by A is wrapped in a green span tagged with ``name_a``, the
    one chosen by B in a red span tagged with ``name_b``; any other option is
    a plain line with a bold label. Entries are separated by blank lines.
    """
    green_open = "<span style='background:#e8ffe8;color:#0a6410;font-weight:700;'>"
    red_open = "<span style='background:#ffe8e8;color:#a00606;font-weight:700;'>"

    rendered: List[str] = []
    for number, trait in enumerate(CANON, start=1):
        text = options.get(trait)
        if not text:
            continue

        label = disp_label(trait)
        picked_by_a = trait == a_sel
        picked_by_b = trait == b_sel

        if picked_by_a and picked_by_b:
            # Not expected for a mismatch, but rendered sensibly if it happens.
            entry = (
                f"{number}. {green_open}{label}: {text} ( {name_a} )</span> "
                f"{red_open}( {name_b} )</span>"
            )
        elif picked_by_a:
            entry = f"{number}. {green_open}{label}: {text} ( {name_a} )</span>"
        elif picked_by_b:
            entry = f"{number}. {red_open}{label}: {text} ( {name_b} )</span>"
        else:
            entry = f"{number}. **{label}:** {text}"
        rendered.append(entry)
    return "\n\n".join(rendered)

def render_diffs(name_a: str, name_b: str) -> str:
    """
    Produce the full Markdown report for ``name_a`` versus ``name_b``.

    The report opens with a summary line (number of differences, number of
    positions compared, both names). With no differences a short notice
    follows; otherwise each mismatch contributes a heading carrying its
    zero-padded index, the question text, and the rendered option list.
    """
    diffs, count_a, count_b = mismatches_by_index(name_a, name_b)
    compared = min(count_a, count_b)
    summary = f"**{len(diffs)} differences** (of {compared} compared) for **{name_a}** vs **{name_b}**"
    if not diffs:
        return summary + "\n\n_No differences._"

    parts: List[str] = [summary, ""]
    for diff in diffs:
        parts.extend([
            f"### {diff['idx']:02d}. Question",
            diff["question"] or "_(no question text found)_",
            "",
            render_options_md(diff["options"], diff["a_sel"], diff["b_sel"], name_a, name_b),
            "",  # spacing between blocks
        ])
    return "\n".join(parts)

# ---------- Gradio App ----------

# UI layout: title and description, two persona dropdowns side by side, and a
# Markdown pane that holds the rendered diff report.
with gr.Blocks(title="Differences by Index — Two Personas") as demo:
    gr.Markdown("# Differences by Index — Two Personas")
    gr.Markdown(
        "Extracts the **ordered list of selected options** per name, compares two names **by index**, and shows only where they differ.\n"
        "**Name A** is highlighted in **green**, **Name B** in **red**. Full question text and all option texts are shown for each mismatch."
    )

    all_names = names_list()

    # Default for A: the preferred persona if present, else the first name,
    # else an empty string when there are no names at all.
    if "Eleanor Hagedorn" in all_names:
        default_a = "Eleanor Hagedorn"
    elif all_names:
        default_a = all_names[0]
    else:
        default_a = ""

    # Default for B: the preferred persona if present, else the second name,
    # else the same value as A.
    if "Hung Wong" in all_names:
        default_b = "Hung Wong"
    elif len(all_names) > 1:
        default_b = all_names[1]
    else:
        default_b = default_a

    with gr.Row():
        name_a_dd = gr.Dropdown(label="Name A (green)", choices=all_names, value=default_a, interactive=True)
        name_b_dd = gr.Dropdown(label="Name B (red)", choices=all_names, value=default_b, interactive=True)

    out_md = gr.Markdown()

    def on_change(a: str, b: str):
        """Re-render the report for the currently selected pair of names."""
        return render_diffs(a, b)

    # Changing either dropdown refreshes the report from both current values.
    for _dropdown in (name_a_dd, name_b_dd):
        _dropdown.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])

    # Show the report for the default pair as soon as the page loads.
    demo.load(lambda: render_diffs(default_a, default_b), inputs=None, outputs=[out_md])

# Launch the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()