Spaces:

TailsResearch
/

psychometrics_case_study_with_questions

Sleeping

App Files Files Community

amirali1985 committed on Oct 6

Commit

7b2e432

1 Parent(s): 738ee45

fix this.

Browse files

Files changed (1) hide show

app.py +141 -236

app.py CHANGED Viewed

@@ -1,33 +1,22 @@
-# sjt_compare_eleanor_hung.py
-# Minimal Gradio app: show ONLY questions where Eleanor and Hung chose different options.
 # - Loads case_study_answers.json
-# - Compares two personas (defaults: "Eleanor" vs "Hung")
-# - Robust matching across personas using IDs when available, else a hash of normalized question + options
-# - Highlights Eleanor's choice in green, Hung's in red
-# - Prev / Next / Random navigation
 import json
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Tuple
-import random
-import hashlib
-import difflib
 import gradio as gr
 DATA_PATH = Path("case_study_answers.json")
-# Canonical HEXACO order & labels
-CANONICAL_ORDER = [
-    "honesty_humility",
-    "emotionality",
-    "extraversion",
-    "agreeableness",
-    "conscientiousness",
-    "openness",
-]
-TRAIT_LABELS = {
-    "honesty_humility": "Honesty–Humility",
     "emotionality": "Emotionality",
     "extraversion": "Extraversion",
     "agreeableness": "Agreeableness",
@@ -35,234 +24,150 @@ TRAIT_LABELS = {
     "openness": "Openness",
 }
-ALIAS_TO_CANON = {
-    "hh": "honesty_humility",
-    "honesty_humility": "honesty_humility",
-    "honesty-humility": "honesty_humility",
-    "honestyhumility": "honesty_humility",
-    "honesty": "honesty_humility",
-    "emotionality": "emotionality",
-    "extraversion": "extraversion",
-    "agreeableness": "agreeableness",
-    "conscientiousness": "conscientiousness",
-    "openness": "openness",
-}
-def canonical_trait(x: Optional[str]) -> Optional[str]:
-    if x is None:
-        return None
-    s = str(x).strip().lower()
     if s.endswith("_option"):
         s = s[:-7]
     s = s.replace("-", "_").replace(" ", "_")
-    return ALIAS_TO_CANON.get(s, s if s in CANONICAL_ORDER else None)
-def get_option_text_from_blocks(block: Dict[str, Any], q: Dict[str, Any], canon: str) -> Optional[str]:
-    # Accept 'honesty_humility_option' and 'hh_option'
-    keys_to_try = [f"{canon}_option"]
-    if canon == "honesty_humility":
-        keys_to_try.append("hh_option")
-    for key in keys_to_try:
-        if isinstance(block, dict) and key in block:
-            return str(block[key]).strip()
-        if isinstance(q, dict) and key in q:
-            return str(q[key]).strip()
-    return None
-def norm_text(s: str) -> str:
-    return " ".join((s or "").split())
-def option_signature(opts: Dict[str, str]) -> str:
-    # Deterministic signature from canonical-order option texts
-    parts = [norm_text(opts.get(c, "")) for c in CANONICAL_ORDER]
-    sig = "||".join(parts)
-    return hashlib.sha256(sig.encode("utf-8")).hexdigest()
-def get_question_id(item: Dict[str, Any]) -> Optional[str]:
-    # Try common ID fields at item or nested question level
-    candidates = []
-    for k in ["uid", "id", "question_id", "sjt_id", "sjt_uid", "index"]:
-        if k in item: candidates.append(("item", k, item.get(k)))
-    q = item.get("question") or {}
-    if isinstance(q, dict):
-        for k in ["uid", "id", "question_id", "sjt_id", "sjt_uid", "index"]:
-            if k in q: candidates.append(("question", k, q.get(k)))
-    for scope, k, v in candidates:
-        if v is not None and str(v).strip():
-            return f"{scope}:{k}:{str(v).strip()}"
-    return None
-def _safe_get_question_block(item: Dict[str, Any]) -> Tuple[str, Dict[str, str], Optional[str]]:
-    selected = canonical_trait(item.get("option"))
-    q = item.get("question", {}) or {}
-    block = q.get("corrected_sjt") or q.get("original_sjt") or {}
-    question_text = ""
-    options: Dict[str, str] = {}
-    if isinstance(block, dict):
-        question_text = block.get("question") or q.get("question") or ""
-        for c in CANONICAL_ORDER:
-            val = get_option_text_from_blocks(block, q, c)
-            if val:
-                options[c] = val
-    else:
-        question_text = str(block) if block else str(q.get("question", ""))
-    if not options and isinstance(q, dict):
-        for c in CANONICAL_ORDER:
-            val = get_option_text_from_blocks({}, q, c)
-            if val:
-                options[c] = val
-    return norm_text(question_text), options, selected
-def flatten_entries(raw: Any) -> List[Dict[str, Any]]:
-    out: List[Dict[str, Any]] = []
-    def handle_item(obj: Dict[str, Any], default_name: str):
-        q_text, opts, sel = _safe_get_question_block(obj)
-        nm = (obj.get("name") or default_name or "Unknown").strip() or "Unknown"
-        qid = get_question_id(obj)
-        if q_text and opts and sel:
-            out.append({"name": nm, "question": q_text, "options": opts, "selected": sel, "qid": qid})
-    if isinstance(raw, list):
-        for x in raw:
-            if isinstance(x, dict):
-                handle_item(x, "Unknown")
-    elif isinstance(raw, dict):
-        for k, v in raw.items():
-            default_name = str(k)
-            if isinstance(v, list):
-                for x in v:
-                    if isinstance(x, dict):
-                        handle_item(x, default_name)
-            elif isinstance(v, dict):
-                handle_item(v, default_name)
-    return out
-def normalize_name(s: str) -> str:
-    return " ".join((s or "").strip().lower().split())
-def persona_slice(entries: List[Dict[str, Any]], name_query: str) -> List[Dict[str, Any]]:
-    q = normalize_name(name_query)
-    return [e for e in entries if q in normalize_name(e["name"])]
-def best_key_for(e: Dict[str, Any]) -> str:
-    # Prefer explicit IDs; else use text similarity friendly key
-    if e.get("qid"):
-        return f"id:{e['qid']}"
-    # fallback: hash of normalized question + options signature
-    sig = option_signature(e["options"])
-    return f"sig:{hashlib.sha256((e['question'] + '||' + sig).encode('utf-8')).hexdigest()}"
-def build_map_by_key(slice_entries: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
-    mp: Dict[str, Dict[str, Any]] = {}
-    for e in slice_entries:
-        k = best_key_for(e)
-        if k not in mp:
-            mp[k] = e
-        # if duplicate keys, keep first occurrence
-    return mp
-def build_mismatch_list(entries: List[Dict[str, Any]], name_a: str, name_b: str):
-    slice_a = persona_slice(entries, name_a)
-    slice_b = persona_slice(entries, name_b)
-    map_a = build_map_by_key(slice_a)
-    map_b = build_map_by_key(slice_b)
-    mismatches = []
-    for k in set(map_a.keys()).intersection(map_b.keys()):
-        ea = map_a[k]
-        eb = map_b[k]
-        if ea["selected"] != eb["selected"]:
-            # prefer richer options set
-            opts = ea["options"] if len(ea["options"]) >= len(eb["options"]) else eb["options"]
-            # choose question text by higher similarity (often identical)
-            q = ea["question"] if difflib.SequenceMatcher(None, ea["question"], eb["question"]).ratio() >= 0.9 else ea["question"]
-            mismatches.append({
-                "question": q,
-                "eleanor": ea,
-                "hung": eb,
-                "options": opts,
-            })
-    return mismatches
-def make_display(item: Dict[str, Any], name_a_disp: str, name_b_disp: str) -> str:
-    q = item["question"]
-    sel_a = item["eleanor"]["selected"]
-    sel_b = item["hung"]["selected"]
-    opts = item["options"]
-    a_label = TRAIT_LABELS.get(sel_a, sel_a)
-    b_label = TRAIT_LABELS.get(sel_b, sel_b)
-    a_text = opts.get(sel_a, "")
-    b_text = opts.get(sel_b, "")
-    a_span = f"<span style='background:#e8ffe8;color:#0a6410;font-weight:700;'>{a_label}: {a_text}</span>"
-    b_span = f"<span style='background:#ffe8e8;color:#a00606;font-weight:700;'>{b_label}: {b_text}</span>"
-    body = [
-        f"### ❓ Question",
-        q,
-        "",
-        f"**{name_a_disp} chose:** {a_span}",
-        f"**{name_b_disp} chose:** {b_span}",
-    ]
-    return "\n\n".join(body)
-DATA_RAW = (json.loads(Path(DATA_PATH).read_text(encoding='utf-8')) if DATA_PATH.exists() else [])
-DATA = flatten_entries(DATA_RAW)
-with gr.Blocks(title="Eleanor vs Hung — Differences Only") as demo:
-    gr.Markdown("# Eleanor vs Hung — Different Answers Only")
-    gr.Markdown("Shows only the questions where the two personas chose different options.")
-    with gr.Row():
-        name_a_in = gr.Textbox(value="Eleanor", label="Name A (green)", interactive=True)
-        name_b_in = gr.Textbox(value="Hung", label="Name B (red)", interactive=True)
-        st_pos = gr.State(0)
     with gr.Row():
-        prev_btn = gr.Button("Previous")
-        next_btn = gr.Button("Next")
-        rand_btn = gr.Button("Random")
-    status_md = gr.Markdown()
-    diff_md = gr.Markdown()
-    def recompute(name_a: str, name_b: str):
-        mismatches = build_mismatch_list(DATA, name_a, name_b)
-        total = len(mismatches)
-        if total == 0:
-            # Provide a hint with available names to help debugging
-            names = sorted(set(e['name'] for e in DATA))
-            sample = ", ".join(names[:10]) + (" ..." if len(names) > 10 else "")
-            return 0, f"**0 differences** for *{name_a}* vs *{name_b}*. Try adjusting names. Examples: {sample}", "_No differences to show._"
-        md = make_display(mismatches[0], name_a, name_b)
-        return 0, f"**{total} differences** found for *{name_a}* vs *{name_b}*.", md
-    def nav(name_a: str, name_b: str, pos: int, step: int = 0, rand: bool = False):
-        mismatches = build_mismatch_list(DATA, name_a, name_b)
-        total = len(mismatches)
-        if total == 0:
-            names = sorted(set(e['name'] for e in DATA))
-            sample = ", ".join(names[:10]) + (" ..." if len(names) > 10 else "")
-            return pos, f"**0 differences** for *{name_a}* vs *{name_b}*. Try adjusting names. Examples: {sample}", "_No differences to show._"
-        if rand:
-            pos = random.randrange(total)
-        else:
-            pos = (pos + step) % total
-        md = make_display(mismatches[pos], name_a, name_b)
-        return pos, f"**{total} differences** found • Showing {pos+1} / {total}", md
-    name_a_in.change(lambda a, b: recompute(a, b), inputs=[name_a_in, name_b_in], outputs=[st_pos, status_md, diff_md])
-    name_b_in.change(lambda a, b: recompute(a, b), inputs=[name_a_in, name_b_in], outputs=[st_pos, status_md, diff_md])
-    prev_btn.click(lambda a, b, p: nav(a, b, p, step=-1), inputs=[name_a_in, name_b_in, st_pos], outputs=[st_pos, status_md, diff_md])
-    next_btn.click(lambda a, b, p: nav(a, b, p, step=+1), inputs=[name_a_in, name_b_in, st_pos], outputs=[st_pos, status_md, diff_md])
-    rand_btn.click(lambda a, b, p: nav(a, b, p, rand=True), inputs=[name_a_in, name_b_in, st_pos], outputs=[st_pos, status_md, diff_md])
-    demo.load(lambda: recompute("Eleanor", "Hung"), inputs=None, outputs=[st_pos, status_md, diff_md])
 if __name__ == "__main__":
     demo.launch()

+# sjt_diff_viewer.py
+# Gradio viewer: show ONLY indices where two personas give different answers.
 # - Loads case_study_answers.json
+# - Extracts ordered list of selected options for each name
+# - Compares by index, displays mismatches with Name A (green) and Name B (red)
 import json
 from pathlib import Path
+from typing import Dict, List, Any, Optional
 import gradio as gr
 DATA_PATH = Path("case_study_answers.json")
+# ---------- Normalization & Labels ----------
+CANON = ["hh", "emotionality", "extraversion", "agreeableness", "conscientiousness", "openness"]
+LABELS = {
+    "hh": "Honesty–Humility",
     "emotionality": "Emotionality",
     "extraversion": "Extraversion",
     "agreeableness": "Agreeableness",
     "openness": "Openness",
 }
+def norm_trait(s: Optional[str]) -> str:
+    s = (s or "").strip().lower()
     if s.endswith("_option"):
         s = s[:-7]
     s = s.replace("-", "_").replace(" ", "_")
+    # Map common variants
+    mapping = {
+        "honesty–humility": "hh",
+        "honesty-humility": "hh",
+        "honesty_humility": "hh",
+        "honesty": "hh",
+        "openness_to_experience": "openness",
+        "openness to experience": "openness",
+    }
+    return mapping.get(s, s)
+def disp_label(s: str) -> str:
+    return LABELS.get(s, s.capitalize())
+# ---------- Data Loading ----------
+def load_data(path: Path) -> Dict[str, List[Dict[str, Any]]]:
+    """
+    Expected structure (based on your file):
+      {
+        "Person Name": [
+          {"option": "<trait or trait_option>", "question": {...}},
+          ...
+        ],
+        ...
+      }
+    Returns the raw parsed dict as-is (name -> list of items).
+    """
+    if not path.exists():
+        return {}
+    with path.open("r", encoding="utf-8") as f:
+        return json.load(f)
+RAW = load_data(DATA_PATH)
+def names_list() -> List[str]:
+    return sorted(RAW.keys())
+# ---------- Build Ordered Selected Lists ----------
+def build_selected_list(name: str) -> List[str]:
+    items = RAW.get(name, [])
+    return [norm_trait((it or {}).get("option")) for it in items if isinstance(it, dict)]
+def get_question_text(it: Dict[str, Any]) -> str:
+    q = it.get("question") or {}
+    if isinstance(q, dict):
+        block = q.get("corrected_sjt") or q.get("original_sjt") or {}
+        if isinstance(block, dict):
+            return str(block.get("question") or q.get("question") or "").strip()
+        return str(block or q.get("question") or "").strip()
+    return ""
+def build_question_list(name: str) -> List[str]:
+    items = RAW.get(name, [])
+    return [get_question_text(it) for it in items if isinstance(it, dict)]
+# ---------- Diff Logic (Index-aligned) ----------
+def mismatches_by_index(name_a: str, name_b: str):
+    sel_a = build_selected_list(name_a)
+    sel_b = build_selected_list(name_b)
+    qs_a = build_question_list(name_a)
+    qs_b = build_question_list(name_b)
+    n = min(len(sel_a), len(sel_b))
+    diffs = []
+    for i in range(n):
+        if sel_a[i] != sel_b[i]:
+            diffs.append({
+                "idx": i,
+                "q_a": qs_a[i] if i < len(qs_a) else "",
+                "q_b": qs_b[i] if i < len(qs_b) else "",
+                "a": sel_a[i],
+                "b": sel_b[i],
+            })
+    return diffs, len(sel_a), len(sel_b)
+def render_diffs(name_a: str, name_b: str) -> str:
+    diffs, len_a, len_b = mismatches_by_index(name_a, name_b)
+    header = f"**{len(diffs)} differences** (of {min(len_a, len_b)} compared) for **{name_a}** vs **{name_b}**"
+    if not diffs:
+        return header + "\n\n_No differences._"
+    lines = [header, ""]
+    for d in diffs:
+        idx = d["idx"]
+        qa = d["q_a"]
+        qb = d["q_b"]
+        # prefer showing a single question line; if different between personas, show A's version
+        q_disp = qa or qb
+        a_span = f\"\"\"<span style="
+        background:  # e8ffe8;color:#0a6410;font-weight:700;">{disp_label(d['a'])}</span>\"\"\"
+        b_span = f\"\"\"<span style="
+        background:  # ffe8e8;color:#a00606;font-weight:700;">{disp_label(d['b'])}</span>\"\"\"
+        lines.append(f"**{idx:02d}.** {q_disp}")
+        lines.append(f"• {name_a}: {a_span}")
+        lines.append(f"• {name_b}: {b_span}\n")
+    return "\n".join(lines)
+# ---------- Gradio App ----------
+with gr.Blocks(title="Differences by Index — Two Personas") as demo:
+    gr.Markdown("# Differences by Index — Two Personas")
+    gr.Markdown(
+        "This viewer extracts the **ordered list of selected options** per name, then compares two names **by index** and "
+        "shows only where they differ. Name A is highlighted **green**, Name B **red**."
+    )
+    all_names = names_list()
+    default_a = "Eleanor Hagedorn" if "Eleanor Hagedorn" in all_names else (all_names[0] if all_names else "")
+    default_b = "Hung Wong" if "Hung Wong" in all_names else (all_names[1] if len(all_names) > 1 else default_a)
     with gr.Row():
+        name_a_dd = gr.Dropdown(choices=all_names, value=default_a, label="Name A (green)", interactive=True)
+        name_b_dd = gr.Dropdown(choices=all_names, value=default_b, label="Name B (red)", interactive=True)
+    out_md = gr.Markdown()
+    def on_change(a: str, b: str):
+        return render_diffs(a, b)
+    name_a_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])
+    name_b_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])
+    demo.load(lambda: render_diffs(default_a, default_b), inputs=None, outputs=[out_md])
 if __name__ == "__main__":
     demo.launch()