File size: 8,675 Bytes
53b3011
7b2e432
 
76810fa
7b2e432
 
53b3011
 
8692f27
 
 
7b2e432
8692f27
 
 
 
7b2e432
 
53b3011
7b2e432
 
53b3011
8692f27
 
 
 
 
 
 
53b3011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8692f27
53b3011
7b2e432
76810fa
 
 
53b3011
76810fa
53b3011
 
76810fa
7b2e432
76810fa
7b2e432
 
 
 
 
8692f27
7b2e432
8692f27
7b2e432
 
 
53b3011
7b2e432
 
 
 
 
 
 
 
 
 
53b3011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b2e432
53b3011
 
 
 
 
 
 
 
 
 
7b2e432
 
 
 
53b3011
 
7b2e432
53b3011
7b2e432
 
53b3011
 
 
 
7b2e432
 
53b3011
 
 
 
7b2e432
53b3011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b2e432
 
 
 
 
 
 
53b3011
7b2e432
 
53b3011
 
 
 
 
 
 
 
 
 
 
7b2e432
 
 
 
 
 
53b3011
 
7b2e432
 
 
 
 
8692f27
 
7b2e432
 
 
 
 
 
 
 
 
 
 
 
8692f27
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

# sjt_diff_viewer.py
# Gradio viewer: show ONLY indices where two personas give different answers.
# - Loads case_study_answers.json
# - Extracts ordered list of selected options for each name
# - Compares by index, displays mismatches with Name A (green) and Name B (red)
# - For each mismatch, shows the full question text AND all option texts in HEXACO order,
#   highlighting Name A's selection in green and Name B's in red.

import json
from pathlib import Path
from typing import Dict, List, Any, Optional
import gradio as gr

DATA_PATH = Path("case_study_answers.json")

# ---------- Normalization & Labels ----------

# Canonical trait keys; iteration order of this list is the display order
# used when option texts are rendered.
CANON = [
    "honesty_humility",
    "emotionality",
    "extraversion",
    "agreeableness",
    "conscientiousness",
    "openness",
]

# Human-readable label for each canonical trait key.
LABELS = {
    "honesty_humility": "Honesty–Humility",
    "emotionality": "Emotionality",
    "extraversion": "Extraversion",
    "agreeableness": "Agreeableness",
    "conscientiousness": "Conscientiousness",
    "openness": "Openness",
}

# Maps every accepted spelling of a selected value to its canonical key.
# Written grouped by trait and flattened into a single alias -> canon dict.
ALIAS_TO_CANON = {
    alias: trait
    for trait, aliases in (
        (
            "honesty_humility",
            (
                "hh",
                "honesty_humility",
                "honesty-humility",
                "honesty–humility",
                "honesty humility",
                "honesty",
            ),
        ),
        ("emotionality", ("emotionality",)),
        ("extraversion", ("extraversion",)),
        ("agreeableness", ("agreeableness",)),
        ("conscientiousness", ("conscientiousness",)),
        (
            "openness",
            ("openness", "openness to experience", "openness_to_experience"),
        ),
    )
    for alias in aliases
}

# Key prefixes to try (as "<prefix>_option") when extracting option texts.
# Every trait is looked up under its own canonical name; honesty_humility is
# additionally looked up under the short form "hh".
CANON_TO_ALIASES = {trait: [trait] for trait in CANON}
CANON_TO_ALIASES["honesty_humility"].append("hh")

def norm_trait(s: Optional[str]) -> Optional[str]:
    """Normalize a selected-option value to its canonical trait key.

    The value is lower-cased; hyphens, en/em dashes, underscores and runs of
    whitespace are all treated as a single ``_`` separator; a trailing
    ``_option`` is dropped; the result is then looked up in ALIAS_TO_CANON
    (falling back to a direct CANON match).

    Args:
        s: Raw value of an item's selected option. Anything that is not a
            string (including None) is treated as unrecognized.

    Returns:
        The canonical trait key, or None when the value is not recognized.
    """
    # JSON may carry numbers/lists/null here; those cannot name a trait.
    if not isinstance(s, str):
        return None

    key = s.lower()
    # Unify every separator first so the suffix check below also matches
    # spellings such as "honesty-humility option" or "hh -option".
    for sep in ("-", "\u2013", "\u2014", "_"):
        key = key.replace(sep, " ")
    # split()/join collapses separator runs and trims both ends.
    key = "_".join(key.split())

    if key.endswith("_option"):
        key = key[: -len("_option")]

    return ALIAS_TO_CANON.get(key, key if key in CANON else None)

def disp_label(canon: str) -> str:
    """Return the display label for a trait key.

    Keys present in LABELS map to their configured label; any other key is
    shown as the key itself with its first letter capitalized.
    """
    fallback = canon.capitalize()
    return LABELS.get(canon, fallback)

# ---------- Data Loading ----------

def load_data(path: Path) -> Dict[str, List[Dict[str, Any]]]:
    """Load the answers JSON file.

    Args:
        path: Location of the JSON file to read (decoded as UTF-8).

    Returns:
        The decoded top-level JSON object, or an empty dict when the file
        does not exist.

    Raises:
        ValueError: If the top-level JSON value is not an object. Malformed
            JSON raises ``json.JSONDecodeError``, which is itself a
            ``ValueError`` subclass.
    """
    # Open directly instead of checking existence first: one filesystem
    # access and no window between the check and the open.
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return {}

    # Callers use dict methods (.keys(), .get()) on the result, so fail here
    # with a clear message rather than later with an AttributeError.
    if not isinstance(data, dict):
        raise ValueError(
            f"{path}: expected a JSON object at the top level, "
            f"got {type(data).__name__}"
        )
    return data

# Parsed contents of DATA_PATH, read once when the module is imported.
RAW = load_data(DATA_PATH)

def names_list() -> List[str]:
    """Return the top-level keys of RAW (the available names), sorted."""
    names = list(RAW.keys())
    names.sort()
    return names

# ---------- Extract question text + options per item ----------

def get_question_text(it: Dict[str, Any]) -> str:
    """Return the question text of one answer item, stripped of whitespace.

    Lookup order when ``it["question"]`` is a dict:
      1. the preferred block (``corrected_sjt`` if truthy, else
         ``original_sjt``): its ``"question"`` entry when the block is a
         dict, otherwise the block value itself;
      2. the ``"question"`` entry of the question dict;
      3. ``original_sjt`` as a last resort when ``corrected_sjt`` was the
         preferred block but supplied no text.
    A plain-string ``it["question"]`` is returned as-is (stripped).

    Args:
        it: One raw answer item.

    Returns:
        The first non-blank candidate, or "" when none is found.
    """
    q = it.get("question")
    if isinstance(q, str):
        return q.strip()
    if not isinstance(q, dict):
        return ""

    corrected = q.get("corrected_sjt")
    original = q.get("original_sjt")

    def _text_of(block: Any) -> Any:
        # A dict block stores its text under "question"; any other value is
        # taken to be the text itself.
        return block.get("question") if isinstance(block, dict) else block

    candidates = [_text_of(corrected or original), q.get("question")]
    if corrected and original:
        # corrected_sjt was preferred above; still consult original_sjt
        # before giving up.
        candidates.append(_text_of(original))

    for cand in candidates:
        text = str(cand).strip() if cand else ""
        if text:
            return text
    return ""

def get_options_map(it: Dict[str, Any]) -> Dict[str, str]:
    """
    Collect the option text for each canonical trait of one answer item.

    For every trait in CANON, each alias from CANON_TO_ALIASES is tried as
    the key '<alias>_option' (e.g. 'honesty_humility_option', 'hh_option'),
    first in the SJT block ('corrected_sjt', else 'original_sjt') and then
    in the question dict itself. The first truthy value wins; it is
    stringified and stripped, and kept only if non-empty.

    Returns dict canonical_trait -> option text (traits without text omitted).
    """
    question = it.get("question") or {}

    # Lookup sources in priority order: SJT block (only if a dict), then
    # the question dict. Left empty when the question is not a dict.
    sources: List[Dict[str, Any]] = []
    if isinstance(question, dict):
        sjt_block = question.get("corrected_sjt") or question.get("original_sjt")
        if isinstance(sjt_block, dict):
            sources.append(sjt_block)
        sources.append(question)

    texts: Dict[str, str] = {}
    for trait in CANON:
        keys = [f"{alias}_option" for alias in CANON_TO_ALIASES.get(trait, [trait])]
        # Alias-major order: each key is tried in every source before the
        # next alias is considered.
        raw = next(
            (src[key] for key in keys for src in sources if src.get(key)),
            None,
        )
        if raw is None:
            continue
        text = str(raw).strip()
        if text:
            texts[trait] = text
    return texts

# ---------- Build ordered entries per name ----------

def _raw_items(name: str) -> List[Any]:
    """Return the raw item list stored in RAW for *name* ([] if absent or not a list)."""
    items = RAW.get(name)
    return items if isinstance(items, list) else []


def _entry_from_item(it: Any) -> Optional[Dict[str, Any]]:
    """Convert one raw answer item into an entry, or None when it is unusable.

    An item is usable only if it is a dict that yields a recognized selected
    trait, non-empty question text, and at least one option text.
    """
    if not isinstance(it, dict):
        return None
    selected = norm_trait(it.get("option"))
    q_text = get_question_text(it)
    options = get_options_map(it)
    if not (selected and q_text and options):
        return None
    return {"question": q_text, "selected": selected, "options": options}


def build_entries(name: str) -> List[Dict[str, Any]]:
    """Return the usable entries for *name*, in their original order.

    Each entry is a dict with keys "question", "selected" (canonical trait
    key) and "options" (canonical trait -> option text). Unusable items are
    omitted, so positions in the returned list do not necessarily match
    positions in the raw data.
    """
    entries = (_entry_from_item(it) for it in _raw_items(name))
    return [entry for entry in entries if entry is not None]

# ---------- Diff Logic (Index-aligned) ----------

def mismatches_by_index(name_a: str, name_b: str):
    """Find the positions at which two names selected different options.

    Items are aligned by their position in the raw data, not by their
    position among the usable entries: a position is compared only when both
    names have a usable item there. Aligning the filtered lists instead
    would shift every later index after a single unusable item and pair up
    answers to different questions.

    Args:
        name_a: First name (key into RAW).
        name_b: Second name (key into RAW).

    Returns:
        A tuple ``(diffs, usable_a, usable_b)``. ``diffs`` is a list of dicts
        with keys "idx" (0-based raw position), "question", "a_sel", "b_sel"
        and "options" (canonical trait -> text, in CANON order).
        ``usable_a`` / ``usable_b`` are the numbers of usable entries for
        each name; their minimum is an upper bound on the number of
        positions actually compared.
    """
    slots_a = [_entry_from_item(it) for it in _raw_items(name_a)]
    slots_b = [_entry_from_item(it) for it in _raw_items(name_b)]

    diffs = []
    for i, (ent_a, ent_b) in enumerate(zip(slots_a, slots_b)):
        # Skip positions that cannot be compared, without shifting the rest.
        if ent_a is None or ent_b is None:
            continue
        if ent_a["selected"] == ent_b["selected"]:
            continue

        # prefer a's options; fallback to b's where missing
        opts = {}
        for c in CANON:
            txt = ent_a["options"].get(c) or ent_b["options"].get(c)
            if txt:
                opts[c] = txt

        diffs.append({
            "idx": i,
            "question": ent_a["question"] or ent_b["question"],
            "a_sel": ent_a["selected"],
            "b_sel": ent_b["selected"],
            "options": opts,
        })

    usable_a = sum(entry is not None for entry in slots_a)
    usable_b = sum(entry is not None for entry in slots_b)
    return diffs, usable_a, usable_b

def render_options_md(options: Dict[str, str], a_sel: str, b_sel: str, name_a: str, name_b: str) -> str:
    """Render one question's options as Markdown, one numbered line per trait.

    Traits are walked in CANON order and numbered by their CANON position
    (starting at 1); traits without option text are skipped. The option
    chosen by name A is wrapped in a green span tagged with A's name, the
    one chosen by name B in a red span tagged with B's name; all other
    options are plain lines with a bold label. Lines are separated by a
    blank line.
    """
    green_open = "<span style='background:#e8ffe8;color:#0a6410;font-weight:700;'>"
    red_open = "<span style='background:#ffe8e8;color:#a00606;font-weight:700;'>"

    rendered: List[str] = []
    for number, trait in enumerate(CANON, start=1):
        text = options.get(trait)
        if not text:
            continue

        label = disp_label(trait)
        picked_by_a = trait == a_sel
        picked_by_b = trait == b_sel

        if picked_by_a and picked_by_b:
            # Not expected for mismatches, but rendered sensibly: green span
            # for A followed by a red tag for B.
            entry = (
                f"{number}. {green_open}{label}: {text} ( {name_a} )</span> "
                f"{red_open}( {name_b} )</span>"
            )
        elif picked_by_a:
            entry = f"{number}. {green_open}{label}: {text} ( {name_a} )</span>"
        elif picked_by_b:
            entry = f"{number}. {red_open}{label}: {text} ( {name_b} )</span>"
        else:
            entry = f"{number}. **{label}:** {text}"
        rendered.append(entry)

    return "\n\n".join(rendered)

def render_diffs(name_a: str, name_b: str) -> str:
    """Build the Markdown report of all mismatches between two names.

    The report starts with a summary header (number of differences and the
    smaller of the two entry counts). With no mismatches, a "_No
    differences._" note follows. Otherwise each mismatch contributes a
    heading with its two-digit index, the question text (or a placeholder
    when empty), and the rendered options.
    """
    diffs, len_a, len_b = mismatches_by_index(name_a, name_b)
    compared = min(len_a, len_b)
    header = f"**{len(diffs)} differences** (of {compared} compared) for **{name_a}** vs **{name_b}**"

    if not diffs:
        return header + "\n\n_No differences._"

    parts: List[str] = [header, ""]
    for diff in diffs:
        parts.extend([
            f"### {diff['idx']:02d}. Question",
            diff["question"] or "_(no question text found)_",
            "",
            render_options_md(diff["options"], diff["a_sel"], diff["b_sel"], name_a, name_b),
            "",  # spacing between blocks
        ])
    return "\n".join(parts)

# ---------- Gradio App ----------

# Declarative UI definition: components are created in layout order inside
# the Blocks context, then events are wired to them.
with gr.Blocks(title="Differences by Index — Two Personas") as demo:
    gr.Markdown("# Differences by Index — Two Personas")
    gr.Markdown(
        "Extracts the **ordered list of selected options** per name, compares two names **by index**, and shows only where they differ.\n"
        "**Name A** is highlighted in **green**, **Name B** in **red**. Full question text and all option texts are shown for each mismatch."
    )

    # Pick initial selections: the two preferred names when present in the
    # data, otherwise the first/second available name. With fewer than two
    # names, B falls back to A; with none, both are "".
    all_names = names_list()
    default_a = "Eleanor Hagedorn" if "Eleanor Hagedorn" in all_names else (all_names[0] if all_names else "")
    default_b = "Hung Wong" if "Hung Wong" in all_names else (all_names[1] if len(all_names) > 1 else default_a)

    # Side-by-side name selectors.
    with gr.Row():
        name_a_dd = gr.Dropdown(choices=all_names, value=default_a, label="Name A (green)", interactive=True)
        name_b_dd = gr.Dropdown(choices=all_names, value=default_b, label="Name B (red)", interactive=True)

    # Output area for the rendered diff report.
    out_md = gr.Markdown()

    def on_change(a: str, b: str) -> str:
        """Re-render the report for the currently selected pair of names."""
        return render_diffs(a, b)

    # Changing either dropdown recomputes the report from both current values.
    name_a_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])
    name_b_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])

    # Render the report for the default pair on the app's load event.
    demo.load(lambda: render_diffs(default_a, default_b), inputs=None, outputs=[out_md])

# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()