amirali1985 committed on
Commit
7b2e432
·
1 Parent(s): 738ee45
Files changed (1) hide show
  1. app.py +141 -236
app.py CHANGED
@@ -1,33 +1,22 @@
1
-
2
- # sjt_compare_eleanor_hung.py
3
- # Minimal Gradio app: show ONLY questions where Eleanor and Hung chose different options.
4
  # - Loads case_study_answers.json
5
- # - Compares two personas (defaults: "Eleanor" vs "Hung")
6
- # - Robust matching across personas using IDs when available, else a hash of normalized question + options
7
- # - Highlights Eleanor's choice in green, Hung's in red
8
- # - Prev / Next / Random navigation
9
 
10
  import json
11
  from pathlib import Path
12
- from typing import List, Dict, Any, Optional, Tuple
13
- import random
14
- import hashlib
15
- import difflib
16
  import gradio as gr
17
 
18
  DATA_PATH = Path("case_study_answers.json")
19
 
20
- # Canonical HEXACO order & labels
21
- CANONICAL_ORDER = [
22
- "honesty_humility",
23
- "emotionality",
24
- "extraversion",
25
- "agreeableness",
26
- "conscientiousness",
27
- "openness",
28
- ]
29
- TRAIT_LABELS = {
30
- "honesty_humility": "Honesty–Humility",
31
  "emotionality": "Emotionality",
32
  "extraversion": "Extraversion",
33
  "agreeableness": "Agreeableness",
@@ -35,234 +24,150 @@ TRAIT_LABELS = {
35
  "openness": "Openness",
36
  }
37
 
38
- ALIAS_TO_CANON = {
39
- "hh": "honesty_humility",
40
- "honesty_humility": "honesty_humility",
41
- "honesty-humility": "honesty_humility",
42
- "honestyhumility": "honesty_humility",
43
- "honesty": "honesty_humility",
44
- "emotionality": "emotionality",
45
- "extraversion": "extraversion",
46
- "agreeableness": "agreeableness",
47
- "conscientiousness": "conscientiousness",
48
- "openness": "openness",
49
- }
50
 
51
- def canonical_trait(x: Optional[str]) -> Optional[str]:
52
- if x is None:
53
- return None
54
- s = str(x).strip().lower()
55
  if s.endswith("_option"):
56
  s = s[:-7]
57
  s = s.replace("-", "_").replace(" ", "_")
58
- return ALIAS_TO_CANON.get(s, s if s in CANONICAL_ORDER else None)
59
-
60
- def get_option_text_from_blocks(block: Dict[str, Any], q: Dict[str, Any], canon: str) -> Optional[str]:
61
- # Accept 'honesty_humility_option' and 'hh_option'
62
- keys_to_try = [f"{canon}_option"]
63
- if canon == "honesty_humility":
64
- keys_to_try.append("hh_option")
65
- for key in keys_to_try:
66
- if isinstance(block, dict) and key in block:
67
- return str(block[key]).strip()
68
- if isinstance(q, dict) and key in q:
69
- return str(q[key]).strip()
70
- return None
71
-
72
- def norm_text(s: str) -> str:
73
- return " ".join((s or "").split())
74
-
75
- def option_signature(opts: Dict[str, str]) -> str:
76
- # Deterministic signature from canonical-order option texts
77
- parts = [norm_text(opts.get(c, "")) for c in CANONICAL_ORDER]
78
- sig = "||".join(parts)
79
- return hashlib.sha256(sig.encode("utf-8")).hexdigest()
80
-
81
- def get_question_id(item: Dict[str, Any]) -> Optional[str]:
82
- # Try common ID fields at item or nested question level
83
- candidates = []
84
- for k in ["uid", "id", "question_id", "sjt_id", "sjt_uid", "index"]:
85
- if k in item: candidates.append(("item", k, item.get(k)))
86
- q = item.get("question") or {}
87
- if isinstance(q, dict):
88
- for k in ["uid", "id", "question_id", "sjt_id", "sjt_uid", "index"]:
89
- if k in q: candidates.append(("question", k, q.get(k)))
90
- for scope, k, v in candidates:
91
- if v is not None and str(v).strip():
92
- return f"{scope}:{k}:{str(v).strip()}"
93
- return None
94
-
95
- def _safe_get_question_block(item: Dict[str, Any]) -> Tuple[str, Dict[str, str], Optional[str]]:
96
- selected = canonical_trait(item.get("option"))
97
- q = item.get("question", {}) or {}
98
- block = q.get("corrected_sjt") or q.get("original_sjt") or {}
99
-
100
- question_text = ""
101
- options: Dict[str, str] = {}
102
- if isinstance(block, dict):
103
- question_text = block.get("question") or q.get("question") or ""
104
- for c in CANONICAL_ORDER:
105
- val = get_option_text_from_blocks(block, q, c)
106
- if val:
107
- options[c] = val
108
- else:
109
- question_text = str(block) if block else str(q.get("question", ""))
110
-
111
- if not options and isinstance(q, dict):
112
- for c in CANONICAL_ORDER:
113
- val = get_option_text_from_blocks({}, q, c)
114
- if val:
115
- options[c] = val
116
- return norm_text(question_text), options, selected
117
-
118
- def flatten_entries(raw: Any) -> List[Dict[str, Any]]:
119
- out: List[Dict[str, Any]] = []
120
- def handle_item(obj: Dict[str, Any], default_name: str):
121
- q_text, opts, sel = _safe_get_question_block(obj)
122
- nm = (obj.get("name") or default_name or "Unknown").strip() or "Unknown"
123
- qid = get_question_id(obj)
124
- if q_text and opts and sel:
125
- out.append({"name": nm, "question": q_text, "options": opts, "selected": sel, "qid": qid})
126
- if isinstance(raw, list):
127
- for x in raw:
128
- if isinstance(x, dict):
129
- handle_item(x, "Unknown")
130
- elif isinstance(raw, dict):
131
- for k, v in raw.items():
132
- default_name = str(k)
133
- if isinstance(v, list):
134
- for x in v:
135
- if isinstance(x, dict):
136
- handle_item(x, default_name)
137
- elif isinstance(v, dict):
138
- handle_item(v, default_name)
139
- return out
140
-
141
- def normalize_name(s: str) -> str:
142
- return " ".join((s or "").strip().lower().split())
143
-
144
- def persona_slice(entries: List[Dict[str, Any]], name_query: str) -> List[Dict[str, Any]]:
145
- q = normalize_name(name_query)
146
- return [e for e in entries if q in normalize_name(e["name"])]
147
-
148
- def best_key_for(e: Dict[str, Any]) -> str:
149
- # Prefer explicit IDs; else use text similarity friendly key
150
- if e.get("qid"):
151
- return f"id:{e['qid']}"
152
- # fallback: hash of normalized question + options signature
153
- sig = option_signature(e["options"])
154
- return f"sig:{hashlib.sha256((e['question'] + '||' + sig).encode('utf-8')).hexdigest()}"
155
-
156
- def build_map_by_key(slice_entries: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
157
- mp: Dict[str, Dict[str, Any]] = {}
158
- for e in slice_entries:
159
- k = best_key_for(e)
160
- if k not in mp:
161
- mp[k] = e
162
- # if duplicate keys, keep first occurrence
163
- return mp
164
-
165
- def build_mismatch_list(entries: List[Dict[str, Any]], name_a: str, name_b: str):
166
- slice_a = persona_slice(entries, name_a)
167
- slice_b = persona_slice(entries, name_b)
168
-
169
- map_a = build_map_by_key(slice_a)
170
- map_b = build_map_by_key(slice_b)
171
-
172
- mismatches = []
173
- for k in set(map_a.keys()).intersection(map_b.keys()):
174
- ea = map_a[k]
175
- eb = map_b[k]
176
- if ea["selected"] != eb["selected"]:
177
- # prefer richer options set
178
- opts = ea["options"] if len(ea["options"]) >= len(eb["options"]) else eb["options"]
179
- # choose question text by higher similarity (often identical)
180
- q = ea["question"] if difflib.SequenceMatcher(None, ea["question"], eb["question"]).ratio() >= 0.9 else ea["question"]
181
- mismatches.append({
182
- "question": q,
183
- "eleanor": ea,
184
- "hung": eb,
185
- "options": opts,
186
- })
187
- return mismatches
188
 
189
- def make_display(item: Dict[str, Any], name_a_disp: str, name_b_disp: str) -> str:
190
- q = item["question"]
191
- sel_a = item["eleanor"]["selected"]
192
- sel_b = item["hung"]["selected"]
193
- opts = item["options"]
194
 
195
- a_label = TRAIT_LABELS.get(sel_a, sel_a)
196
- b_label = TRAIT_LABELS.get(sel_b, sel_b)
197
 
198
- a_text = opts.get(sel_a, "")
199
- b_text = opts.get(sel_b, "")
200
 
201
- a_span = f"<span style='background:#e8ffe8;color:#0a6410;font-weight:700;'>{a_label}: {a_text}</span>"
202
- b_span = f"<span style='background:#ffe8e8;color:#a00606;font-weight:700;'>{b_label}: {b_text}</span>"
203
 
204
- body = [
205
- f"### ❓ Question",
206
- q,
207
- "",
208
- f"**{name_a_disp} chose:** {a_span}",
209
- f"**{name_b_disp} chose:** {b_span}",
210
- ]
211
- return "\n\n".join(body)
 
 
 
 
 
 
 
 
212
 
213
- DATA_RAW = (json.loads(Path(DATA_PATH).read_text(encoding='utf-8')) if DATA_PATH.exists() else [])
214
- DATA = flatten_entries(DATA_RAW)
215
 
216
- with gr.Blocks(title="Eleanor vs Hung — Differences Only") as demo:
217
- gr.Markdown("# Eleanor vs Hung — Different Answers Only")
218
- gr.Markdown("Shows only the questions where the two personas chose different options.")
219
 
220
- with gr.Row():
221
- name_a_in = gr.Textbox(value="Eleanor", label="Name A (green)", interactive=True)
222
- name_b_in = gr.Textbox(value="Hung", label="Name B (red)", interactive=True)
223
- st_pos = gr.State(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
  with gr.Row():
226
- prev_btn = gr.Button("Previous")
227
- next_btn = gr.Button("Next")
228
- rand_btn = gr.Button("Random")
229
-
230
- status_md = gr.Markdown()
231
- diff_md = gr.Markdown()
232
-
233
- def recompute(name_a: str, name_b: str):
234
- mismatches = build_mismatch_list(DATA, name_a, name_b)
235
- total = len(mismatches)
236
- if total == 0:
237
- # Provide a hint with available names to help debugging
238
- names = sorted(set(e['name'] for e in DATA))
239
- sample = ", ".join(names[:10]) + (" ..." if len(names) > 10 else "")
240
- return 0, f"**0 differences** for *{name_a}* vs *{name_b}*. Try adjusting names. Examples: {sample}", "_No differences to show._"
241
- md = make_display(mismatches[0], name_a, name_b)
242
- return 0, f"**{total} differences** found for *{name_a}* vs *{name_b}*.", md
243
-
244
- def nav(name_a: str, name_b: str, pos: int, step: int = 0, rand: bool = False):
245
- mismatches = build_mismatch_list(DATA, name_a, name_b)
246
- total = len(mismatches)
247
- if total == 0:
248
- names = sorted(set(e['name'] for e in DATA))
249
- sample = ", ".join(names[:10]) + (" ..." if len(names) > 10 else "")
250
- return pos, f"**0 differences** for *{name_a}* vs *{name_b}*. Try adjusting names. Examples: {sample}", "_No differences to show._"
251
- if rand:
252
- pos = random.randrange(total)
253
- else:
254
- pos = (pos + step) % total
255
- md = make_display(mismatches[pos], name_a, name_b)
256
- return pos, f"**{total} differences** found • Showing {pos+1} / {total}", md
257
-
258
- name_a_in.change(lambda a, b: recompute(a, b), inputs=[name_a_in, name_b_in], outputs=[st_pos, status_md, diff_md])
259
- name_b_in.change(lambda a, b: recompute(a, b), inputs=[name_a_in, name_b_in], outputs=[st_pos, status_md, diff_md])
260
-
261
- prev_btn.click(lambda a, b, p: nav(a, b, p, step=-1), inputs=[name_a_in, name_b_in, st_pos], outputs=[st_pos, status_md, diff_md])
262
- next_btn.click(lambda a, b, p: nav(a, b, p, step=+1), inputs=[name_a_in, name_b_in, st_pos], outputs=[st_pos, status_md, diff_md])
263
- rand_btn.click(lambda a, b, p: nav(a, b, p, rand=True), inputs=[name_a_in, name_b_in, st_pos], outputs=[st_pos, status_md, diff_md])
264
-
265
- demo.load(lambda: recompute("Eleanor", "Hung"), inputs=None, outputs=[st_pos, status_md, diff_md])
266
 
267
  if __name__ == "__main__":
268
  demo.launch()
 
1
+ # sjt_diff_viewer.py
2
+ # Gradio viewer: show ONLY indices where two personas give different answers.
 
3
  # - Loads case_study_answers.json
4
+ # - Extracts ordered list of selected options for each name
5
+ # - Compares by index, displays mismatches with Name A (green) and Name B (red)
 
 
6
 
7
  import json
8
  from pathlib import Path
9
+ from typing import Dict, List, Any, Optional
 
 
 
10
  import gradio as gr
11
 
12
  DATA_PATH = Path("case_study_answers.json")
13
 
14
+ # ---------- Normalization & Labels ----------
15
+
16
+ CANON = ["hh", "emotionality", "extraversion", "agreeableness", "conscientiousness", "openness"]
17
+
18
+ LABELS = {
19
+ "hh": "Honesty–Humility",
 
 
 
 
 
20
  "emotionality": "Emotionality",
21
  "extraversion": "Extraversion",
22
  "agreeableness": "Agreeableness",
 
24
  "openness": "Openness",
25
  }
26
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
def norm_trait(s: Optional[str]) -> str:
    """Normalize a trait name (or ``<trait>_option`` key) to a canonical id.

    Case, surrounding whitespace and separator style (spaces, hyphens,
    en/em dashes, underscores) are ignored, a trailing ``_option`` suffix is
    dropped, and known aliases are folded together, so ``"Honesty-Humility"``
    and ``"honesty_humility_option"`` both become ``"hh"``.

    Args:
        s: Raw value of an item's ``option`` field; may be ``None`` or empty.

    Returns:
        The canonical id when an alias matches, otherwise the normalized
        text itself; ``""`` for a missing/empty value.
    """
    if not s:
        return ""
    # Unify every separator *before* the suffix check and alias lookup, so
    # "Agreeableness Option" and "Honesty – Humility" are recognised too.
    text = str(s).lower()
    for sep in ("-", "\u2013", "\u2014", "_"):
        text = text.replace(sep, " ")
    text = "_".join(text.split())
    if text.endswith("_option"):
        text = text[: -len("_option")]
    # Keys are written in the already-normalized spelling (underscores only).
    aliases = {
        "honesty_humility": "hh",
        "honesty": "hh",
        "openness_to_experience": "openness",
    }
    return aliases.get(text, text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
 
 
 
 
44
 
45
def disp_label(s: str) -> str:
    """Return the display label for trait id ``s``.

    Ids present in ``LABELS`` use their configured label; any other id is
    shown as the id itself with its first letter capitalized.
    """
    fallback = s.capitalize()
    if s in LABELS:
        return LABELS[s]
    return fallback
47
 
 
 
48
 
49
+ # ---------- Data Loading ----------
 
50
 
51
def load_data(path: Path) -> Dict[str, List[Dict[str, Any]]]:
    """Load the name -> answers mapping from a JSON file.

    Expected structure:
    {
      "Person Name": [
        {"option": "<trait or trait_option>", "question": {...}},
        ...
      ],
      ...
    }

    Args:
        path: Location of the JSON file.

    Returns:
        The parsed mapping unchanged (name -> list of items), or an empty
        dict when ``path`` is not an existing regular file.

    Raises:
        ValueError: If the top-level JSON value is not an object; the rest
            of the module looks personas up by key and needs a mapping.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # is_file() (rather than exists()) also covers a directory at this path,
    # which would otherwise fail inside open().
    if not path.is_file():
        return {}
    with path.open("r", encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError(
            f"{path}: expected a JSON object mapping names to answer lists, "
            f"got {type(data).__name__}"
        )
    return data
67
 
 
 
68
 
69
+ RAW = load_data(DATA_PATH)
 
 
70
 
71
+
72
def names_list() -> List[str]:
    """Return every persona name found in ``RAW``, sorted ascending."""
    ordered = list(RAW.keys())
    ordered.sort()
    return ordered
74
+
75
+
76
+ # ---------- Build Ordered Selected Lists ----------
77
+
78
def build_selected_list(name: str) -> List[str]:
    """Return the normalized selected option of each answer stored for ``name``.

    Entries that are not dicts are skipped; an unknown name yields ``[]``.
    Order follows the order of the entries in ``RAW``.
    """
    selected: List[str] = []
    for entry in RAW.get(name, []):
        if not isinstance(entry, dict):
            continue
        selected.append(norm_trait(entry.get("option")))
    return selected
81
+
82
+
83
def get_question_text(it: Dict[str, Any]) -> str:
    """Extract the question text of one answer item.

    Lookup order inside ``it["question"]``:
      1. the ``corrected_sjt`` block, else the ``original_sjt`` block;
         a dict block contributes its ``"question"`` entry, any other
         truthy block is used as the text itself;
      2. the ``"question"`` entry of the question dict.
    A ``question`` field that is itself a plain string is returned directly.

    Args:
        it: One answer item.

    Returns:
        The stripped question text, or ``""`` when none can be found.
    """
    question = it.get("question")
    if isinstance(question, str):
        # Flat layout: the field already holds the text.
        return question.strip()
    if not isinstance(question, dict):
        return ""
    block = question.get("corrected_sjt") or question.get("original_sjt") or {}
    if isinstance(block, dict):
        text = block.get("question") or question.get("question") or ""
    else:
        text = block or question.get("question") or ""
    return str(text).strip()
91
+
92
+
93
def build_question_list(name: str) -> List[str]:
    """Return the question text of each answer stored for ``name``.

    Entries that are not dicts are skipped, so positions line up with the
    list produced from the same entries for the selected options.
    """
    questions: List[str] = []
    for entry in RAW.get(name, []):
        if isinstance(entry, dict):
            questions.append(get_question_text(entry))
    return questions
96
+
97
+
98
+ # ---------- Diff Logic (Index-aligned) ----------
99
+
100
def mismatches_by_index(name_a: str, name_b: str):
    """Compare two personas' answers position by position.

    Only the first ``min(len_a, len_b)`` positions are compared.

    Returns:
        A tuple ``(diffs, len_a, len_b)`` where ``diffs`` holds one dict per
        differing position with keys ``idx`` (0-based position), ``q_a`` /
        ``q_b`` (each persona's question text, ``""`` if unavailable) and
        ``a`` / ``b`` (the normalized options), and ``len_a`` / ``len_b``
        are the answer counts of the two personas.
    """
    picks_a, picks_b = build_selected_list(name_a), build_selected_list(name_b)
    texts_a, texts_b = build_question_list(name_a), build_question_list(name_b)

    def text_at(texts, pos):
        # Tolerate a question list shorter than the option list.
        return texts[pos] if pos < len(texts) else ""

    diffs = [
        {
            "idx": pos,
            "q_a": text_at(texts_a, pos),
            "q_b": text_at(texts_b, pos),
            "a": opt_a,
            "b": opt_b,
        }
        for pos, (opt_a, opt_b) in enumerate(zip(picks_a, picks_b))
        if opt_a != opt_b
    ]
    return diffs, len(picks_a), len(picks_b)
118
+
119
+
120
def render_diffs(name_a: str, name_b: str) -> str:
    """Render the differing answers of two personas as Markdown.

    The output starts with a summary line (number of differences and number
    of positions compared). Each difference then lists the position, the
    question text (persona A's version, falling back to B's) and both
    choices, A on a green and B on a red background.

    Args:
        name_a: Persona shown in green.
        name_b: Persona shown in red.

    Returns:
        Markdown text with inline ``<span>`` elements for the highlights.
    """
    import html  # local import: only needed for the inline HTML below

    diffs, len_a, len_b = mismatches_by_index(name_a, name_b)
    header = f"**{len(diffs)} differences** (of {min(len_a, len_b)} compared) for **{name_a}** vs **{name_b}**"
    if not diffs:
        return header + "\n\n_No differences._"

    def highlight(trait: str, background: str, colour: str) -> str:
        # Hex colours must follow '#' without a space, and the style
        # attribute is kept on a single line so it survives Markdown.
        label = html.escape(disp_label(trait), quote=False)
        return (
            f'<span style="background:{background};color:{colour};'
            f'font-weight:700;">{label}</span>'
        )

    lines = [header, ""]
    for d in diffs:
        # Show a single question line; prefer A's wording when both exist.
        q_disp = d["q_a"] or d["q_b"]
        a_span = highlight(d["a"], "#e8ffe8", "#0a6410")
        b_span = highlight(d["b"], "#ffe8e8", "#a00606")
        lines.append(f"**{d['idx']:02d}.** {q_disp}")
        lines.append(f"• {name_a}: {a_span}")
        lines.append(f"• {name_b}: {b_span}\n")
    return "\n".join(lines)
141
+
142
+
143
+ # ---------- Gradio App ----------
144
+
145
with gr.Blocks(title="Differences by Index — Two Personas") as demo:
    # Page heading and a short explanation of what is displayed.
    gr.Markdown("# Differences by Index — Two Personas")
    gr.Markdown(
        "This viewer extracts the **ordered list of selected options** per name, then compares two names **by index** and "
        "shows only where they differ. Name A is highlighted **green**, Name B **red**."
    )

    all_names = names_list()

    # Preselect the two case-study personas when present; otherwise fall
    # back to the first / second available name.
    if "Eleanor Hagedorn" in all_names:
        default_a = "Eleanor Hagedorn"
    elif all_names:
        default_a = all_names[0]
    else:
        default_a = ""

    if "Hung Wong" in all_names:
        default_b = "Hung Wong"
    elif len(all_names) > 1:
        default_b = all_names[1]
    else:
        default_b = default_a

    with gr.Row():
        name_a_dd = gr.Dropdown(
            choices=all_names,
            value=default_a,
            label="Name A (green)",
            interactive=True,
        )
        name_b_dd = gr.Dropdown(
            choices=all_names,
            value=default_b,
            label="Name B (red)",
            interactive=True,
        )

    out_md = gr.Markdown()

    def on_change(a: str, b: str):
        """Re-render the comparison for the currently selected names."""
        return render_diffs(a, b)

    # Changing either dropdown refreshes the same output panel.
    name_a_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])
    name_b_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])

    # Initial render for the default pair when the page loads.
    demo.load(lambda: render_diffs(default_a, default_b), inputs=None, outputs=[out_md])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  if __name__ == "__main__":
173
  demo.launch()