Spaces:

TailsResearch
/

psychometrics_case_study_with_questions

Sleeping

App Files Files Community

psychometrics_case_study_with_questions / app.py

amirali1985

fix it.

53b3011 2 months ago

raw

history blame contribute delete

8.68 kB


	# sjt_diff_viewer.py
	# Gradio viewer: show ONLY indices where two personas give different answers.
	# - Loads case_study_answers.json
	# - Extracts ordered list of selected options for each name
	# - Compares by index, displays mismatches with Name A (green) and Name B (red)
	# - For each mismatch, shows the full question text AND all option texts in HEXACO order,
	# highlighting Name A's selection in green and Name B's in red.

	import json
	from pathlib import Path
	from typing import Dict, List, Any, Optional
	import gradio as gr

	# Relative path to the answers file; resolved against the process's current
	# working directory when it is opened.
	DATA_PATH = Path("case_study_answers.json")

	# ---------- Normalization & Labels ----------

	# Canonical trait keys. The list order is also the display order: the option
	# renderer enumerates CANON to number and order the option lines.
	CANON = ["honesty_humility", "emotionality", "extraversion", "agreeableness", "conscientiousness", "openness"]

	# Human-readable label for each canonical trait key (used when rendering options).
	LABELS = {
	"honesty_humility": "Honesty–Humility",
	"emotionality": "Emotionality",
	"extraversion": "Extraversion",
	"agreeableness": "Agreeableness",
	"conscientiousness": "Conscientiousness",
	"openness": "Openness",
	}

	# alias map for selected values
	# Keys are lower-case spellings of a trait; values are the canonical key from CANON.
	# norm_trait() lower-cases its input and turns hyphens/spaces into underscores
	# before looking it up here, so the underscore spellings are the ones that
	# normally match.
	ALIAS_TO_CANON = {
	"hh": "honesty_humility",
	"honesty_humility": "honesty_humility",
	"honesty-humility": "honesty_humility",
	"honesty–humility": "honesty_humility",
	"honesty humility": "honesty_humility",
	"honesty": "honesty_humility",
	"emotionality": "emotionality",
	"extraversion": "extraversion",
	"agreeableness": "agreeableness",
	"conscientiousness": "conscientiousness",
	"openness": "openness",
	"openness to experience": "openness",
	"openness_to_experience": "openness",
	}

	# for extracting option texts, try these aliases as prefixes
	# Each alias is combined with the "_option" suffix to form the dictionary key
	# that is looked up in a question record (e.g. "hh" -> "hh_option").
	# Aliases are tried in list order.
	CANON_TO_ALIASES = {
	"honesty_humility": ["honesty_humility", "hh"],
	"emotionality": ["emotionality"],
	"extraversion": ["extraversion"],
	"agreeableness": ["agreeableness"],
	"conscientiousness": ["conscientiousness"],
	"openness": ["openness"],
	}

	def norm_trait(s: Optional[str]) -> Optional[str]:
	    """Map a free-form trait label to its canonical key.

	    The label is lower-cased, every run of separators (whitespace, hyphen,
	    en/em dash, underscore) is collapsed into a single underscore, and a
	    trailing ``_option`` suffix is removed. The result is then resolved
	    through ALIAS_TO_CANON, falling back to CANON membership.

	    Args:
	        s: Raw label such as ``"HH_option"``, ``"Honesty-Humility"`` or
	            ``"openness to experience"``. ``None`` and non-string values
	            are treated as "no label".

	    Returns:
	        The canonical trait key, or ``None`` when the label is empty,
	        not a string, or not recognised.
	    """
	    if not isinstance(s, str):
	        # JSON may hold null or a number here; neither names a trait.
	        return None

	    key = s.strip().lower()

	    # Unify separators *before* looking at the suffix, so that spellings
	    # like "hh-option" or "HH Option" are handled the same as "hh_option".
	    for sep in ("-", "\u2013", "\u2014", "_"):
	        key = key.replace(sep, " ")
	    key = "_".join(key.split())

	    suffix = "_option"
	    if key.endswith(suffix):
	        key = key[: -len(suffix)]

	    return ALIAS_TO_CANON.get(key, key if key in CANON else None)

	def disp_label(canon: str) -> str:
	    """Return the display label for a trait key.

	    Keys present in LABELS use their configured label; any other key is
	    shown as its own text with the first letter capitalised.
	    """
	    fallback = canon.capitalize()
	    return LABELS.get(canon, fallback)

	# ---------- Data Loading ----------

	def load_data(path: Path) -> Dict[str, List[Dict[str, Any]]]:
	    """Load the answers file and return its top-level JSON object.

	    Args:
	        path: Location of the UTF-8 encoded JSON file.

	    Returns:
	        The parsed JSON object. An empty dict is returned when the file
	        does not exist, or when the top-level JSON value is not an object
	        (callers use the result as a dict via ``.keys()`` / ``.get()``).

	    Raises:
	        json.JSONDecodeError: If the file exists but is not valid JSON.
	    """
	    # Open directly instead of checking exists() first: avoids the race
	    # between the check and the open.
	    try:
	        handle = path.open("r", encoding="utf-8")
	    except (FileNotFoundError, IsADirectoryError):
	        return {}

	    with handle:
	        data = json.load(handle)

	    # A list/str/number at the top level cannot be used as the
	    # name -> items mapping; treat it the same as "no data".
	    return data if isinstance(data, dict) else {}

	# Parsed contents of DATA_PATH, loaded once when the module is imported.
	# The rest of the module reads it as a mapping from a name to that name's
	# list of answer items; it is an empty dict when the file is missing.
	RAW = load_data(DATA_PATH)

	def names_list() -> List[str]:
	    """Return every name found in RAW, sorted ascending."""
	    # Iterating a dict yields its keys, so no explicit .keys() is needed.
	    return sorted(RAW)

	# ---------- Extract question text + options per item ----------

	def get_question_text(it: Dict[str, Any]) -> str:
	    """Extract the question text from one answer item.

	    Looks at ``it["question"]``. When that is a dict, the text is taken
	    from its ``corrected_sjt`` entry, else its ``original_sjt`` entry
	    (either a dict with a ``question`` field or a bare value), and finally
	    from the dict's own ``question`` field. The result is stringified and
	    stripped; an empty string is returned when nothing is found or when
	    ``it["question"]`` is not a dict.
	    """
	    q = it.get("question") or {}
	    if not isinstance(q, dict):
	        return ""

	    block = q.get("corrected_sjt") or q.get("original_sjt") or {}
	    # A dict block carries the text under "question"; anything else is
	    # used as the text itself.
	    primary = block.get("question") if isinstance(block, dict) else block
	    text = primary or q.get("question") or ""
	    return str(text).strip()

	def get_options_map(it: Dict[str, Any]) -> Dict[str, str]:
	    """
	    Collect the option text for each canonical trait of one answer item.

	    For every trait in CANON, the keys ``<alias>_option`` are tried in the
	    alias order given by CANON_TO_ALIASES. For each key the nested
	    ``corrected_sjt`` / ``original_sjt`` block is checked first, then the
	    ``question`` dict itself. The first non-empty value wins; it is
	    stringified and stripped, and traits whose text ends up empty are left
	    out of the result.
	    """
	    q = it.get("question") or {}
	    block = None
	    if isinstance(q, dict):
	        block = q.get("corrected_sjt") or q.get("original_sjt")

	    # Lookup sources in priority order; only dicts can be searched by key.
	    sources = [src for src in (block, q) if isinstance(src, dict)]

	    result: Dict[str, str] = {}
	    for canon in CANON:
	        keys = [f"{alias}_option" for alias in CANON_TO_ALIASES.get(canon, [canon])]
	        raw = next(
	            (src[key] for key in keys for src in sources if key in src and src[key]),
	            None,
	        )
	        if raw is None:
	            continue
	        text = str(raw).strip()
	        if text:
	            result[canon] = text
	    return result

	# ---------- Build ordered entries per name ----------

	def build_entries(name: str) -> List[Dict[str, Any]]:
	    """Build the ordered list of usable answers for ``name``.

	    Each returned entry is a dict with the keys ``question`` (text),
	    ``selected`` (canonical trait key) and ``options`` (trait -> text).
	    Items that are not dicts, or for which any of the three parts is
	    missing/empty, are dropped, so positions in the result are positions
	    among the *kept* items only.
	    """
	    entries: List[Dict[str, Any]] = []
	    dict_items = (item for item in RAW.get(name, []) if isinstance(item, dict))
	    for item in dict_items:
	        choice = norm_trait(item.get("option"))
	        text = get_question_text(item)
	        option_texts = get_options_map(item)
	        if not (choice and text and option_texts):
	            continue
	        entries.append(dict(question=text, selected=choice, options=option_texts))
	    return entries

	# ---------- Diff Logic (Index-aligned) ----------

	def mismatches_by_index(name_a: str, name_b: str):
	    """Compare two names' answers position by position.

	    Returns a tuple ``(diffs, len_a, len_b)`` where ``len_a`` / ``len_b``
	    are the number of usable entries for each name and ``diffs`` holds one
	    dict per position (up to the shorter length) at which the selected
	    traits differ. Each diff has the keys ``idx``, ``question``, ``a_sel``,
	    ``b_sel`` and ``options``.

	    NOTE(review): positions refer to the lists returned by build_entries(),
	    which skips unusable items. If one name has a skipped item, later
	    positions may no longer refer to the same question for both names —
	    confirm this is acceptable for the data in use.
	    """
	    entries_a = build_entries(name_a)
	    entries_b = build_entries(name_b)

	    diffs = []
	    # zip() stops at the shorter list, i.e. only positions both names have.
	    for pos, (entry_a, entry_b) in enumerate(zip(entries_a, entries_b)):
	        if entry_a["selected"] == entry_b["selected"]:
	            continue

	        # Option text per trait: A's text wins, B's fills any gap.
	        merged: Dict[str, str] = {}
	        for trait in CANON:
	            text = entry_a["options"].get(trait) or entry_b["options"].get(trait)
	            if text:
	                merged[trait] = text

	        diffs.append({
	            "idx": pos,
	            "question": entry_a["question"] or entry_b["question"],
	            "a_sel": entry_a["selected"],
	            "b_sel": entry_b["selected"],
	            "options": merged,
	        })
	    return diffs, len(entries_a), len(entries_b)

	def render_options_md(options: Dict[str, str], a_sel: str, b_sel: str, name_a: str, name_b: str) -> str:
	    """Render the option texts of one question as Markdown/HTML lines.

	    Options are emitted in CANON order, each prefixed with its 1-based
	    position in CANON (traits without text are skipped, so numbers may have
	    gaps). The option chosen by A is wrapped in a green span tagged with
	    ``name_a``; the one chosen by B in a red span tagged with ``name_b``.
	    Lines are separated by blank lines.

	    NOTE(review): option texts and names are inserted into the HTML spans
	    without escaping — confirm the data never contains markup.
	    """
	    green_open = "<span style='background:#e8ffe8;color:#0a6410;font-weight:700;'>"
	    red_open = "<span style='background:#ffe8e8;color:#a00606;font-weight:700;'>"

	    rendered: List[str] = []
	    for pos, trait in enumerate(CANON, start=1):
	        text = options.get(trait)
	        if not text:
	            continue

	        body = f"{disp_label(trait)}: {text}"
	        picked_by_a = trait == a_sel
	        picked_by_b = trait == b_sel

	        if picked_by_a and picked_by_b:
	            # Not expected for mismatches, but render both tags if it happens.
	            body = f"{green_open}{body} ( {name_a} )</span> {red_open}( {name_b} )</span>"
	        elif picked_by_a:
	            body = f"{green_open}{body} ( {name_a} )</span>"
	        elif picked_by_b:
	            body = f"{red_open}{body} ( {name_b} )</span>"

	        rendered.append(f"{pos}. {body}")
	    return "\n\n".join(rendered)

	def render_diffs(name_a: str, name_b: str) -> str:
	    """Render all differences between two names as one Markdown string.

	    The first line is a summary header (number of differences and number
	    of positions compared). When there are no differences a short notice
	    follows; otherwise each difference gets a heading with its zero-padded
	    index, the question text, and the highlighted option list.
	    """
	    diffs, len_a, len_b = mismatches_by_index(name_a, name_b)
	    header = f"{len(diffs)} differences (of {min(len_a, len_b)} compared) for {name_a} vs {name_b}"
	    if not diffs:
	        return header + "\n\n_No differences._"

	    parts: List[str] = [header, ""]
	    for diff in diffs:
	        parts.extend([
	            f"### {diff['idx']:02d}. Question",
	            diff["question"] or "_(no question text found)_",
	            "",
	            render_options_md(diff["options"], diff["a_sel"], diff["b_sel"], name_a, name_b),
	            "",  # blank line between question blocks
	        ])
	    return "\n".join(parts)

	# ---------- Gradio App ----------

	# Build the UI: two dropdowns (Name A / Name B) and one Markdown output that
	# is re-rendered whenever either selection changes.
	with gr.Blocks(title="Differences by Index — Two Personas") as demo:
	    gr.Markdown("# Differences by Index — Two Personas")
	    gr.Markdown(
	        "Extracts the ordered list of selected options per name, compares two names by index, and shows only where they differ.\n"
	        "Name A is highlighted in green, Name B in red. Full question text and all option texts are shown for each mismatch."
	    )

	    all_names = names_list()

	    # Preferred defaults when present; otherwise fall back to what is available.
	    default_a = "Eleanor Hagedorn" if "Eleanor Hagedorn" in all_names else (all_names[0] if all_names else "")
	    if "Hung Wong" in all_names:
	        default_b = "Hung Wong"
	    else:
	        # Pick the first name that differs from A. Taking all_names[1]
	        # blindly could select A itself (e.g. when the preferred A sorts
	        # second), which would compare a persona with itself by default.
	        default_b = next((n for n in all_names if n != default_a), default_a)

	    with gr.Row():
	        # `or None`: with no data the default is "", which is not one of the
	        # choices; leave the dropdown unselected instead.
	        name_a_dd = gr.Dropdown(choices=all_names, value=default_a or None, label="Name A (green)", interactive=True)
	        name_b_dd = gr.Dropdown(choices=all_names, value=default_b or None, label="Name B (red)", interactive=True)

	    out_md = gr.Markdown()

	    def on_change(a: str, b: str):
	        """Re-render the diff view for the currently selected pair."""
	        return render_diffs(a, b)

	    name_a_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])
	    name_b_dd.change(on_change, inputs=[name_a_dd, name_b_dd], outputs=[out_md])

	    # Initial render on page load, using the default pair.
	    demo.load(lambda: render_diffs(default_a, default_b), inputs=None, outputs=[out_md])

	if __name__ == "__main__":
	    demo.launch()