Spaces:

AmnaHassan
/

Activation-Patching

Sleeping

AmnaHassan committed on Nov 21, 2025

Commit

7796c8a

verified ·

1 Parent(s): d240aa6

Create agents.py

Files changed (1) hide show

agents.py ADDED Viewed

+# agents.py
+from typing import List
class ExperimentAgent:
    """Run one prompt through a model and persist the outcome.

    The agent is a thin coordinator: it holds a ``model`` and a ``db``
    collaborator and wires their calls together in :meth:`run`.
    """

    def __init__(self, model, db):
        """Store the collaborators used by :meth:`run`.

        Args:
            model: Object exposing ``generate_text(prompt)`` and
                ``layer_importance(prompt, experiment_type=...)``.
            db: Object exposing
                ``save_experiment(prompt, generated, layer_scores)``.
        """
        self.model = model
        self.db = db

    def run(self, prompt, experiment_type="story_continuation"):
        """Generate text, score the layers, and save the experiment.

        Args:
            prompt: Input passed unchanged to the model and to the database.
            experiment_type: Forwarded to ``model.layer_importance`` as a
                keyword argument; it is not passed to ``save_experiment``.

        Returns:
            A 3-tuple ``(exp_id, generated, layer_scores)`` where ``exp_id``
            is whatever ``db.save_experiment`` returned.
        """
        # Call order matters: generation first, then scoring, then persistence.
        text = self.model.generate_text(prompt)
        scores = self.model.layer_importance(
            prompt,
            experiment_type=experiment_type,
        )
        record_id = self.db.save_experiment(prompt, text, scores)
        return record_id, text, scores
class ExplanationAgent:
    """Turn per-layer scores into a short natural-language summary."""

    def __init__(self):
        # Stateless: there is nothing to configure.
        pass

    def explain_layer_importance(self, layer_scores: List[float], top_k: int = 3) -> str:
        """Summarise which layers have the highest scores.

        A simple heuristic: rank the layers by score, report the indices of
        the ``top_k`` highest-scoring ones (best first), and append a fixed
        explanatory sentence.

        Args:
            layer_scores: One numeric score per layer, indexed by layer
                number. NaN entries are ignored when ranking.
            top_k: Maximum number of layers to report. Defaults to 3; fewer
                are reported when fewer valid scores exist.

        Returns:
            The summary string, or ``"No layer scores available."`` when the
            input is empty or contains only NaN values.

        Raises:
            ValueError: If ``top_k`` is smaller than 1.
        """
        # Imported lazily so that importing this module does not require numpy.
        import numpy as np

        # A zero top_k would make the ``[-top_k:]`` slice select *every* layer.
        if top_k < 1:
            raise ValueError("top_k must be a positive integer")

        scores = np.asarray(layer_scores, dtype=float)
        if scores.size == 0:
            return "No layer scores available."

        # Stable sort keeps tie ordering deterministic. argsort places NaN
        # last, which would make NaN layers look like the top ones, so they
        # are filtered out before taking the tail.
        order = np.argsort(scores, kind="stable")
        order = order[~np.isnan(scores[order])]
        if order.size == 0:
            return "No layer scores available."

        top_idx = order[-top_k:][::-1]  # highest score first
        top_layers = ", ".join(str(int(i)) for i in top_idx)
        return (
            f"Top influencing layers (proxy): {top_layers}. "
            "Layers with higher scores changed the model's next-token logits "
            "the most when ablated. "
            "This suggests they strongly affect immediate generation behavior "
            "for the provided prompt."
        )