# MentorFlow/student_agent_dev/student_metrics.py
"""
Comprehensive metrics tracking for student learning.
Tracks overall accuracy, per-topic performance, retention, and efficiency metrics.
"""
from dataclasses import dataclass, field
from typing import List, Dict
import numpy as np
from collections import defaultdict
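
# NOTE: 'Task' (referenced in log_iteration below) is defined elsewhere in the
# project; this module only assumes it exposes task_id, topic, and difficulty.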

@dataclass
class StudentMetrics:
    """Comprehensive metrics for student learning."""

    # Time series data
    iterations: List[int] = field(default_factory=list)
    overall_accuracies: List[float] = field(default_factory=list)
    per_topic_accuracies: Dict[str, List[float]] = field(default_factory=lambda: defaultdict(list))

    # Per-iteration details
    tasks_seen: List[str] = field(default_factory=list)  # task_id
    topics_seen: List[str] = field(default_factory=list)
    difficulties_seen: List[str] = field(default_factory=list)
    was_correct: List[bool] = field(default_factory=list)

    # Retention tracking
    retention_factors: Dict[str, List[float]] = field(default_factory=lambda: defaultdict(list))

    # Learning efficiency
    tasks_to_mastery: Dict[str, int] = field(default_factory=dict)  # topic -> num tasks

    def log_iteration(
        self,
        iteration: int,
        overall_acc: float,
        topic_accs: Dict[str, float],
        task: 'Task',
        correct: bool,
        retention_factors: Dict[str, float],
    ):
        """Log a single training iteration."""
        self.iterations.append(iteration)
        self.overall_accuracies.append(overall_acc)
        for topic, acc in topic_accs.items():
            self.per_topic_accuracies[topic].append(acc)
        self.tasks_seen.append(task.task_id)
        self.topics_seen.append(task.topic)
        self.difficulties_seen.append(task.difficulty)
        self.was_correct.append(correct)
        for topic, retention in retention_factors.items():
            self.retention_factors[topic].append(retention)

    def compute_learning_rate(self, window: int = 50) -> float:
        """Compute the average per-task accuracy improvement over the last `window` tasks."""
        if len(self.overall_accuracies) < window:
            return 0.0
        recent_accs = self.overall_accuracies[-window:]
        improvements = np.diff(recent_accs)  # differences between consecutive accuracies
        return float(np.mean(improvements))
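
    # Worked example: compute_learning_rate(window=4) over accuracies
    # [0.50, 0.55, 0.53, 0.60] takes diffs [0.05, -0.02, 0.07] and returns
    # their mean, ~0.033 accuracy gained per task.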

    def compute_sample_efficiency(self, target_accuracy: float = 0.7) -> int:
        """Number of tasks needed to reach the target accuracy.

        Returns the total number of tasks logged so far if the target
        has not been reached yet.
        """
        for i, acc in enumerate(self.overall_accuracies):
            if acc >= target_accuracy:
                return i
        return len(self.overall_accuracies)  # not reached yet

    def compute_topic_mastery_times(self, mastery_threshold: float = 0.8) -> Dict[str, int]:
        """Tasks needed to master each topic (first crossing of the threshold)."""
        mastery_times = {}
        for topic, accs in self.per_topic_accuracies.items():
            for i, acc in enumerate(accs):
                if acc >= mastery_threshold:
                    mastery_times[topic] = i
                    break
        return mastery_times

    def get_summary_statistics(self) -> Dict:
        """Get overall summary statistics."""
        return {
            'total_tasks': len(self.iterations),
            'final_accuracy': self.overall_accuracies[-1] if self.overall_accuracies else 0.0,
            'max_accuracy': max(self.overall_accuracies) if self.overall_accuracies else 0.0,
            'mean_accuracy': float(np.mean(self.overall_accuracies)) if self.overall_accuracies else 0.0,
            'learning_rate': self.compute_learning_rate(),
            'sample_efficiency_70': self.compute_sample_efficiency(0.7),
            'sample_efficiency_80': self.compute_sample_efficiency(0.8),
            'topics_practiced': len(self.per_topic_accuracies),
            'topic_mastery_times': self.compute_topic_mastery_times(),
        }
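
# ---------------------------------------------------------------------------
# Usage sketch. Assumption: the real Task class lives elsewhere in the
# project; _StubTask below is a hypothetical stand-in exposing only the three
# attributes this module reads (task_id, topic, difficulty).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    @dataclass
    class _StubTask:
        task_id: str
        topic: str
        difficulty: str

    metrics = StudentMetrics()
    for i, acc in enumerate([0.40, 0.55, 0.72]):
        metrics.log_iteration(
            iteration=i,
            overall_acc=acc,
            topic_accs={"algebra": acc},
            task=_StubTask(task_id=f"t{i}", topic="algebra", difficulty="easy"),
            correct=acc > 0.5,
            retention_factors={"algebra": 0.9},
        )
    print(metrics.get_summary_statistics())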