"""Enhanced mock student agent with PPO-like features: transfer learning, exponential learning curves.""" import random from typing import Dict, List, Set, Optional import numpy as np from interfaces import Task, StudentState, StudentAgentInterface class MockStudentAgent(StudentAgentInterface): """ Enhanced mock student with PPO-like features: - Learning: improves with practice (exponential when guided, linear when random) - Forgetting: Ebbinghaus curve - Per-topic skill tracking - Transfer learning: skills in related topics help each other - Feature representations: abstract concepts that transfer across topics - Exponential learning curve when teacher-guided (coherent curriculum) - Stochastic/erratic learning when random """ def __init__( self, learning_rate: float = 0.15, forgetting_rate: float = 0.01, # Reduced for long training transfer_strength: float = 0.3, # How much skills transfer between topics seed: int = 42, curriculum_coherence: Optional[float] = None # Track if teacher-guided ): """ Initialize enhanced mock student. Args: learning_rate: Base learning rate (0-1) forgetting_rate: How fast retention decays transfer_strength: How much skills transfer (0-1) seed: Random seed curriculum_coherence: Track if following coherent curriculum (auto-detected) """ self.learning_rate = learning_rate self.forgetting_rate = forgetting_rate self.transfer_strength = transfer_strength self.rng = random.Random(seed) # Track per-topic base skill (0.0 to 1.0) self.topic_skills: Dict[str, float] = {} # PPO-like: Feature representations (abstract concepts that transfer) # Groups of related topics share feature representations self.feature_representations: Dict[str, Set[str]] = self._build_feature_groups() # Track history self.topic_attempts: Dict[str, int] = {} self.last_practice_time: Dict[str, float] = {} # Time tracking for forgetting simulation self.current_time = 0.0 self.total_timesteps = 0 # Track curriculum coherence (exponential learning vs stochastic) self.curriculum_coherence = curriculum_coherence self.recent_topics: List[str] = [] # Track recent topic sequence self.recent_topics_window = 5 # Expanded difficulty learning factors (all 7 levels) self.difficulty_factors = { 'trivial': 1.2, # Very easy, learn quickly 'easy': 1.0, # Standard easy 'medium': 0.8, # Moderate 'hard': 0.6, # Challenging 'expert': 0.4, # Very hard (multi-step) 'master': 0.25, # Extremely hard 'grandmaster': 0.15 # Maximum difficulty } # Multi-step penalty: harder difficulties need more practice self.multi_step_penalty = { 'trivial': 0.0, 'easy': 0.0, 'medium': 0.1, 'hard': 0.2, 'expert': 0.3, 'master': 0.4, 'grandmaster': 0.5 } def _build_feature_groups(self) -> Dict[str, Set[str]]: """Build groups of related topics for transfer learning.""" # Group related topics that share underlying concepts return { 'stem_concepts': {'mathematics', 'programming', 'science', 'physics', 'chemistry'}, 'humanities_concepts': {'history', 'literature', 'philosophy', 'art'}, 'social_concepts': {'current_events', 'economics', 'psychology', 'geography'}, 'abstract_reasoning': {'mathematics', 'programming', 'philosophy'}, 'memorization': {'history', 'geography', 'biology', 'chemistry'} } def _get_transfer_boost(self, topic: str) -> float: """ Calculate transfer learning boost from related topics. Returns: Multiplier for learning rate based on related topic skills """ boost = 0.0 # Find which feature groups this topic belongs to for feature_name, topics in self.feature_representations.items(): if topic in topics: # Get average skill from related topics related_skills = [ self.topic_skills.get(t, 0.0) for t in topics if t != topic and t in self.topic_skills ] if related_skills: avg_related_skill = np.mean(related_skills) # Transfer boost proportional to related skills boost += self.transfer_strength * avg_related_skill * 0.5 return min(boost, 0.5) # Cap at 50% boost def _get_curriculum_coherence(self) -> float: """ Detect if student is following coherent curriculum (teacher-guided). Returns: Coherence score (0.0 = random, 1.0 = very coherent) """ if len(self.recent_topics) < 3: return 0.5 # Neutral # Check if topics are related (same feature groups) recent_set = set(self.recent_topics[-3:]) coherence_score = 0.0 for feature_name, topics in self.feature_representations.items(): if recent_set.issubset(topics) or len(recent_set.intersection(topics)) >= 2: coherence_score += 0.3 # Check for progressive difficulty or review patterns if len(self.recent_topics) >= 2: # If topics repeat (review) or progress logically if self.recent_topics[-1] == self.recent_topics[-2]: coherence_score += 0.2 # Review pattern return min(coherence_score, 1.0) def answer(self, task: Task) -> int: """ Answer a task based on effective skill (accounting for forgetting and transfer). Returns: Index of chosen answer (0-3) """ effective_skill = self._get_effective_skill(task.topic) # Probability of correct = 0.25 (random) + 0.75 * effective_skill prob_correct = 0.25 + 0.75 * effective_skill if self.rng.random() < prob_correct: return task.answer else: wrong_answers = [i for i in range(4) if i != task.answer] return self.rng.choice(wrong_answers) def learn(self, task: Task) -> bool: """ Learn from a task with PPO-like features. Features: - Transfer learning: Related topics boost learning - Exponential learning: Coherent curriculum accelerates learning - Multi-step penalty: Harder tasks need more practice - Adaptive learning: Learning rate adjusts based on context Returns: Whether answer was correct """ was_correct = (self.answer(task) == task.answer) topic = task.topic difficulty = task.difficulty # Initialize if new topic if topic not in self.topic_skills: self.topic_skills[topic] = 0.0 self.topic_attempts[topic] = 0 self.last_practice_time[topic] = self.current_time current_base_skill = self.topic_skills[topic] difficulty_factor = self.difficulty_factors.get(difficulty, 0.5) # PPO-like: Transfer learning boost transfer_boost = self._get_transfer_boost(topic) # PPO-like: Curriculum coherence (exponential learning when guided) coherence = self._get_curriculum_coherence() curriculum_multiplier = 1.0 + (coherence * 0.5) # Up to 1.5x with coherent curriculum # Update recent topics for coherence tracking self.recent_topics.append(topic) if len(self.recent_topics) > self.recent_topics_window: self.recent_topics.pop(0) # Learning multiplier based on correctness if was_correct: learning_multiplier = 1.0 else: learning_multiplier = 0.3 # Multi-step penalty for very hard tasks steps = self._get_steps_for_difficulty(difficulty) step_penalty = 1.0 - (self.multi_step_penalty.get(difficulty, 0.0) * steps) # Exponential learning when guided, linear when random if coherence > 0.6: # Teacher-guided (coherent) # Exponential: faster learning as skills accumulate skill_gap = 1.0 - current_base_skill exponential_factor = 1.0 + (current_base_skill * 0.5) # Accelerates with skill else: # Random/progressive (incoherent) # Linear: constant learning rate skill_gap = 1.0 - current_base_skill exponential_factor = 1.0 # No acceleration skill_increase = ( self.learning_rate * difficulty_factor * learning_multiplier * skill_gap * (1.0 + transfer_boost) * # Transfer learning curriculum_multiplier * # Curriculum coherence step_penalty * # Multi-step penalty exponential_factor # Exponential vs linear ) self.topic_skills[topic] = min(1.0, current_base_skill + skill_increase) self.topic_attempts[topic] = self.topic_attempts.get(topic, 0) + 1 self.last_practice_time[topic] = self.current_time self.total_timesteps += 1 return was_correct def _get_steps_for_difficulty(self, difficulty: str) -> int: """Determine number of reasoning steps for a difficulty level.""" step_map = { 'trivial': 1, 'easy': 1, 'medium': 2, 'hard': 3, 'expert': 4, 'master': 5, 'grandmaster': 6 } return step_map.get(difficulty, 1) def _get_effective_skill(self, topic: str) -> float: """ Get effective skill accounting for forgetting (Ebbinghaus curve). Formula: effective_skill = base_skill * retention retention = exp(-forgetting_rate * time_since_practice) """ if topic not in self.topic_skills: return 0.0 base_skill = self.topic_skills[topic] time_since = self.current_time - self.last_practice_time.get(topic, self.current_time) # Ebbinghaus forgetting curve retention = np.exp(-self.forgetting_rate * time_since) # Effective skill = base skill reduced by forgetting effective_skill = base_skill * retention return max(0.0, min(1.0, effective_skill)) def evaluate(self, eval_tasks: List[Task]) -> float: """ Evaluate student on a list of tasks. Returns: Accuracy (0.0 to 1.0) """ if not eval_tasks: return 0.0 correct = 0 for task in eval_tasks: answer = self.answer(task) if answer == task.answer: correct += 1 return correct / len(eval_tasks) def get_state(self) -> StudentState: """Get current student state.""" topic_accuracies = {} for topic in self.topic_skills.keys(): effective_skill = self._get_effective_skill(topic) topic_accuracies[topic] = 0.25 + 0.75 * effective_skill time_since_practice = {} for topic in self.last_practice_time: time_since_practice[topic] = self.current_time - self.last_practice_time[topic] return StudentState( topic_accuracies=topic_accuracies, topic_attempts=self.topic_attempts.copy(), time_since_practice=time_since_practice, total_timesteps=self.total_timesteps, current_time=self.current_time ) def advance_time(self, delta: float = 1.0): """Advance time for forgetting simulation.""" self.current_time += delta