Spaces:

iteratehack
/

MentorFlow

Paused

File size: 12,331 Bytes

a52f96d

"""Enhanced mock student agent with PPO-like features: transfer learning, exponential learning curves."""

import random
from typing import Dict, List, Set, Optional
import numpy as np
from interfaces import Task, StudentState, StudentAgentInterface


class MockStudentAgent(StudentAgentInterface):
    """
    Simulated student whose per-topic skill changes with practice and time.

    Behaviour implemented by this class:
    - Learning: each call to ``learn`` raises the practised topic's base skill
      by an amount proportional to the remaining gap to 1.0.
    - Forgetting: effective skill is the base skill multiplied by an
      exponential retention factor of the time since the topic was practised
      (the "Ebbinghaus curve" of the original design).
    - Transfer: skill in other topics of the same feature group speeds up
      learning of a topic.
    - Curriculum coherence: when the recently practised topics are related,
      learning is multiplied up, and above a threshold it also accelerates
      with the current skill level.
    - Multi-step penalty: harder difficulty levels learn more slowly.

    All randomness comes from a private ``random.Random`` seeded in
    ``__init__``, so a given seed and call sequence is reproducible.
    """

    # Multiple-choice tasks have four options (indices 0-3); a student with no
    # skill therefore answers correctly 25% of the time.
    _NUM_CHOICES = 4
    _CHANCE_ACCURACY = 0.25

    # Lower bound for the multi-step learning multiplier.  Without a floor,
    # ``1 - penalty * steps`` is negative for 'expert', 'master' and
    # 'grandmaster', which would make practice *reduce* skill.  0.1 is a
    # judgment call: the hardest levels still teach, just slowly.
    _MIN_STEP_PENALTY = 0.1

    # Number of reasoning steps assumed for each difficulty level.
    _STEPS_BY_DIFFICULTY = {
        'trivial': 1,
        'easy': 1,
        'medium': 2,
        'hard': 3,
        'expert': 4,
        'master': 5,
        'grandmaster': 6,
    }

    def __init__(
        self,
        learning_rate: float = 0.15,
        forgetting_rate: float = 0.01,  # Reduced for long training
        transfer_strength: float = 0.3,  # How much skills transfer between topics
        seed: int = 42,
        curriculum_coherence: Optional[float] = None  # Track if teacher-guided
    ):
        """
        Initialize the mock student.

        Args:
            learning_rate: Base learning rate (0-1).
            forgetting_rate: Decay constant of the retention curve.
            transfer_strength: How strongly related-topic skill boosts
                learning (0-1).
            seed: Seed for the student's private random generator.
            curriculum_coherence: Stored on the instance as given.
                NOTE(review): no method in this class reads it; the coherence
                used by ``learn`` is always auto-detected from recent topics.
        """
        self.learning_rate = learning_rate
        self.forgetting_rate = forgetting_rate
        self.transfer_strength = transfer_strength
        self.rng = random.Random(seed)

        # Base skill per topic, kept in [0.0, 1.0].
        self.topic_skills: Dict[str, float] = {}

        # Feature groups: topics in the same group transfer skill to each other.
        self.feature_representations: Dict[str, Set[str]] = self._build_feature_groups()

        # Practice history per topic.
        self.topic_attempts: Dict[str, int] = {}
        self.last_practice_time: Dict[str, float] = {}

        # Simulation clock (advanced via ``advance_time``) and learn-call counter.
        self.current_time = 0.0
        self.total_timesteps = 0

        # Sliding window of recently practised topics, used to score coherence.
        self.curriculum_coherence = curriculum_coherence
        self.recent_topics: List[str] = []
        self.recent_topics_window = 5

        # Learning-speed factor per difficulty level (all 7 levels).
        self.difficulty_factors = {
            'trivial': 1.2,      # Very easy, learn quickly
            'easy': 1.0,         # Standard easy
            'medium': 0.8,       # Moderate
            'hard': 0.6,         # Challenging
            'expert': 0.4,       # Very hard (multi-step)
            'master': 0.25,      # Extremely hard
            'grandmaster': 0.15  # Maximum difficulty
        }

        # Per-step learning penalty per difficulty level; multiplied by the
        # level's step count in ``learn``.
        self.multi_step_penalty = {
            'trivial': 0.0,
            'easy': 0.0,
            'medium': 0.1,
            'hard': 0.2,
            'expert': 0.3,
            'master': 0.4,
            'grandmaster': 0.5
        }

    def _build_feature_groups(self) -> Dict[str, Set[str]]:
        """Return the feature groups: group name -> set of related topics."""
        # A topic may appear in several groups (e.g. 'mathematics').
        return {
            'stem_concepts': {'mathematics', 'programming', 'science', 'physics', 'chemistry'},
            'humanities_concepts': {'history', 'literature', 'philosophy', 'art'},
            'social_concepts': {'current_events', 'economics', 'psychology', 'geography'},
            'abstract_reasoning': {'mathematics', 'programming', 'philosophy'},
            'memorization': {'history', 'geography', 'biology', 'chemistry'}
        }

    def _get_transfer_boost(self, topic: str) -> float:
        """
        Calculate the transfer-learning boost a topic gets from related topics.

        For every feature group containing ``topic``, the mean base skill of
        the group's other already-practised topics is scaled by
        ``transfer_strength * 0.5`` and added up.

        Returns:
            Additive boost in [0.0, 0.5]; ``learn`` applies it as ``1 + boost``.
        """
        boost = 0.0

        for group_topics in self.feature_representations.values():
            if topic not in group_topics:
                continue
            # Only topics the student has actually practised contribute.
            related_skills = [
                self.topic_skills[other]
                for other in group_topics
                if other != topic and other in self.topic_skills
            ]
            if related_skills:
                # float(): keep the result a plain Python float, not np.float64.
                avg_related_skill = float(np.mean(related_skills))
                boost += self.transfer_strength * avg_related_skill * 0.5

        return min(boost, 0.5)  # Cap at 50% boost

    def _get_curriculum_coherence(self) -> float:
        """
        Score how coherent the recent topic sequence is.

        Uses the last three entries of ``recent_topics``: +0.3 for every
        feature group that contains all of them or at least two distinct ones,
        +0.2 if the last two practised topics are identical (review pattern).

        Returns:
            Coherence score in [0.0, 1.0]; 0.5 (neutral) while fewer than
            three topics have been recorded.
        """
        if len(self.recent_topics) < 3:
            return 0.5  # Neutral

        recent_set = set(self.recent_topics[-3:])
        coherence_score = 0.0

        for group_topics in self.feature_representations.values():
            if recent_set.issubset(group_topics) or len(recent_set & group_topics) >= 2:
                coherence_score += 0.3

        # Immediate repetition counts as a review pattern.
        if self.recent_topics[-1] == self.recent_topics[-2]:
            coherence_score += 0.2

        return min(coherence_score, 1.0)

    def _success_probability(self, effective_skill: float) -> float:
        """Map effective skill in [0, 1] to the chance of a correct answer."""
        # 0.25 (pure guessing) + 0.75 * effective_skill
        return self._CHANCE_ACCURACY + (1.0 - self._CHANCE_ACCURACY) * effective_skill

    def answer(self, task: Task) -> int:
        """
        Answer a task based on the effective skill in the task's topic.

        The correct index (``task.answer``) is returned with probability
        ``0.25 + 0.75 * effective_skill``; otherwise one of the other three
        indices is drawn uniformly.  Consumes the student's random generator.

        Returns:
            Index of chosen answer (0-3)
        """
        prob_correct = self._success_probability(self._get_effective_skill(task.topic))

        if self.rng.random() < prob_correct:
            return task.answer

        wrong_answers = [i for i in range(self._NUM_CHOICES) if i != task.answer]
        return self.rng.choice(wrong_answers)

    def learn(self, task: Task) -> bool:
        """
        Attempt a task and update the topic's base skill.

        The skill increase is the product of: base learning rate, difficulty
        factor, correctness multiplier (1.0 correct / 0.3 wrong), remaining
        skill gap, transfer boost, curriculum multiplier, multi-step penalty,
        and (for coherent curricula only) a factor growing with current skill.

        Also records the topic in the recent-topics window, bumps the attempt
        counter, stamps the practice time and increments ``total_timesteps``.

        Returns:
            Whether answer was correct
        """
        # Answer first: correctness is judged against the skill *before* learning.
        was_correct = self.answer(task) == task.answer

        topic = task.topic
        difficulty = task.difficulty

        # Initialize if new topic
        if topic not in self.topic_skills:
            self.topic_skills[topic] = 0.0
            self.topic_attempts[topic] = 0
            self.last_practice_time[topic] = self.current_time

        current_base_skill = self.topic_skills[topic]
        difficulty_factor = self.difficulty_factors.get(difficulty, 0.5)
        transfer_boost = self._get_transfer_boost(topic)

        # Coherence is scored on the topics seen *before* this one.
        coherence = self._get_curriculum_coherence()
        curriculum_multiplier = 1.0 + (coherence * 0.5)  # Up to 1.5x

        # Update the sliding window of recent topics.
        self.recent_topics.append(topic)
        if len(self.recent_topics) > self.recent_topics_window:
            self.recent_topics.pop(0)

        # Mistakes still teach, but less.
        learning_multiplier = 1.0 if was_correct else 0.3

        # Multi-step penalty.  The raw value is negative for 'expert' and
        # above (e.g. 1 - 0.5 * 6 = -2.0 for 'grandmaster'), which would make
        # practice lower the skill, so it is floored at a small positive value.
        steps = self._get_steps_for_difficulty(difficulty)
        raw_step_penalty = 1.0 - (self.multi_step_penalty.get(difficulty, 0.0) * steps)
        step_penalty = max(self._MIN_STEP_PENALTY, raw_step_penalty)

        skill_gap = 1.0 - current_base_skill
        if coherence > 0.6:
            # Teacher-guided: learning accelerates as skill accumulates.
            exponential_factor = 1.0 + (current_base_skill * 0.5)
        else:
            # Random/incoherent curriculum: no acceleration.
            exponential_factor = 1.0

        skill_increase = (
            self.learning_rate *
            difficulty_factor *
            learning_multiplier *
            skill_gap *
            (1.0 + transfer_boost) *  # Transfer learning
            curriculum_multiplier *  # Curriculum coherence
            step_penalty *  # Multi-step penalty
            exponential_factor  # Accelerated vs. constant
        )

        # Keep the stored skill inside [0, 1] on both ends.
        self.topic_skills[topic] = min(1.0, max(0.0, current_base_skill + skill_increase))
        self.topic_attempts[topic] = self.topic_attempts.get(topic, 0) + 1
        self.last_practice_time[topic] = self.current_time
        self.total_timesteps += 1

        return was_correct

    def _get_steps_for_difficulty(self, difficulty: str) -> int:
        """Return the number of reasoning steps for a difficulty level (1 if unknown)."""
        return self._STEPS_BY_DIFFICULTY.get(difficulty, 1)

    def _get_effective_skill(self, topic: str) -> float:
        """
        Get effective skill for a topic, accounting for forgetting.

        Formula: effective_skill = base_skill * retention
        retention = exp(-forgetting_rate * time_since_practice)

        Returns:
            Effective skill clamped to [0.0, 1.0]; 0.0 for a topic that has
            never been practised.
        """
        if topic not in self.topic_skills:
            return 0.0

        base_skill = self.topic_skills[topic]
        # A missing timestamp is treated as "practised just now" (no decay).
        time_since = self.current_time - self.last_practice_time.get(topic, self.current_time)

        # float(): keep the result a plain Python float, not np.float64.
        retention = float(np.exp(-self.forgetting_rate * time_since))

        return max(0.0, min(1.0, base_skill * retention))

    def evaluate(self, eval_tasks: List[Task]) -> float:
        """
        Evaluate the student on a list of tasks without learning from them.

        Each task is answered once via ``answer`` (which consumes the random
        generator); skills and practice times are not modified.

        Returns:
            Accuracy (0.0 to 1.0); 0.0 for an empty task list.
        """
        if not eval_tasks:
            return 0.0

        correct = sum(1 for task in eval_tasks if self.answer(task) == task.answer)
        return correct / len(eval_tasks)

    def get_state(self) -> StudentState:
        """
        Get a snapshot of the current student state.

        Returns:
            ``StudentState`` holding, per practised topic, the expected
            accuracy (0.25 + 0.75 * effective skill), the attempt count and the
            time since last practice, plus the global counters.
        """
        topic_accuracies = {
            topic: self._success_probability(self._get_effective_skill(topic))
            for topic in self.topic_skills
        }
        time_since_practice = {
            topic: self.current_time - practised_at
            for topic, practised_at in self.last_practice_time.items()
        }

        return StudentState(
            topic_accuracies=topic_accuracies,
            topic_attempts=self.topic_attempts.copy(),
            time_since_practice=time_since_practice,
            total_timesteps=self.total_timesteps,
            current_time=self.current_time
        )

    def advance_time(self, delta: float = 1.0) -> None:
        """Advance the simulation clock by ``delta``; drives forgetting."""
        self.current_time += delta