Spaces:

iteratehack
/

MentorFlow

Paused

MentorFlow / teacher_agent_dev /mock_student.py

Cornelius

Deploy MentorFlow with GPU support

a52f96d 12 days ago

12.3 kB

	"""Enhanced mock student agent with PPO-like features: transfer learning, exponential learning curves."""

	import random
	from typing import Dict, List, Set, Optional
	import numpy as np
	from interfaces import Task, StudentState, StudentAgentInterface


	class MockStudentAgent(StudentAgentInterface):
	    """
	    Simulated student used to exercise a teacher agent.

	    Features modelled by this class:
	    - Learning: per-topic skill grows with practice, faster when the recent
	      topic sequence looks coherent (teacher-guided) than when it looks random.
	    - Forgetting: effective skill decays exponentially with the time elapsed
	      since the topic was last practised.
	    - Per-topic skill tracking.
	    - Transfer learning: skill in related topics boosts learning of a topic.
	    - Feature groups: named sets of topics that share underlying concepts.
	    - Multi-step penalty: harder difficulties learn more slowly.
	    """

	    # Number of reasoning steps assumed for each difficulty level.
	    # Difficulties missing from this table count as a single step.
	    _STEPS_BY_DIFFICULTY: Dict[str, int] = {
	        'trivial': 1,
	        'easy': 1,
	        'medium': 2,
	        'hard': 3,
	        'expert': 4,
	        'master': 5,
	        'grandmaster': 6,
	    }

	    # Lower bound for the multi-step penalty multiplier. Without it the
	    # multiplier (1 - penalty * steps) is negative for 'expert' and above,
	    # which would make practising hard tasks *reduce* skill.
	    _MIN_STEP_PENALTY: float = 0.1

	    def __init__(
	        self,
	        learning_rate: float = 0.15,
	        forgetting_rate: float = 0.01,  # Reduced for long training
	        transfer_strength: float = 0.3,  # How much skills transfer between topics
	        seed: int = 42,
	        curriculum_coherence: Optional[float] = None  # Track if teacher-guided
	    ):
	        """
	        Initialize the mock student.

	        Args:
	            learning_rate: Base learning rate (0-1).
	            forgetting_rate: Decay constant of the exponential forgetting curve.
	            transfer_strength: How strongly related-topic skill boosts learning (0-1).
	            seed: Seed for the private random number generator.
	            curriculum_coherence: Stored on the instance unchanged.
	                NOTE(review): no method of this class reads this value;
	                coherence is always auto-detected from the recent topic
	                sequence. Confirm whether it was meant to be an override.
	        """
	        self.learning_rate = learning_rate
	        self.forgetting_rate = forgetting_rate
	        self.transfer_strength = transfer_strength
	        self.rng = random.Random(seed)

	        # Per-topic base skill (0.0 to 1.0), before forgetting is applied.
	        self.topic_skills: Dict[str, float] = {}

	        # Groups of related topics; topics in the same group transfer skill.
	        self.feature_representations: Dict[str, Set[str]] = self._build_feature_groups()

	        # Practice history.
	        self.topic_attempts: Dict[str, int] = {}
	        self.last_practice_time: Dict[str, float] = {}

	        # Simulation clock used by the forgetting model.
	        self.current_time = 0.0
	        self.total_timesteps = 0

	        # Curriculum coherence tracking (see the NOTE in the docstring).
	        self.curriculum_coherence = curriculum_coherence
	        self.recent_topics: List[str] = []  # Most recent topics, oldest first
	        self.recent_topics_window = 5

	        # Learning-rate factor for each of the 7 difficulty levels.
	        self.difficulty_factors = {
	            'trivial': 1.2,  # Very easy, learn quickly
	            'easy': 1.0,  # Standard easy
	            'medium': 0.8,  # Moderate
	            'hard': 0.6,  # Challenging
	            'expert': 0.4,  # Very hard (multi-step)
	            'master': 0.25,  # Extremely hard
	            'grandmaster': 0.15,  # Maximum difficulty
	        }

	        # Per-step penalty: harder difficulties need more practice.
	        self.multi_step_penalty = {
	            'trivial': 0.0,
	            'easy': 0.0,
	            'medium': 0.1,
	            'hard': 0.2,
	            'expert': 0.3,
	            'master': 0.4,
	            'grandmaster': 0.5,
	        }

	    def _build_feature_groups(self) -> Dict[str, Set[str]]:
	        """Build the named groups of related topics used for transfer learning."""
	        # A topic may appear in several groups (e.g. 'mathematics').
	        return {
	            'stem_concepts': {'mathematics', 'programming', 'science', 'physics', 'chemistry'},
	            'humanities_concepts': {'history', 'literature', 'philosophy', 'art'},
	            'social_concepts': {'current_events', 'economics', 'psychology', 'geography'},
	            'abstract_reasoning': {'mathematics', 'programming', 'philosophy'},
	            'memorization': {'history', 'geography', 'biology', 'chemistry'},
	        }

	    def _get_transfer_boost(self, topic: str) -> float:
	        """
	        Calculate the transfer-learning boost a topic gets from related topics.

	        For every feature group containing ``topic``, the mean base skill of
	        the other already-practised topics in that group contributes
	        ``transfer_strength * mean * 0.5`` to the boost.

	        Returns:
	            Additive boost fraction in [0, 0.5]; ``learn`` applies it as
	            ``(1.0 + boost)``.
	        """
	        boost = 0.0

	        for topics in self.feature_representations.values():
	            if topic not in topics:
	                continue
	            # Only topics the student has actually practised contribute.
	            related_skills = [
	                self.topic_skills[t]
	                for t in topics
	                if t != topic and t in self.topic_skills
	            ]
	            if related_skills:
	                # float() keeps numpy scalars out of the return value.
	                avg_related_skill = float(np.mean(related_skills))
	                boost += self.transfer_strength * avg_related_skill * 0.5

	        return min(boost, 0.5)  # Cap at 50% boost

	    def _get_curriculum_coherence(self) -> float:
	        """
	        Score how coherent the recent topic sequence looks.

	        Each feature group that contains all of the last three topics, or at
	        least two distinct ones, adds 0.3; an immediate repeat of the last
	        topic (review pattern) adds 0.2.

	        Returns:
	            Coherence score capped at 1.0, or 0.5 (neutral) when fewer than
	            three topics have been recorded.
	        """
	        if len(self.recent_topics) < 3:
	            return 0.5  # Neutral

	        recent_set = set(self.recent_topics[-3:])
	        coherence_score = 0.0

	        for topics in self.feature_representations.values():
	            if recent_set.issubset(topics) or len(recent_set.intersection(topics)) >= 2:
	                coherence_score += 0.3

	        # At least three topics are recorded here, so indexing is safe.
	        if self.recent_topics[-1] == self.recent_topics[-2]:
	            coherence_score += 0.2  # Review pattern

	        return min(coherence_score, 1.0)

	    def answer(self, task: Task) -> int:
	        """
	        Answer a task based on the effective (forgetting-adjusted) topic skill.

	        The probability of answering correctly is
	        ``0.25 + 0.75 * effective_skill``; otherwise one of the other three
	        indices in ``range(4)`` is chosen at random.

	        Returns:
	            Index of the chosen answer (0-3).
	        """
	        effective_skill = self._get_effective_skill(task.topic)

	        # 0.25 is the chance level for a four-option question.
	        prob_correct = 0.25 + 0.75 * effective_skill

	        if self.rng.random() < prob_correct:
	            return task.answer

	        wrong_answers = [i for i in range(4) if i != task.answer]
	        return self.rng.choice(wrong_answers)

	    def learn(self, task: Task) -> bool:
	        """
	        Attempt a task and update the skill for its topic.

	        The skill increase is the product of:
	        - the base learning rate,
	        - the difficulty factor (0.5 for unknown difficulties),
	        - 1.0 if the answer was correct, 0.3 otherwise,
	        - the remaining skill gap ``1 - base_skill``,
	        - ``1 + transfer_boost`` from related topics,
	        - a curriculum multiplier of up to 1.5x,
	        - the multi-step penalty (floored so it never becomes negative),
	        - an acceleration factor that grows with skill when coherence > 0.6.

	        Returns:
	            Whether the answer was correct.
	        """
	        was_correct = self.answer(task) == task.answer

	        topic = task.topic
	        difficulty = task.difficulty

	        # First encounter with this topic: start tracking it.
	        if topic not in self.topic_skills:
	            self.topic_skills[topic] = 0.0
	            self.topic_attempts[topic] = 0
	            self.last_practice_time[topic] = self.current_time

	        current_base_skill = self.topic_skills[topic]
	        difficulty_factor = self.difficulty_factors.get(difficulty, 0.5)
	        transfer_boost = self._get_transfer_boost(topic)

	        # Coherence is measured on the topics seen *before* this task; the
	        # current topic is only appended afterwards.
	        coherence = self._get_curriculum_coherence()
	        curriculum_multiplier = 1.0 + (coherence * 0.5)  # Up to 1.5x

	        self.recent_topics.append(topic)
	        if len(self.recent_topics) > self.recent_topics_window:
	            self.recent_topics.pop(0)

	        # A wrong answer still teaches something, just less.
	        learning_multiplier = 1.0 if was_correct else 0.3

	        step_penalty = self._get_step_penalty(difficulty)

	        skill_gap = 1.0 - current_base_skill
	        if coherence > 0.6:
	            # Teacher-guided: learning accelerates as skill accumulates.
	            exponential_factor = 1.0 + (current_base_skill * 0.5)
	        else:
	            # Random/incoherent: no acceleration.
	            exponential_factor = 1.0

	        skill_increase = (
	            self.learning_rate *
	            difficulty_factor *
	            learning_multiplier *
	            skill_gap *
	            (1.0 + transfer_boost) *  # Transfer learning
	            curriculum_multiplier *  # Curriculum coherence
	            step_penalty *  # Multi-step penalty
	            exponential_factor  # Exponential vs linear
	        )

	        # Keep the stored skill inside [0, 1] in both directions.
	        self.topic_skills[topic] = max(0.0, min(1.0, current_base_skill + skill_increase))
	        self.topic_attempts[topic] = self.topic_attempts.get(topic, 0) + 1
	        self.last_practice_time[topic] = self.current_time
	        self.total_timesteps += 1

	        return was_correct

	    def _get_step_penalty(self, difficulty: str) -> float:
	        """Return the multi-step learning multiplier, floored at ``_MIN_STEP_PENALTY``."""
	        per_step = self.multi_step_penalty.get(difficulty, 0.0)
	        steps = self._get_steps_for_difficulty(difficulty)
	        # Unfloored, 'expert' gives 1 - 0.3 * 4 = -0.2 and harder levels go
	        # lower still, which would turn practice into unlearning.
	        return max(self._MIN_STEP_PENALTY, 1.0 - (per_step * steps))

	    def _get_steps_for_difficulty(self, difficulty: str) -> int:
	        """Return the number of reasoning steps for a difficulty level (1 if unknown)."""
	        return self._STEPS_BY_DIFFICULTY.get(difficulty, 1)

	    def _get_effective_skill(self, topic: str) -> float:
	        """
	        Get the effective skill for a topic after forgetting.

	        Formula: effective_skill = base_skill * retention
	                 retention = exp(-forgetting_rate * time_since_practice)

	        Returns:
	            Effective skill clamped to [0, 1]; 0.0 for a topic never practised.
	        """
	        if topic not in self.topic_skills:
	            return 0.0

	        base_skill = self.topic_skills[topic]
	        time_since = self.current_time - self.last_practice_time.get(topic, self.current_time)

	        # Exponential forgetting curve; float() avoids leaking numpy scalars.
	        retention = float(np.exp(-self.forgetting_rate * time_since))

	        return max(0.0, min(1.0, base_skill * retention))

	    def evaluate(self, eval_tasks: List[Task]) -> float:
	        """
	        Evaluate the student on a list of tasks without learning from them.

	        Each task is answered through ``answer``, which draws from the
	        student's random number generator.

	        Returns:
	            Accuracy (0.0 to 1.0); 0.0 for an empty task list.
	        """
	        if not eval_tasks:
	            return 0.0

	        correct = sum(1 for task in eval_tasks if self.answer(task) == task.answer)
	        return correct / len(eval_tasks)

	    def get_state(self) -> StudentState:
	        """Build a snapshot of the current student state."""
	        # Expected accuracy per topic, using the same formula as ``answer``.
	        topic_accuracies = {
	            topic: 0.25 + 0.75 * self._get_effective_skill(topic)
	            for topic in self.topic_skills
	        }

	        time_since_practice = {
	            topic: self.current_time - last_time
	            for topic, last_time in self.last_practice_time.items()
	        }

	        return StudentState(
	            topic_accuracies=topic_accuracies,
	            topic_attempts=self.topic_attempts.copy(),
	            time_since_practice=time_since_practice,
	            total_timesteps=self.total_timesteps,
	            current_time=self.current_time
	        )

	    def advance_time(self, delta: float = 1.0) -> None:
	        """Advance the simulation clock by ``delta`` for the forgetting model."""
	        self.current_time += delta