"""Unit tests for Teacher Agent system.""" import sys from pathlib import Path # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent)) from mock_student import MockStudentAgent from mock_task_generator import MockTaskGenerator from teacher_agent import TeacherAgent from interfaces import TeacherAction def test_mock_student_learning(): """Test that mock student learns.""" print("Testing student learning...", end=" ") student = MockStudentAgent(learning_rate=0.15, forgetting_rate=0.05) generator = MockTaskGenerator() # Test learning topic = 'history' tasks = [generator.generate_task(topic, 'easy') for _ in range(20)] accuracies = [] for task in tasks: eval_tasks = [generator.generate_task(topic, 'easy') for _ in range(10)] acc = student.evaluate(eval_tasks) accuracies.append(acc) student.learn(task) # Student should improve improvement = accuracies[-1] - accuracies[0] assert improvement > 0.1, f"Student should improve! Improvement: {improvement:.3f}" print("✅ PASSED") print(f" Initial accuracy: {accuracies[0]:.3f}") print(f" Final accuracy: {accuracies[-1]:.3f}") print(f" Improvement: {improvement:.3f}") def test_mock_student_forgetting(): """Test that mock student forgets over time.""" print("Testing student forgetting...", end=" ") student = MockStudentAgent(learning_rate=0.15, forgetting_rate=0.1) generator = MockTaskGenerator() # Train on one topic topic = 'science' for _ in range(30): task = generator.generate_task(topic, 'easy') student.learn(task) # Measure accuracy eval_tasks = [generator.generate_task(topic, 'easy') for _ in range(10)] acc_before = student.evaluate(eval_tasks) # Time passes without practice student.advance_time(50.0) acc_after = student.evaluate(eval_tasks) # Student should forget assert acc_after < acc_before - 0.05, f"Student should forget! Before: {acc_before:.3f}, After: {acc_after:.3f}" print("✅ PASSED") print(f" Accuracy before forgetting: {acc_before:.3f}") print(f" Accuracy after 50 time units: {acc_after:.3f}") print(f" Forgetting: {acc_before - acc_after:.3f}") def test_mock_student_initial_accuracy(): """Test that student starts at ~25% accuracy (random guessing).""" print("Testing initial student accuracy...", end=" ") student = MockStudentAgent() generator = MockTaskGenerator() # Evaluate on many tasks eval_tasks = [generator.generate_task('history', 'easy') for _ in range(100)] initial_acc = student.evaluate(eval_tasks) # Should be around 25% (random guessing on 4-choice MCQ) assert 0.15 < initial_acc < 0.35, f"Initial accuracy should be ~25%! Got: {initial_acc:.3f}" print("✅ PASSED") print(f" Initial accuracy: {initial_acc:.3f} (~25% expected)") def test_teacher_exploration(): """Test that teacher explores all actions.""" print("Testing teacher exploration...", end=" ") teacher = TeacherAgent(exploration_bonus=5.0) # High exploration from mock_student import MockStudentAgent from interfaces import StudentState # Create minimal student state student = MockStudentAgent() actions_tried = set() for _ in range(100): student_state = student.get_state() action = teacher.select_action(student_state) actions_tried.add((action.topic, action.difficulty, action.is_review)) teacher.update(action, 0.0) # Neutral reward # Teacher should explore many actions (now has 15 topics × 7 difficulties × 2 = 210 actions) expected_actions = 15 * 7 * 2 # topics × difficulties × review options assert len(actions_tried) > 20, f"Teacher should explore many actions! 

def test_teacher_exploitation():
    """Test that the teacher exploits good actions."""
    print("Testing teacher exploitation...", end=" ")
    teacher = TeacherAgent(exploration_bonus=0.1)  # Very low exploration
    student = MockStudentAgent()

    # Manually set one action to be very good
    best_action = TeacherAction(topic='history', difficulty='easy', is_review=False)
    best_action_idx = teacher._action_to_index(best_action)

    # First, try all actions once (cold start)
    for i in range(teacher.num_actions):
        test_action = teacher._index_to_action(i)
        if i == best_action_idx:
            teacher.update(test_action, 100.0)  # Very high reward
        else:
            teacher.update(test_action, 0.0)  # Low reward

    # Now the teacher should prefer the best action
    selections = []
    for _ in range(50):  # More samples for better statistics
        student_state = student.get_state()
        action = teacher.select_action(student_state)
        idx = teacher._action_to_index(action)
        selections.append(idx == best_action_idx)

    # Should select the best action frequently
    exploit_rate = sum(selections) / len(selections)
    assert exploit_rate > 0.3, f"Teacher should exploit good actions! Exploit rate: {exploit_rate:.2f}"

    print("✅ PASSED")
    print(f"  Best action selection rate: {exploit_rate:.2f}")


def test_teacher_action_encoding():
    """Test that action encoding/decoding works correctly."""
    print("Testing action encoding/decoding...", end=" ")
    teacher = TeacherAgent()

    # Round-trip every action index through decode/encode
    for idx in range(teacher.num_actions):
        action1 = teacher._index_to_action(idx)
        idx2 = teacher._action_to_index(action1)
        action2 = teacher._index_to_action(idx2)
        assert idx == idx2, f"Encoding mismatch! {idx} != {idx2}"
        assert action1.topic == action2.topic, "Topic mismatch"
        assert action1.difficulty == action2.difficulty, "Difficulty mismatch"
        assert action1.is_review == action2.is_review, "Review flag mismatch"

    print("✅ PASSED")
    print(f"  Tested {teacher.num_actions} actions")


def test_task_generator():
    """Test that the task generator creates valid tasks."""
    print("Testing task generator...", end=" ")
    generator = MockTaskGenerator()
    topics = generator.get_available_topics()
    difficulties = generator.get_available_difficulties()

    # Check that we have topics and difficulties (exact count may vary after expansion)
    assert len(topics) >= 5, f"Should have at least 5 topics, got {len(topics)}"
    assert len(difficulties) >= 3, f"Should have at least 3 difficulties, got {len(difficulties)}"

    # Generate tasks for all combinations
    for topic in topics:
        for difficulty in difficulties:
            task = generator.generate_task(topic, difficulty)
            assert len(task.choices) == 4, "Should have 4 choices"
            assert 0 <= task.answer < 4, "Answer should be valid index"
            assert task.topic == topic, "Topic should match"
            assert task.difficulty == difficulty, "Difficulty should match"

    print("✅ PASSED")
    print(f"  Generated tasks for {len(topics)} topics × {len(difficulties)} difficulties")


def run_all_tests():
    """Run all tests."""
    print("=" * 70)
    print("RUNNING TESTS")
    print("=" * 70)
    print()

    tests = [
        test_task_generator,
        test_mock_student_initial_accuracy,
        test_mock_student_learning,
        test_mock_student_forgetting,
        test_teacher_action_encoding,
        test_teacher_exploration,
        test_teacher_exploitation,
    ]

    passed = 0
    failed = 0
    for test_func in tests:
        try:
            test_func()
            passed += 1
        except AssertionError as e:
            print(f"❌ FAILED: {e}")
            failed += 1
        except Exception as e:
            print(f"❌ ERROR: {e}")
            import traceback
            traceback.print_exc()
            failed += 1
        print()

    print("=" * 70)
    print(f"TESTS COMPLETE: {passed} passed, {failed} failed")
    print("=" * 70)
    return failed == 0
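
# Note: because these functions follow the ``test_*`` naming convention, they
# can also be collected and run by pytest (assuming the file name matches
# pytest's default ``test_*.py`` pattern), bypassing the custom runner above.
# The runner is kept so the file can also be executed directly as a script.
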

if __name__ == "__main__":
    success = run_all_tests()
    sys.exit(0 if success else 1)
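
# The mock student and teacher are stochastic, so the assertions above use
# loose thresholds (e.g. exploit_rate > 0.3) to avoid flaky failures. If the
# mocks draw randomness from Python's ``random`` module (an assumption about
# their implementation), seeding it at the start of run_all_tests, e.g.
# ``random.seed(0)``, would make any failure reproducible.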