# cuga-agent / tests / unit / test_plan_controller_prompt.py
# Sami Marreed
# feat: docker-v1 with optimized frontend
# 6bd9812
import pytest
from jinja2 import Template
from pathlib import Path
from cuga.backend.cuga_graph.state.agent_state import SubTaskHistory
@pytest.fixture
def user_template():
    """Load the plan controller's ``user.jinja2`` prompt as a Jinja2 template.

    The path is built from this test file's location (its third ancestor
    directory, then ``src/cuga/backend/...``), so the fixture does not depend
    on the current working directory.

    Returns:
        jinja2.Template: Template built from the file's text.

    Raises:
        FileNotFoundError: If the template file does not exist at that path.
    """
    template_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "cuga"
        / "backend"
        / "cuga_graph"
        / "nodes"
        / "task_decomposition_planning"
        / "plan_controller_agent"
        / "prompts"
        / "user.jinja2"
    )
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the template reads the same on every OS.
    return Template(template_path.read_text(encoding="utf-8"))
class TestPlanControllerPrompt:
    """Rendering tests for the plan controller ``user.jinja2`` prompt.

    Every test builds a context dict, renders it through the
    ``user_template`` fixture, and asserts on substrings of the output.
    """

    def test_stm_history_with_final_answer(self, user_template):
        """Render one history entry that has steps and a final answer."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Read emails from contacts.txt',
                    steps=['Opened file', 'Extracted 7 emails'],
                    final_answer='Successfully read 7 email addresses',
                )
            ],
            'variables_history': 'No variables',
            'url': 'https://example.com',
            'input': 'Test task',
            'task_decomposition': ['Task 1', 'Task 2'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': [],
        }

        rendered = user_template.render(context)

        assert '**Subtask 1**: Read emails from contacts.txt' in rendered
        assert '- Opened file' in rendered
        assert '- Extracted 7 emails' in rendered
        assert '**Final Answer**: Successfully read 7 email addresses' in rendered
        # The fallback text must not appear when a real answer exists.
        assert '**Final Answer**: no answer is returned' not in rendered

    def test_stm_history_without_final_answer(self, user_template):
        """Render the fallback text when ``final_answer`` is ``None`` or ``''``."""
        # Exercise both "missing" values the docstring promises.
        for missing_answer in (None, ''):
            context = {
                'stm_all_history': [
                    SubTaskHistory(
                        sub_task='Search for products',
                        steps=['Navigated to catalog', 'Filtered results'],
                        final_answer=missing_answer,
                    )
                ],
                'variables_history': 'No variables',
                'url': 'https://shop.com',
                'input': 'Find products',
                'task_decomposition': ['Task 1'],
                'current_datetime': '2025-12-12',
                'sub_tasks_progress': [],
            }

            rendered = user_template.render(context)

            assert '**Subtask 1**: Search for products' in rendered
            assert '- Navigated to catalog' in rendered
            assert '- Filtered results' in rendered
            assert '**Final Answer**: no answer is returned' in rendered, (
                f"fallback text missing for final_answer={missing_answer!r}"
            )

    def test_stm_history_empty(self, user_template):
        """Render the section headers even when there is no history yet."""
        context = {
            'stm_all_history': [],
            'variables_history': 'No variables',
            'url': 'https://example.com',
            'input': 'Start task',
            'task_decomposition': ['Task 1', 'Task 2'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': [],
        }

        rendered = user_template.render(context)

        assert '**Previous Subtasks**:' in rendered
        assert '**Variables History**:' in rendered

    def test_stm_history_multiple_tasks(self, user_template):
        """Render several history entries, the last one without an answer."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Find weather in London',
                    steps=['Searched weather', 'Found: 15°C, Cloudy'],
                    final_answer='London: 15°C, Cloudy',
                ),
                SubTaskHistory(
                    sub_task='Find weather in Paris',
                    steps=['Searched weather', 'Found: 18°C, Sunny'],
                    final_answer='Paris: 18°C, Sunny',
                ),
                SubTaskHistory(
                    sub_task='Compose email',
                    steps=['API call to Gmail', 'Email drafted'],
                    final_answer='',
                ),
            ],
            'variables_history': 'var_1: London weather\nvar_2: Paris weather',
            'url': 'https://weather.com',
            'input': 'Get weather and send email',
            'task_decomposition': ['Task 1', 'Task 2', 'Task 3'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed', 'completed', 'in-progress'],
        }

        rendered = user_template.render(context)

        assert '**Subtask 1**: Find weather in London' in rendered
        assert '**Final Answer**: London: 15°C, Cloudy' in rendered
        assert '**Subtask 2**: Find weather in Paris' in rendered
        assert '**Final Answer**: Paris: 18°C, Sunny' in rendered
        assert '**Subtask 3**: Compose email' in rendered
        # Only the third entry (empty answer) should fall back.
        assert rendered.count('**Final Answer**: no answer is returned') == 1

    def test_sub_tasks_progress_display(self, user_template):
        """Render the numbered subtask list when progress values are supplied.

        Only the numbered ``task_decomposition`` entries are asserted here;
        the rendering of the progress values themselves is not checked.
        """
        context = {
            'stm_all_history': [],
            'variables_history': 'No variables',
            'url': 'https://example.com',
            'input': 'Multi-step task',
            'task_decomposition': ['Task 1', 'Task 2', 'Task 3'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed', 'in-progress', 'not-started'],
        }

        rendered = user_template.render(context)

        assert '**Subtasks**:' in rendered
        assert '1. Task 1' in rendered
        assert '2. Task 2' in rendered
        assert '3. Task 3' in rendered

    def test_full_context_rendering(self, user_template):
        """Render a realistic context with every field populated."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Read email list from contacts.txt',
                    steps=['Opened file contacts.txt', 'Parsed content', 'Extracted 7 email addresses'],
                    final_answer='Successfully read email list: [[email protected], [email protected], ...]',
                )
            ],
            'variables_history': '## emails_list\n- Type: list\n- Items: 7\n- Description: Email addresses from contacts.txt',
            'url': 'file:///workspace/contacts.txt',
            'input': 'Read emails from contacts.txt and send a marketing email to each using Gmail API',
            'task_decomposition': [
                'Read the list of emails from contacts.txt (type = web, app=)',
                'For each email, compose and send marketing email (type = api, app=Gmail API)',
            ],
            'current_datetime': '2025-12-12 10:30:00',
            'sub_tasks_progress': ['completed', 'not-started'],
        }

        rendered = user_template.render(context)

        # Verify all sections are present
        assert '**Previous Subtasks**:' in rendered
        assert '**Subtask 1**: Read email list from contacts.txt' in rendered
        assert '**Final Answer**: Successfully read email list' in rendered
        assert '**Variables History**:' in rendered
        assert 'emails_list' in rendered
        assert '**Current URL**: file:///workspace/contacts.txt' in rendered
        assert '**Intent**:' in rendered
        assert 'Read emails from contacts.txt and send a marketing email' in rendered
        assert '**Subtasks**:' in rendered
        assert '1. Read the list of emails from contacts.txt' in rendered
        assert '2. For each email, compose and send marketing email' in rendered
        assert 'Current datetime: 2025-12-12 10:30:00' in rendered

    def test_stm_history_with_many_steps(self, user_template):
        """Render every step of a subtask that has many steps."""
        steps = [
            'Navigated to catalog',
            'Clicked on Iphone 5E',
            'Clicked Add to Wishlist',
            'Confirmed addition',
            'Returned to catalog',
            'Clicked on Galaxy SE93',
            'Clicked Add to Wishlist',
            'Confirmed addition',
        ]
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Add phones to wishlist',
                    steps=steps,
                    final_answer='2 phones added to wishlist successfully',
                )
            ],
            'variables_history': 'phone_list: [Iphone 5E, Galaxy SE93, Xiaomi 99]',
            'url': 'https://shop.com/wishlist',
            'input': 'Add expensive phones to wishlist',
            'task_decomposition': ['Find phones', 'Add to wishlist'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed', 'in-progress'],
        }

        rendered = user_template.render(context)

        assert '**Subtask 1**: Add phones to wishlist' in rendered
        # Check that all steps are rendered as bullet points
        for step in steps:
            assert f'- {step}' in rendered, f"step not rendered as a bullet: {step!r}"
        assert '**Final Answer**: 2 phones added to wishlist successfully' in rendered

    def test_special_characters_in_content(self, user_template):
        """Render quotes and ampersands verbatim (no HTML escaping)."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Search for "smartphones" & tablets',
                    steps=['Query: "smartphones" & tablets', 'Results: 10 items found'],
                    final_answer='Found 10 items matching "smartphones" & tablets',
                )
            ],
            'variables_history': 'No variables',
            'url': 'https://example.com/search?q="smartphones"&category=tablets',
            'input': 'Find "smartphones" & tablets',
            'task_decomposition': ['Search products'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed'],
        }

        rendered = user_template.render(context)

        assert 'Search for "smartphones" & tablets' in rendered
        assert 'Query: "smartphones" & tablets' in rendered
        assert 'Found 10 items matching "smartphones" & tablets' in rendered

    def test_infinite_loop_prevention_scenario(self, user_template):
        """
        Test the exact scenario from the bug report:
        CugaLite completes a task but only appends SubTaskHistory with empty steps[]

        This validates that the template correctly handles SubTaskHistory objects
        as created by CugaLiteNode (with empty steps array)
        """
        # Simulate what CugaLiteNode actually does (line 408-414)
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Read the list of emails from contacts.txt (type = web, app=)',
                    steps=[],  # CugaLiteNode sets this to empty array!
                    final_answer='Successfully extracted 7 emails: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]',
                )
            ],
            'variables_history': '## emails_list\n- Type: list\n- Items: 7\n- Description: List of email addresses from contacts.txt\n- Value: [[email protected], [email protected], ...]',
            'url': 'file:///workspace/contacts.txt',
            'input': 'Read emails from contacts.txt and send marketing email to each using Gmail API',
            'task_decomposition': [
                'Read the list of emails from contacts.txt (type = web, app=)',
                'For each email in emails_list, compose marketing email (type = api, app=Gmail API)',
                'Send each composed email (type = api, app=Gmail API)',
            ],
            'current_datetime': '2025-12-12 13:11:33',
            'sub_tasks_progress': ['completed', 'not-started', 'not-started'],
        }

        rendered = user_template.render(context)

        # Verify the controller can see the completed work. Assert on the
        # history entry itself: the numbered "1. ..." line is also produced by
        # task_decomposition and would pass even if the history were dropped.
        assert '**Previous Subtasks**:' in rendered
        assert '**Subtask 1**: Read the list of emails from contacts.txt' in rendered
        assert '**Final Answer**: Successfully extracted 7 emails' in rendered

        # When steps is empty, no steps should be rendered
        # But the final answer should still be visible
        assert '[email protected]' in rendered

        # Verify variables are visible
        assert '**Variables History**:' in rendered
        assert 'emails_list' in rendered

        # Verify subtasks are visible
        assert '**Subtasks**:' in rendered
        assert '1. Read the list of emails from contacts.txt' in rendered

    def test_cuga_lite_node_empty_steps_pattern(self, user_template):
        """
        Test exact pattern from CugaLiteNode line 408-414:
        SubTaskHistory(sub_task=state.format_subtask(), steps=[], final_answer=answer)

        This is the critical pattern that was causing the infinite loop.

        The empty-steps render is compared against a render of the same
        context with exactly one step, so the "no step lines" check cannot
        pass vacuously.
        """

        def render_with_steps(steps):
            # Identical context each time; only the steps list varies.
            history_entry = SubTaskHistory(
                sub_task='Read the list of emails from contacts.txt (type = web, app=)',
                steps=steps,
                final_answer='Successfully read 7 email addresses from contacts.txt',
            )
            context = {
                'stm_all_history': [history_entry],
                'variables_history': '## emails_list\n- Type: list\n- Items: 7',
                'url': 'file:///workspace/contacts.txt',
                'input': 'Read emails',
                'task_decomposition': ['Read emails from file'],
                'current_datetime': '2025-12-12',
                'sub_tasks_progress': ['completed'],
            }
            return user_template.render(context)

        # Exact pattern from CugaLiteNode: steps is always empty.
        rendered = render_with_steps([])

        # The subtask should be visible
        assert '**Subtask 1**: Read the list of emails from contacts.txt' in rendered

        # The final answer should be visible
        assert '**Final Answer**: Successfully read 7 email addresses' in rendered

        # Baseline: one step renders as a "- <step>" bullet.
        sentinel_step = 'SENTINEL_STEP'
        rendered_one_step = render_with_steps([sentinel_step])
        assert f'- {sentinel_step}' in rendered_one_step
        assert sentinel_step not in rendered

        # With steps=[] the step loop must contribute no bullet marker at all.
        # Other "- " occurrences (e.g. in variables_history) are the same in
        # both renders, so the counts must differ by exactly one.
        empty_bullets = rendered.count('- ')
        one_step_bullets = rendered_one_step.count('- ')
        assert empty_bullets == one_step_bullets - 1, (
            f"Expected no step lines for steps=[], but found {empty_bullets} bullet "
            f"markers versus {one_step_bullets} with a single step"
        )
if __name__ == '__main__':
    # Propagate pytest's exit code so that running this file directly
    # reports test failures to the calling shell instead of exiting 0.
    raise SystemExit(pytest.main([__file__, '-v']))