File size: 15,177 Bytes
3289c58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
import pytest
from jinja2 import Template
from pathlib import Path
from cuga.backend.cuga_graph.state.agent_state import SubTaskHistory


@pytest.fixture
def user_template():
    """Load the plan controller's actual ``user.jinja2`` prompt template.

    The path is resolved relative to this test file (three directory levels
    up, then into ``src/cuga/backend/...``), so the tests exercise the real
    prompt shipped with the package rather than a copy.

    Returns:
        A ``jinja2.Template`` built from the file's contents.

    Raises:
        FileNotFoundError: If the template is not at the expected location.
    """
    template_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "cuga"
        / "backend"
        / "cuga_graph"
        / "nodes"
        / "task_decomposition_planning"
        / "plan_controller_agent"
        / "prompts"
        / "user.jinja2"
    )
    # Read with an explicit encoding: the platform default (locale-dependent,
    # e.g. cp1252 on Windows) can fail or mis-decode non-ASCII prompt text.
    return Template(template_path.read_text(encoding="utf-8"))


class TestPlanControllerPrompt:
    """Test the plan controller user prompt template rendering.

    Every test builds a render context by hand, renders the real template via
    the ``user_template`` fixture and asserts on substrings of the output.
    History entries are ``SubTaskHistory`` objects, matching what the agent
    state stores in ``stm_all_history``.
    """

    def test_stm_history_with_final_answer(self, user_template):
        """A history entry with a final answer renders that answer, not the fallback text."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Read emails from contacts.txt',
                    steps=['Opened file', 'Extracted 7 emails'],
                    final_answer='Successfully read 7 email addresses',
                )
            ],
            'variables_history': 'No variables',
            'url': 'https://example.com',
            'input': 'Test task',
            'task_decomposition': ['Task 1', 'Task 2'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': [],
        }

        rendered = user_template.render(context)

        assert '**Subtask 1**: Read emails from contacts.txt' in rendered
        assert '- Opened file' in rendered
        assert '- Extracted 7 emails' in rendered
        assert '**Final Answer**: Successfully read 7 email addresses' in rendered
        assert '**Final Answer**: no answer is returned' not in rendered

    def test_stm_history_without_final_answer(self, user_template):
        """A history entry whose final_answer is None renders the fallback text."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Search for products',
                    steps=['Navigated to catalog', 'Filtered results'],
                    final_answer=None,
                )
            ],
            'variables_history': 'No variables',
            'url': 'https://shop.com',
            'input': 'Find products',
            'task_decomposition': ['Task 1'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': [],
        }

        rendered = user_template.render(context)

        assert '**Subtask 1**: Search for products' in rendered
        assert '- Navigated to catalog' in rendered
        assert '- Filtered results' in rendered
        assert '**Final Answer**: no answer is returned' in rendered

    def test_stm_history_empty(self, user_template):
        """With no history the section headers still render, but no subtask entry does."""
        context = {
            'stm_all_history': [],
            'variables_history': 'No variables',
            'url': 'https://example.com',
            'input': 'Start task',
            'task_decomposition': ['Task 1', 'Task 2'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': [],
        }

        rendered = user_template.render(context)

        assert '**Previous Subtasks**:' in rendered
        assert '**Variables History**:' in rendered
        # An empty history must not produce any history entry.
        assert '**Subtask 1**:' not in rendered

    def test_stm_history_multiple_tasks(self, user_template):
        """Several entries are numbered in order; an empty-string answer uses the fallback."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Find weather in London',
                    steps=['Searched weather', 'Found: 15°C, Cloudy'],
                    final_answer='London: 15°C, Cloudy',
                ),
                SubTaskHistory(
                    sub_task='Find weather in Paris',
                    steps=['Searched weather', 'Found: 18°C, Sunny'],
                    final_answer='Paris: 18°C, Sunny',
                ),
                SubTaskHistory(
                    sub_task='Compose email',
                    steps=['API call to Gmail', 'Email drafted'],
                    final_answer='',
                ),
            ],
            'variables_history': 'var_1: London weather\nvar_2: Paris weather',
            'url': 'https://weather.com',
            'input': 'Get weather and send email',
            'task_decomposition': ['Task 1', 'Task 2', 'Task 3'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed', 'completed', 'in-progress'],
        }

        rendered = user_template.render(context)

        assert '**Subtask 1**: Find weather in London' in rendered
        assert '**Final Answer**: London: 15°C, Cloudy' in rendered
        assert '**Subtask 2**: Find weather in Paris' in rendered
        assert '**Final Answer**: Paris: 18°C, Sunny' in rendered
        assert '**Subtask 3**: Compose email' in rendered
        # Only the third entry (empty answer) should fall back.
        assert rendered.count('**Final Answer**: no answer is returned') == 1

    def test_sub_tasks_progress_display(self, user_template):
        """The numbered subtask list renders when progress statuses are supplied.

        NOTE(review): only the numbered plan items are asserted here; how the
        statuses in ``sub_tasks_progress`` themselves are rendered is not
        checked by this test.
        """
        context = {
            'stm_all_history': [],
            'variables_history': 'No variables',
            'url': 'https://example.com',
            'input': 'Multi-step task',
            'task_decomposition': ['Task 1', 'Task 2', 'Task 3'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed', 'in-progress', 'not-started'],
        }

        rendered = user_template.render(context)

        assert '**Subtasks**:' in rendered
        assert '1. Task 1' in rendered
        assert '2. Task 2' in rendered
        assert '3. Task 3' in rendered

    def test_full_context_rendering(self, user_template):
        """Full realistic scenario: every section of the prompt is populated and present."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Read email list from contacts.txt',
                    steps=['Opened file contacts.txt', 'Parsed content', 'Extracted 7 email addresses'],
                    final_answer='Successfully read email list: [[email protected], [email protected], ...]',
                )
            ],
            'variables_history': '## emails_list\n- Type: list\n- Items: 7\n- Description: Email addresses from contacts.txt',
            'url': 'file:///workspace/contacts.txt',
            'input': 'Read emails from contacts.txt and send a marketing email to each using Gmail API',
            'task_decomposition': [
                'Read the list of emails from contacts.txt (type = web, app=)',
                'For each email, compose and send marketing email (type = api, app=Gmail API)',
            ],
            'current_datetime': '2025-12-12 10:30:00',
            'sub_tasks_progress': ['completed', 'not-started'],
        }

        rendered = user_template.render(context)

        # History section
        assert '**Previous Subtasks**:' in rendered
        assert '**Subtask 1**: Read email list from contacts.txt' in rendered
        assert '**Final Answer**: Successfully read email list' in rendered

        # Variables section
        assert '**Variables History**:' in rendered
        assert 'emails_list' in rendered

        # Current location
        assert '**Current URL**: file:///workspace/contacts.txt' in rendered

        # User intent
        assert '**Intent**:' in rendered
        assert 'Read emails from contacts.txt and send a marketing email' in rendered

        # Planned subtasks
        assert '**Subtasks**:' in rendered
        assert '1. Read the list of emails from contacts.txt' in rendered
        assert '2. For each email, compose and send marketing email' in rendered

        assert 'Current datetime: 2025-12-12 10:30:00' in rendered

    def test_stm_history_with_many_steps(self, user_template):
        """Every step of a long-running subtask is rendered as a bullet point."""
        steps = [
            'Navigated to catalog',
            'Clicked on Iphone 5E',
            'Clicked Add to Wishlist',
            'Confirmed addition',
            'Returned to catalog',
            'Clicked on Galaxy SE93',
            'Clicked Add to Wishlist',
            'Confirmed addition',
        ]
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Add phones to wishlist',
                    steps=steps,
                    final_answer='2 phones added to wishlist successfully',
                )
            ],
            'variables_history': 'phone_list: [Iphone 5E, Galaxy SE93, Xiaomi 99]',
            'url': 'https://shop.com/wishlist',
            'input': 'Add expensive phones to wishlist',
            'task_decomposition': ['Find phones', 'Add to wishlist'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed', 'in-progress'],
        }

        rendered = user_template.render(context)

        assert '**Subtask 1**: Add phones to wishlist' in rendered
        # Check that all steps (not just the first few) are rendered as bullet points
        for step in steps:
            assert f'- {step}' in rendered
        assert '**Final Answer**: 2 phones added to wishlist successfully' in rendered

    def test_special_characters_in_content(self, user_template):
        """Quotes and ampersands must appear verbatim in the output (no HTML escaping)."""
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Search for "smartphones" & tablets',
                    steps=['Query: "smartphones" & tablets', 'Results: 10 items found'],
                    final_answer='Found 10 items matching "smartphones" & tablets',
                )
            ],
            'variables_history': 'No variables',
            'url': 'https://example.com/search?q="smartphones"&category=tablets',
            'input': 'Find "smartphones" & tablets',
            'task_decomposition': ['Search products'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed'],
        }

        rendered = user_template.render(context)

        assert 'Search for "smartphones" & tablets' in rendered
        assert 'Query: "smartphones" & tablets' in rendered
        assert 'Found 10 items matching "smartphones" & tablets' in rendered

    def test_infinite_loop_prevention_scenario(self, user_template):
        """
        Test the exact scenario from the bug report:
        CugaLite completes a task but only appends SubTaskHistory with empty steps[]

        This validates that the template correctly handles SubTaskHistory objects
        as created by CugaLiteNode (with empty steps array)
        """
        # Simulate what CugaLiteNode actually does (line 408-414)
        context = {
            'stm_all_history': [
                SubTaskHistory(
                    sub_task='Read the list of emails from contacts.txt (type = web, app=)',
                    steps=[],  # CugaLiteNode sets this to empty array!
                    final_answer='Successfully extracted 7 emails: [email protected], [email protected], [email protected], [email protected], [email protected], [email protected], [email protected]',
                )
            ],
            'variables_history': '## emails_list\n- Type: list\n- Items: 7\n- Description: List of email addresses from contacts.txt\n- Value: [[email protected], [email protected], ...]',
            'url': 'file:///workspace/contacts.txt',
            'input': 'Read emails from contacts.txt and send marketing email to each using Gmail API',
            'task_decomposition': [
                'Read the list of emails from contacts.txt (type = web, app=)',
                'For each email in emails_list, compose marketing email (type = api, app=Gmail API)',
                'Send each composed email (type = api, app=Gmail API)',
            ],
            'current_datetime': '2025-12-12 13:11:33',
            'sub_tasks_progress': ['completed', 'not-started', 'not-started'],
        }

        rendered = user_template.render(context)

        # Verify the controller can see the completed work. Assert on the
        # history entry itself: the plain "1. Read the list..." text is also
        # produced by the plan listing, so it cannot prove the history rendered.
        assert '**Previous Subtasks**:' in rendered
        assert '**Subtask 1**: Read the list of emails from contacts.txt' in rendered
        assert '**Final Answer**: Successfully extracted 7 emails' in rendered

        # When steps is empty, no steps should be rendered
        # But the final answer should still be visible
        assert '[email protected]' in rendered

        # Verify variables are visible
        assert '**Variables History**:' in rendered
        assert 'emails_list' in rendered

        # Verify subtasks are visible
        assert '**Subtasks**:' in rendered
        assert '1. Read the list of emails from contacts.txt' in rendered

    def test_cuga_lite_node_empty_steps_pattern(self, user_template):
        """
        Test exact pattern from CugaLiteNode line 408-414:
        SubTaskHistory(sub_task=state.format_subtask(), steps=[], final_answer=answer)

        This is the critical pattern that was causing the infinite loop.
        """
        # Exact pattern from CugaLiteNode
        history_entry = SubTaskHistory(
            sub_task='Read the list of emails from contacts.txt (type = web, app=)',
            steps=[],  # Always empty from CugaLiteNode!
            final_answer='Successfully read 7 email addresses from contacts.txt',
        )

        context = {
            'stm_all_history': [history_entry],
            'variables_history': '## emails_list\n- Type: list\n- Items: 7',
            'url': 'file:///workspace/contacts.txt',
            'input': 'Read emails',
            'task_decomposition': ['Read emails from file'],
            'current_datetime': '2025-12-12',
            'sub_tasks_progress': ['completed'],
        }

        rendered = user_template.render(context)

        # The subtask should be visible
        assert '**Subtask 1**: Read the list of emails from contacts.txt' in rendered

        # The final answer should be visible
        assert '**Final Answer**: Successfully read 7 email addresses' in rendered

        # No step lines should appear (since steps=[])
        # The template has: {% for step in item['steps'] %}
        # With empty array, nothing should render in that loop
        #
        # Collect the lines that follow the entry's "**Subtask 1**:" header, up
        # to the Variables History section.
        # NOTE(review): assumes the template renders Variables History after
        # Previous Subtasks - confirm against user.jinja2.
        subtask_section = []
        in_subtask = False
        for line in rendered.split('\n'):
            if not in_subtask:
                in_subtask = '**Subtask 1**:' in line
            elif '**Variables History**' in line:
                break
            else:
                subtask_section.append(line)

        # Guard against a vacuous pass: the header must actually be located,
        # otherwise the section is empty and the check below proves nothing.
        assert in_subtask, "Subtask 1 header not found in rendered prompt"

        # Steps render as "- <step>" bullets; numbered prefixes are checked too.
        # The final-answer line is expected in this section and is not a step.
        step_lines = [
            line
            for line in subtask_section
            if line.strip().startswith(('- ', '1.', '2.')) and '**Final Answer**' not in line
        ]
        assert len(step_lines) == 0, f"Expected no step lines but found: {step_lines}"


if __name__ == '__main__':
    # Propagate pytest's exit status so that running this file directly
    # exits non-zero when any test fails (pytest.main returns the exit code).
    raise SystemExit(pytest.main([__file__, '-v']))