File size: 8,925 Bytes
a52f96d
 
 
 
 
 
 
a4ee801
a52f96d
61fe8fc
85ce009
a52f96d
 
 
 
 
b4b1e11
a52f96d
 
 
 
a4ee801
61fe8fc
a52f96d
 
 
61fe8fc
85ce009
 
61fe8fc
85ce009
4bbb666
 
61fe8fc
85ce009
4bbb666
61fe8fc
85ce009
4bbb666
a4ee801
ec32884
4bbb666
ec32884
85ce009
4bbb666
a4ee801
 
61fe8fc
a4ee801
85ce009
a52f96d
 
 
 
 
 
 
 
 
 
 
 
 
 
61fe8fc
a4ee801
 
a52f96d
 
 
 
61fe8fc
a52f96d
 
 
 
a4ee801
 
 
 
61fe8fc
a4ee801
 
 
 
 
61fe8fc
a4ee801
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61fe8fc
a4ee801
 
 
 
a52f96d
 
 
 
 
 
 
 
 
4bbb666
a52f96d
 
61fe8fc
 
a52f96d
85ce009
 
 
4bbb666
 
85ce009
61fe8fc
4bbb666
a52f96d
61fe8fc
85ce009
61fe8fc
4bbb666
 
 
 
 
85ce009
 
 
4bbb666
a52f96d
 
 
 
 
 
 
 
 
 
 
 
 
61fe8fc
a52f96d
 
 
61fe8fc
 
a52f96d
 
 
 
 
 
 
ff6c480
a52f96d
 
 
 
61fe8fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a52f96d
 
 
 
 
 
 
61fe8fc
 
 
 
 
 
 
a52f96d
61fe8fc
 
 
 
 
a52f96d
 
 
 
 
ff6c480
 
a52f96d
61fe8fc
 
 
 
 
 
 
 
 
 
 
 
a52f96d
 
61fe8fc
c99cd73
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
"""
Gradio app for MentorFlow - Teacher-Student RL System
Deployed on Hugging Face Spaces with GPU support
"""

import sys
import os
import subprocess
from pathlib import Path
import gradio as gr  # Import directly, do not use the patch

# Add project paths
sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, str(Path(__file__).parent / "teacher_agent_dev"))
sys.path.insert(0, str(Path(__file__).parent / "student_agent_dev"))

def _resolve_device(device: str) -> str:
    """Return the device to use, downgrading "cuda" to "cpu" when CUDA is unusable."""
    if device != "cuda":
        return device

    try:
        # Local import: PyTorch may be missing in the UI process.
        import torch

        if not torch.cuda.is_available():
            print("⚠️ CUDA not available, falling back to CPU")
            return "cpu"
    except ImportError:
        print("⚠️ PyTorch not available, falling back to CPU")
        return "cpu"
    except Exception as e:
        print(f"⚠️ GPU check error: {e}, falling back to CPU")
        return "cpu"

    try:
        # Querying the device name verifies that the GPU is actually reachable.
        gpu_name = torch.cuda.get_device_name(0)
        gpu_count = torch.cuda.device_count()
        print(f"✅ GPU available: {gpu_name} (Count: {gpu_count})")
    except Exception as e:
        # Deliberate best effort: keep "cuda" even if the probe itself failed.
        print(f"⚠️ GPU detection failed: {e}")
        print("   Attempting to use CUDA anyway (may work)...")
    return "cuda"


def _build_command(iterations, seed, use_deterministic) -> list:
    """Build the argv list that launches compare_strategies.py."""
    cmd = [
        sys.executable,
        "teacher_agent_dev/compare_strategies.py",
        # UI widgets may deliver floats (e.g. 100.0); emit a clean integer.
        "--iterations", str(int(iterations)),
    ]
    if use_deterministic:
        cmd.append("--deterministic")
    else:
        cmd.extend(["--seed", str(int(seed))])
    return cmd


def _find_first_existing(candidates):
    """Return the first path in *candidates* that exists, or None."""
    return next((path for path in candidates if path.exists()), None)


def run_comparison(iterations: int, seed: int, use_deterministic: bool, device: str):
    """
    Run strategy comparison with LM Student.

    Launches ``teacher_agent_dev/compare_strategies.py`` as a subprocess (working
    directory: this file's folder) and waits up to one hour for it to finish.

    Args:
        iterations: Number of training iterations; converted to ``int`` before
            being passed as ``--iterations``.
        seed: Random seed, passed as ``--seed`` only when ``use_deterministic``
            is false.
        use_deterministic: When true, ``--deterministic`` is passed instead of
            a seed.
        device: ``"cuda"`` or ``"cpu"``. ``"cuda"`` is downgraded to ``"cpu"``
            when PyTorch or CUDA is unavailable. The final value is exported to
            the subprocess as the ``CUDA_DEVICE`` environment variable.

    Returns:
        A ``(message, plot_path)`` tuple. ``plot_path`` is the path of the
        generated plot as a string, or ``None`` on failure, timeout, invalid
        parameters, or when no plot file was found.
    """
    device = _resolve_device(device)
    print(f"🔧 Using device: {device}")

    try:
        cmd = _build_command(iterations, seed, use_deterministic)
    except (TypeError, ValueError, OverflowError) as e:
        # e.g. the seed field was cleared in the UI and arrived as None.
        return f"❌ Error: invalid iterations/seed value ({e})", None

    project_root = Path(__file__).parent

    # Only the child's environment is modified. Writing to os.environ would be
    # shared by every concurrent request handled by this process.
    env = os.environ.copy()
    env["CUDA_DEVICE"] = device

    try:
        result = subprocess.run(
            cmd,
            cwd=str(project_root),
            env=env,
            capture_output=True,
            text=True,
            errors="replace",  # never lose the whole output to one bad byte
            timeout=3600,  # 1 hour timeout
        )

        stdout_text = result.stdout
        stderr_text = result.stderr

        # Combine outputs
        full_output = f"=== STDOUT ===\n{stdout_text}\n\n=== STDERR ===\n{stderr_text}"

        if result.returncode != 0:
            return f"❌ Error occurred:\n{full_output}", None

        # Find output plot (check multiple possible locations)
        plot_paths = [
            project_root / "teacher_agent_dev" / "comparison_all_strategies.png",
            project_root / "comparison_all_strategies.png",
            Path.cwd() / "teacher_agent_dev" / "comparison_all_strategies.png",
        ]
        plot_path = _find_first_existing(plot_paths)

        if plot_path:
            return f"✅ Comparison complete!\n\n{stdout_text}", str(plot_path)

        # Return output even if plot not found (might still be useful)
        error_msg = "⚠️ Plot not found at expected locations.\n"
        error_msg += f"Checked: {[str(p) for p in plot_paths]}\n\n"
        error_msg += f"Output:\n{full_output}"
        return error_msg, None

    except subprocess.TimeoutExpired:
        return "❌ Timeout: Comparison took longer than 1 hour", None
    except Exception as e:
        import traceback
        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}", None


def check_gpu():
    """Check if GPU is available on Hugging Face Spaces.

    Returns:
        A one-line, human-readable status string. It starts with "✅" when
        PyTorch reports CUDA as available and with "⚠️" otherwise (no GPU,
        PyTorch missing, or the check itself failed). This function does not
        raise; every failure is reported through the returned string.
    """
    # Substrings that mark a GPU tier in the SPACE_HARDWARE value.
    gpu_hardware_tags = ("gpu", "t4", "l4", "a10g", "a100", "h100")

    try:
        import torch

        # Check CUDA availability
        if torch.cuda.is_available():
            try:
                gpu_name = torch.cuda.get_device_name(0)
                gpu_count = torch.cuda.device_count()
                cuda_version = torch.version.cuda
                return f"✅ GPU Available: {gpu_name} (Count: {gpu_count}, CUDA: {cuda_version})"
            except Exception as e:
                # GPU might be available but not immediately accessible
                return f"✅ GPU Detected (accessing: {str(e)[:50]}...)"

        # SPACE_ID being set is treated as "running on Hugging Face Spaces".
        if not os.getenv("SPACE_ID"):
            return "⚠️ No GPU available, will use CPU"

        # Check if GPU hardware is allocated
        hf_hardware = os.getenv("SPACE_HARDWARE", "unknown")
        hardware_lc = hf_hardware.lower()
        if any(tag in hardware_lc for tag in gpu_hardware_tags):
            return f"⚠️ GPU Hardware ({hf_hardware}) allocated but not accessible yet. Try running anyway."
        return f"⚠️ No GPU on this Space (hardware: {hf_hardware}). Please configure GPU tier."
    except ImportError:
        return "⚠️ PyTorch not installed"
    except Exception as e:
        return f"⚠️ GPU check error: {str(e)}"


# Create Gradio interface
# Layout, top to bottom: intro text, GPU status row, parameter controls with
# the run button, output row (log text + plot), and a results explainer.
with gr.Blocks(title="MentorFlow - Strategy Comparison") as demo:
    gr.Markdown("""
    # 🎓 MentorFlow - Teacher-Student RL System
    
    Compare three training strategies using LM Student (DistilBERT):
    1. **Random Strategy**: Random questions until student can pass difficult questions
    2. **Progressive Strategy**: Easy → Medium → Hard within each family
    3. **Teacher Strategy**: RL teacher agent learns optimal curriculum
    
    ## Usage
    
    1. Set parameters below
    2. Click "Run Comparison" to start training
    3. View results and generated plots
    
    **Note**: With LM Student, this will take 15-30 minutes for 500 iterations.
    """)
    
    # GPU Status
    # check_gpu() is called once here, while the UI is being built; the button
    # below re-runs it on demand.
    with gr.Row():
        gpu_status = gr.Textbox(label="GPU Status", value=check_gpu(), interactive=False)
        refresh_btn = gr.Button("🔄 Refresh GPU Status")
    
    refresh_btn.click(fn=check_gpu, outputs=gpu_status, api_name="check_gpu")
    
    # Parameters
    with gr.Row():
        with gr.Column():
            iterations = gr.Slider(
                minimum=50,
                maximum=500,
                value=100,
                step=50,
                label="Iterations",
                info="Number of training iterations (higher = longer runtime)"
            )
            
            seed = gr.Number(
                value=42,
                label="Random Seed",
                info="Seed for reproducibility (ignored if deterministic)"
            )
            
            use_deterministic = gr.Checkbox(
                value=True,
                label="Deterministic Mode",
                info="Use fixed seed=42 for reproducible results"
            )
            
            device = gr.Radio(
                choices=["cuda", "cpu"],
                value="cuda",  # Default to GPU for HF Spaces with Nvidia 4xL4
                label="Device",
                info="GPU (cuda) recommended for Nvidia 4xL4, CPU fallback available"
            )
        
        with gr.Column():
            run_btn = gr.Button("🚀 Run Comparison", variant="primary", size="lg")
    
    # Output
    with gr.Row():
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="Output",
                lines=15,
                max_lines=30,
                interactive=False
            )
        
        with gr.Column(scale=1):
            output_plot = gr.Image(
                label="Comparison Plot",
                type="filepath",
                height=500
            )
    
    # Run comparison
    # The input order must match run_comparison's positional parameters; the
    # two outputs receive its (message, plot_path) return value.
    run_btn.click(
        fn=run_comparison,
        inputs=[iterations, seed, use_deterministic, device],
        outputs=[output_text, output_plot],
        api_name="run_comparison"
    )
    
    gr.Markdown("""
    ## 📊 Understanding Results
    
    The comparison plot shows:
    - **Learning Curves**: How each strategy improves over time
    - **Difficult Question Performance**: Accuracy on hard questions
    - **Curriculum Diversity**: Topic coverage over time
    - **Learning Efficiency**: Iterations to reach target vs final performance
    
    The **Teacher Strategy** should ideally outperform Random and Progressive strategies.
    """)

if __name__ == "__main__":
    # Listen on every network interface (0.0.0.0) at port 7860 so the app is
    # reachable when hosted on HF Spaces.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)