Spaces:

iteratehack
/

MentorFlow

Paused

App Files Community

Cornelius committed 13 days ago

Commit

85ce009

1 Parent(s): c775d45

Fix GPU error - default to CPU and enhance GPU detection

Browse files

Files changed (2) hide show

app.py +39 -20
teacher_agent_dev/compare_strategies.py +16 -2

app.py CHANGED Viewed

@@ -3,17 +3,13 @@ Gradio app for MentorFlow - Teacher-Student RL System
 Deployed on Hugging Face Spaces with GPU support
 """
-import gradio as gr
 import sys
 import os
 import subprocess
 from pathlib import Path
-# Monkey-patch to fix Gradio 4.44.x schema generation bug
 # Prevents TypeError: argument of type 'bool' is not iterable
-import sys
-# Patch BEFORE importing gradio to ensure it takes effect
 def _patch_gradio_schema_bug():
     """Patch Gradio's buggy schema generation."""
     try:
@@ -25,12 +21,10 @@ def _patch_gradio_schema_bug():
             def _patched_get_type(schema):
                 """Handle bool schemas that cause the bug."""
-                # Bug fix: schema is sometimes a bool
                 if isinstance(schema, bool):
                     return "bool"
                 if schema is None:
                     return "Any"
-                # Must be dict to check membership
                 if not isinstance(schema, dict):
                     return "Any"
                 try:
@@ -42,7 +36,7 @@ def _patch_gradio_schema_bug():
             gradio_client_utils.get_type = _patched_get_type
-        # Also patch the wrapper function that calls get_type
         if hasattr(gradio_client_utils, '_json_schema_to_python_type'):
             _original_json_to_type = gradio_client_utils._json_schema_to_python_type
@@ -51,18 +45,20 @@ def _patch_gradio_schema_bug():
                 try:
                     return _original_json_to_type(schema, defs)
                 except (TypeError, AttributeError) as e:
-                    if "is not iterable" in str(e) or "bool" in str(type(e)):
                         return "Any"
                     raise
             gradio_client_utils._json_schema_to_python_type = _patched_json_to_type
     except (ImportError, AttributeError):
         pass
-# Apply patch immediately
 _patch_gradio_schema_bug()
 # Add project paths
 sys.path.insert(0, str(Path(__file__).parent))
 sys.path.insert(0, str(Path(__file__).parent / "teacher_agent_dev"))
@@ -80,19 +76,32 @@ def run_comparison(iterations: int, seed: int, use_deterministic: bool, device:
     """
     # Set device environment variable for subprocess
-    # Check if CUDA is actually available before using
     if device == "cuda":
         try:
             import torch
-            if not torch.cuda.is_available():
                 device = "cpu"
         except ImportError:
             device = "cpu"
-        except Exception:
             device = "cpu"
     # Set environment variable for subprocess to pick up
     os.environ["CUDA_DEVICE"] = device
     # Prepare command
     cmd = [
@@ -163,11 +172,21 @@ def check_gpu():
     try:
         import torch
         if torch.cuda.is_available():
-            return f"✅ GPU Available: {torch.cuda.get_device_name(0)}"
         else:
             return "⚠️ No GPU available, using CPU"
-    except:
-        return "⚠️ Could not check GPU status"
 # Create Gradio interface
@@ -221,10 +240,10 @@ with gr.Blocks(title="MentorFlow - Strategy Comparison") as demo:
             )
             device = gr.Radio(
-                choices=["cuda", "cpu"],
-                value="cuda",
                 label="Device",
-                info="Use GPU (cuda) if available, CPU otherwise"
             )
         with gr.Column():

 Deployed on Hugging Face Spaces with GPU support
 """
 import sys
 import os
 import subprocess
 from pathlib import Path
+# Monkey-patch to fix Gradio schema generation bug BEFORE importing gradio
 # Prevents TypeError: argument of type 'bool' is not iterable
 def _patch_gradio_schema_bug():
     """Patch Gradio's buggy schema generation."""
     try:
             def _patched_get_type(schema):
                 """Handle bool schemas that cause the bug."""
                 if isinstance(schema, bool):
                     return "bool"
                 if schema is None:
                     return "Any"
                 if not isinstance(schema, dict):
                     return "Any"
                 try:
             gradio_client_utils.get_type = _patched_get_type
+        # Patch the wrapper function too
         if hasattr(gradio_client_utils, '_json_schema_to_python_type'):
             _original_json_to_type = gradio_client_utils._json_schema_to_python_type
                 try:
                     return _original_json_to_type(schema, defs)
                 except (TypeError, AttributeError) as e:
+                    if "is not iterable" in str(e):
                         return "Any"
                     raise
             gradio_client_utils._json_schema_to_python_type = _patched_json_to_type
     except (ImportError, AttributeError):
         pass
+# Apply patch BEFORE importing gradio
 _patch_gradio_schema_bug()
+# Now import gradio (patch will be in effect)
+import gradio as gr
 # Add project paths
 sys.path.insert(0, str(Path(__file__).parent))
 sys.path.insert(0, str(Path(__file__).parent / "teacher_agent_dev"))
     """
     # Set device environment variable for subprocess
+    # On Hugging Face Spaces, check GPU availability more carefully
     if device == "cuda":
         try:
             import torch
+            # Check if CUDA is available
+            if torch.cuda.is_available():
+                try:
+                    # Try to get device name to verify GPU works
+                    gpu_name = torch.cuda.get_device_name(0)
+                    print(f"✅ GPU available: {gpu_name}")
+                except Exception as e:
+                    print(f"⚠️ GPU detection failed: {e}, falling back to CPU")
+                    device = "cpu"
+            else:
+                print("⚠️ CUDA not available, using CPU")
                 device = "cpu"
         except ImportError:
+            print("⚠️ PyTorch not available, using CPU")
             device = "cpu"
+        except Exception as e:
+            print(f"⚠️ GPU check error: {e}, using CPU")
             device = "cpu"
     # Set environment variable for subprocess to pick up
     os.environ["CUDA_DEVICE"] = device
+    print(f"🔧 Using device: {device}")
     # Prepare command
     cmd = [
     try:
         import torch
         if torch.cuda.is_available():
+            try:
+                gpu_name = torch.cuda.get_device_name(0)
+                gpu_count = torch.cuda.device_count()
+                return f"✅ GPU Available: {gpu_name} (Count: {gpu_count})"
+            except Exception as e:
+                return f"⚠️ GPU detected but error accessing: {str(e)}"
         else:
+            # Check if we're on Hugging Face Spaces
+            if os.getenv("SPACE_ID"):
+                return "⚠️ No GPU available on this Space. Please upgrade to GPU tier."
             return "⚠️ No GPU available, using CPU"
+    except ImportError:
+        return "⚠️ PyTorch not installed"
+    except Exception as e:
+        return f"⚠️ Could not check GPU status: {str(e)}"
 # Create Gradio interface
             )
             device = gr.Radio(
+                choices=["cpu", "cuda"],
+                value="cpu",  # Default to CPU for reliability on HF Spaces
                 label="Device",
+                info="CPU (recommended) or CUDA/GPU if available on your Space"
             )
         with gr.Column():

teacher_agent_dev/compare_strategies.py CHANGED Viewed

@@ -90,11 +90,25 @@ def train_strategy_random(num_iterations: int = 500, seed: int = 42, target_accu
     if device == "cuda":
         try:
             import torch
-            if not torch.cuda.is_available():
                 device = "cpu"
                 print("⚠️ CUDA not available, using CPU")
-        except:
             device = "cpu"
     student = LMStudentAgent(
         learning_rate=5e-5,  # LM fine-tuning learning rate

     if device == "cuda":
         try:
             import torch
+            if torch.cuda.is_available():
+                try:
+                    # Verify GPU actually works
+                    gpu_name = torch.cuda.get_device_name(0)
+                    print(f"✅ Using GPU: {gpu_name}")
+                except Exception as e:
+                    print(f"⚠️ GPU access failed: {e}, using CPU")
+                    device = "cpu"
+            else:
                 device = "cpu"
                 print("⚠️ CUDA not available, using CPU")
+        except ImportError:
+            device = "cpu"
+            print("⚠️ PyTorch not available, using CPU")
+        except Exception as e:
             device = "cpu"
+            print(f"⚠️ GPU check error: {e}, using CPU")
+    print(f"🔧 LM Student device: {device}")
     student = LMStudentAgent(
         learning_rate=5e-5,  # LM fine-tuning learning rate