Update src/model_manager.py
Reorders the imports into stdlib and third-party groups, makes rope_base an explicit float, adds validation and documented fallbacks to _load_config, reworks the model-file listing loop, loads the torch checkpoint before building the model, and enables graph optimizations on the ONNX Runtime session.

src/model_manager.py  CHANGED  (+31 -22)
--- a/src/model_manager.py
+++ b/src/model_manager.py
@@ -1,11 +1,13 @@
 import os
-import torch
 import json
-import onnxruntime as ort
-from transformers import AutoTokenizer
 from dataclasses import dataclass
 from typing import Tuple, Any, List
 
+import torch
+import onnxruntime as ort
+from transformers import AutoTokenizer
+
+
 @dataclass
 class AutomotiveSLMConfig:
     model_name: str = "Automotive-SLM-Edge-3M"
@@ -20,15 +22,17 @@ class AutomotiveSLMConfig:
     moe_intermediate_size: int = 384
     router_aux_loss_coef: float = 0.01
     rotary_dim: int = 64
-    rope_base: float = 10000
+    rope_base: float = 10000.0
     dropout: float = 0.05
     layer_norm_epsilon: float = 1e-5
+    # UI defaults
     max_gen_length: int = 50
     temperature: float = 0.8
     top_p: float = 0.9
     top_k: int = 50
     repetition_penalty: float = 1.1
 
+
 class ModelManager:
     def __init__(self, models_path: str):
         if not isinstance(models_path, str) or not models_path:
@@ -41,32 +45,35 @@ class ModelManager:
         if not os.path.isdir(self.models_path):
             return []
         files = []
-        for
-            path = os.path.join(self.models_path,
+        for fname in os.listdir(self.models_path):
+            path = os.path.join(self.models_path, fname)
             if not os.path.isfile(path):
                 continue
-            ext = os.path.splitext(
+            ext = os.path.splitext(fname)[1].lower()
             if ext in [".pt", ".pth", ".onnx"]:
-                files.append(
+                files.append(fname)
         return sorted(files)
 
     def _load_config(self, checkpoint_path: str) -> AutomotiveSLMConfig:
-
-
-
-
+        if not isinstance(checkpoint_path, str) or not checkpoint_path:
+            raise ValueError(f"checkpoint_path must be a non-empty string, got: {checkpoint_path!r}")
+
+        # Prefer assets/config.json
+        assets_root = os.path.dirname(self.models_path)  # assets/
         cfg_path = os.path.join(assets_root, "config.json")
-        if
+        if os.path.exists(cfg_path):
             with open(cfg_path, "r") as f:
                 cfg = json.load(f)
             return AutomotiveSLMConfig(**cfg)
-
+
+        # Fallback: read config from torch checkpoint if present
         ext = os.path.splitext(checkpoint_path)[1].lower()
         if ext in [".pt", ".pth"] and os.path.exists(checkpoint_path):
             ckpt = torch.load(checkpoint_path, map_location="cpu")
             if isinstance(ckpt, dict) and "config" in ckpt:
                 return AutomotiveSLMConfig(**ckpt["config"])
-
+
+        # Last resort defaults
         return AutomotiveSLMConfig()
 
     def load_model(self, model_filename: str) -> Tuple[Any, Any, AutomotiveSLMConfig]:
@@ -80,7 +87,7 @@ class ModelManager:
         if not os.path.isfile(model_path):
             raise FileNotFoundError(f"Model file not found: {model_path}")
 
-        # tokenizer
+        # Load tokenizer (GPT-2 per training)
         tokenizer = AutoTokenizer.from_pretrained("gpt2")
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
@@ -89,20 +96,22 @@ class ModelManager:
         config = self._load_config(model_path)
 
         if ext in [".pt", ".pth"]:
+            # Import only when needed to avoid circular deps
            from src.model_architecture import AutomotiveSLM
-
-            model = AutomotiveSLM(config)
+            checkpoint = torch.load(model_path, map_location="cpu")
             state = checkpoint.get("model_state_dict", checkpoint)
+            model = AutomotiveSLM(config)
             model.load_state_dict(state, strict=True)
             model.eval()
+
         elif ext == ".onnx":
             providers = ["CPUExecutionProvider"]
-
-
-            model = ort.InferenceSession(model_path,
+            sess_options = ort.SessionOptions()
+            sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+            model = ort.InferenceSession(model_path, sess_options=sess_options, providers=providers)
+
         else:
             raise ValueError(f"Unsupported model format: {ext}")
 
         self.cache[model_filename] = (model, tokenizer, config)
         return model, tokenizer, config
-
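For reference, a minimal usage sketch of the API this diff settles on. It is not part of the commit: the models directory path, the checkpoint filename, and the list_models method name are assumptions for illustration (the hunks show the listing loop's body but not the method's signature).

# Usage sketch (hypothetical; not part of the commit).
# Assumes the layout implied by _load_config: an assets/ root holding
# config.json next to the models directory passed to ModelManager.
# "assets/models", "automotive_slm.pt", and list_models() are illustrative names.
from src.model_manager import ModelManager

manager = ModelManager("assets/models")

# The listing loop keeps only regular files with .pt/.pth/.onnx extensions
# and returns them sorted.
for fname in manager.list_models():
    print(fname)

# load_model() resolves the config in order (assets/config.json, then a
# "config" dict embedded in a torch checkpoint, then dataclass defaults),
# loads the GPT-2 tokenizer, and caches (model, tokenizer, config) per filename.
model, tokenizer, config = manager.load_model("automotive_slm.pt")
print(config.model_name, config.rope_base)  # Automotive-SLM-Edge-3M 10000.0

Note that the two load paths return different object types: an AutomotiveSLM module in eval mode for .pt/.pth, but an onnxruntime.InferenceSession for .onnx, so callers must branch on the model type before running inference.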