Update backend.py

backend.py  (+29, -44)  CHANGED
@@ -1,4 +1,4 @@
-# backend.py —
+# backend.py — FINAL VERSION — PHASE-3 MINI INSTRUCT + CACHING + LOGGING
 import sqlite3
 import os
 import json
@@ -152,18 +152,18 @@ def get_vram_usage_gb():
     except:
         return 0.0

-# ------------------------------ MODEL LOADING ------------------------------
+# ------------------------------ MODEL LOADING (PER ROLE) ------------------------------
 MODEL_REGISTRY = {
     "understander": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
     "architect": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
-    "ceo": "
+    "ceo": "microsoft/Phi-3-mini-4k-instruct",
     "manager": "microsoft/Phi-3-mini-4k-instruct",
     "worker": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
     "reviewer": "microsoft/Phi-3-mini-4k-instruct",
     "editor": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
     "tester": "microsoft/Phi-3-mini-4k-instruct",
     "publisher": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
-    "final_ceo": "
+    "final_ceo": "microsoft/Phi-3-mini-4k-instruct",
 }

 _MODEL_CACHE = {}
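Note on the registry change: every role now resolves to one of only two checkpoints (Qwen2.5-Coder-0.6B-Instruct or Phi-3-mini-4k-instruct), so _MODEL_CACHE holds at most two tokenizer/model pairs. A minimal sketch of the lookup pattern used further down in this file; the "ceo" role is just an illustrative pick:

# Sketch: how the registry feeds the loader elsewhere in backend.py.
model_name = MODEL_REGISTRY["ceo"]         # -> "microsoft/Phi-3-mini-4k-instruct"
tokenizer, model = load_model(model_name)  # first call loads; later calls presumably hit _MODEL_CACHE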
@@ -177,7 +177,8 @@ def load_model(model_name):
         model_name,
         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
         device_map="auto",
-        trust_remote_code=True
+        trust_remote_code=True,
+        attn_implementation="eager"  # ✅ Avoids flash-attn requirement — safe on all hardware
     )
     _MODEL_CACHE[model_name] = (tokenizer, model)
     return tokenizer, model
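The hunk above only shows the tail of load_model(). A minimal sketch of the whole function, assuming the transformers Auto classes and a cache keyed by checkpoint name; the cache-hit branch is not visible in this diff, so treat everything outside the marked lines as an assumption:

# Assumed shape of load_model(); only the lines marked in the hunk above are confirmed.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

def load_model(model_name):
    # Serve an already-loaded checkpoint from the module-level cache (assumed behaviour).
    if model_name in _MODEL_CACHE:
        return _MODEL_CACHE[model_name]
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="eager",  # added in this commit
    )
    _MODEL_CACHE[model_name] = (tokenizer, model)
    return tokenizer, model

Setting attn_implementation="eager" forces the plain PyTorch attention path, so Phi-3 loads without the optional flash-attn package and on hardware that cannot use it.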
@@ -418,51 +419,35 @@ def zip_project(user_id, project_name: str):
             zf.write(full_path, arcname)
     return zip_path

-# ------------------------------ LLM GENERATOR (WITH
+# ------------------------------ LLM GENERATOR (WITH CHAT TEMPLATE FOR PHI-3) ------------------------------
 def generate_with_model(role: str, prompt: str, context: dict = {}) -> str:
     try:
         model_name = MODEL_REGISTRY[role]
         tokenizer, model = load_model(model_name)

-        # Format message
-        if
-
-
-
-
-
-
-
-
-
-
-                        {"role": "user", "content": prompt}]
-        elif role == "worker":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["worker"].format(file=context.get("file"), instructions=context.get("instructions"))},
-                        {"role": "user", "content": ""}]
-        elif role == "reviewer":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["reviewer"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "editor":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["editor"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "tester":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["tester"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "publisher":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["publisher"]},
-                        {"role": "user", "content": prompt}]
-        elif role == "final_ceo":
-            messages = [{"role": "system", "content": ROLE_PROMPTS["final_ceo"]},
-                        {"role": "user", "content": prompt}]
+        # Format message using Phi-3 or Qwen chat template
+        if "Phi-3" in model_name:
+            # Phi-3 format
+            messages = [
+                {"role": "system", "content": ROLE_PROMPTS[role]},
+                {"role": "user", "content": prompt}
+            ]
+            text = tokenizer.apply_chat_template(
+                messages,
+                tokenize=False,
+                add_generation_prompt=True
+            )
         else:
-
-
-
-
-
-
-
+            # Qwen format
+            messages = [
+                {"role": "system", "content": ROLE_PROMPTS[role]},
+                {"role": "user", "content": prompt}
+            ]
+            text = tokenizer.apply_chat_template(
+                messages,
+                tokenize=False,
+                add_generation_prompt=True
+            )

         inputs = tokenizer(text, return_tensors="pt").to(model.device)

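The hunk stops at the tokenizer call. For orientation, a sketch of how the body of generate_with_model presumably continues after that line; the generation settings are assumptions, not values taken from this diff:

# Inside generate_with_model, after the inputs line shown above (settings are assumed).
output_ids = model.generate(
    **inputs,
    max_new_tokens=1024,              # assumed cap on the reply length
    do_sample=False,                  # assumed deterministic decoding
    pad_token_id=tokenizer.eos_token_id,
)
new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]  # drop the echoed prompt
return tokenizer.decode(new_tokens, skip_special_tokens=True)

Worth noting: after this change the Phi-3 and Qwen branches are identical, since apply_chat_template reads the template from whichever tokenizer is loaded, so the if/else could be collapsed without changing behaviour.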