Spaces:
Running
Running
File size: 20,295 Bytes
44a54f8 6e90b51 6bdb2fa a3961a8 6bdb2fa 6e90b51 a1e146d 28a74f0 102ac7f 44a54f8 6bdb2fa 6e90b51 44a54f8 90fcfde 44a54f8 6bdb2fa 90fcfde 6bdb2fa 44a54f8 6bdb2fa 289d01f 90fcfde 289d01f 6bdb2fa 90fcfde 6e90b51 289d01f 90fcfde 6bdb2fa 6e90b51 1db4c64 6e90b51 289d01f 469e3da 1c29bb4 289d01f 469e3da 1c29bb4 289d01f 6bdb2fa 1c29bb4 6e90b51 6bdb2fa 6e90b51 28a74f0 6e90b51 28a74f0 a3961a8 1d6374d 28a74f0 1d6374d a3961a8 1d6374d 28a74f0 a3961a8 1d6374d a3961a8 28a74f0 1d6374d a3961a8 1d6374d a3961a8 6e90b51 a3961a8 1c29bb4 90fcfde 6bdb2fa 289d01f a3961a8 6bdb2fa 289d01f a3961a8 6bdb2fa 90fcfde 289d01f 28a74f0 a3961a8 6bdb2fa 289d01f 44a54f8 6bdb2fa 289d01f 44a54f8 28a74f0 a3961a8 1922406 a3961a8 6bdb2fa 28a74f0 a1e146d 28a74f0 6bdb2fa 28a74f0 a1e146d a3961a8 6e90b51 28a74f0 6e90b51 6bdb2fa 289d01f 28a74f0 289d01f 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 6bdb2fa 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 a3961a8 28a74f0 289d01f 6bdb2fa fc7720c af28159 44a54f8 6e90b51 289d01f 6bdb2fa 6e90b51 6bdb2fa 6e90b51 289d01f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 |
import sqlite3
import os
import json
import re
import concurrent.futures
import traceback
import zipfile
import hashlib
from typing import Optional, Dict, Any, List
# --- New dependencies for Web Scraping ---
import requests
from bs4 import BeautifulSoup
# ---
import torch
import psutil
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
import concurrent.futures
# Detect number of CPUs in the Hugging Face Space.
# os.cpu_count() may return None, in which case fall back to a single CPU.
cpu_count = os.cpu_count() or 1
# Use that many workers.
# NOTE(review): `executor` is assigned again in the JOB QUEUE section of this
# file with max_workers=1, so this pool is replaced before any job is
# submitted -- confirm which pool size is actually intended.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=cpu_count)
# --- CONFIGURATION ---
# SQLite database file used by every helper in the DATABASE section.
DB_PATH = "code_agents_pro.db"
# Root folder under which per-user / per-project directories are created.
PROJECT_ROOT = "./projects"
# Import-time side effect: make sure the project root exists.
os.makedirs(PROJECT_ROOT, exist_ok=True)
# ------------------------------ DATABASE (ROBUST) ------------------------------
def init_db():
    """Create the ``users`` and ``projects`` tables and their index.

    Every statement uses ``IF NOT EXISTS``, so calling this repeatedly is
    harmless. The database file is the one named by ``DB_PATH``.
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    # A sqlite3 connection used as a context manager only scopes a
    # transaction; it does NOT close the connection. ``closing`` releases
    # the file handle deterministically.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        with conn:
            cursor = conn.cursor()
            cursor.executescript("""
CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, username TEXT UNIQUE, password_hash TEXT);
CREATE TABLE IF NOT EXISTS projects (id INTEGER PRIMARY KEY, user_id INTEGER, title TEXT, description TEXT, status TEXT DEFAULT 'queued', zip_path TEXT, logs TEXT DEFAULT '', created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (user_id) REFERENCES users(id));
CREATE INDEX IF NOT EXISTS idx_user_status ON projects(user_id, status);
""")


# Import-time side effect: make sure the schema exists before any query runs.
init_db()
def _db_execute(query, params=(), fetchone=False, fetchall=False, commit=False):
    """Run one SQL statement against ``DB_PATH`` and return its result.

    Args:
        query: SQL text, using ``?`` placeholders.
        params: Values bound to the placeholders.
        fetchone: Return the first result row (a ``sqlite3.Row``) or None.
        fetchall: Return every result row as a list of ``sqlite3.Row``.
        commit: Commit and return ``cursor.lastrowid``. Takes precedence
            over ``fetchone`` / ``fetchall``.

    Returns:
        See the flags above. Returns None when no flag is set, and also when
        any ``sqlite3.Error`` occurs (the error is printed, not raised).
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    try:
        # ``with conn`` alone only commits/rolls back; ``closing`` is what
        # actually closes the connection so handles do not pile up.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            conn.row_factory = sqlite3.Row
            with conn:
                cursor = conn.cursor()
                cursor.execute(query, params)
                if commit:
                    conn.commit()
                    return cursor.lastrowid
                if fetchone:
                    return cursor.fetchone()
                if fetchall:
                    return cursor.fetchall()
    except sqlite3.Error as e:
        print(f"Database error: {e}")
        return None
    return None
def hash_password(password):
    """Return the hex SHA-256 digest of ``password`` (UTF-8 encoded).

    NOTE(review): this is a single unsalted SHA-256 pass. The scheme is kept
    as-is so hashes already stored in the ``users`` table stay valid;
    consider migrating to a salted KDF such as ``hashlib.scrypt``.
    """
    return hashlib.sha256(password.encode()).hexdigest()


def verify_password(password, stored_hash):
    """Return True when ``password`` hashes to ``stored_hash``.

    The comparison is constant-time so the check does not leak how many
    leading characters of the digest matched. A missing or non-string
    ``stored_hash`` simply yields False.
    """
    # Local import keeps this block self-contained.
    import hmac

    if not isinstance(stored_hash, str):
        return False
    # Compare as bytes: compare_digest rejects non-ASCII str arguments.
    return hmac.compare_digest(
        hash_password(password).encode("utf-8"),
        stored_hash.encode("utf-8"),
    )
def create_user(username, password):
    """Insert a new user row and return what ``_db_execute`` returns.

    The password is stored as ``hash_password(password)``. Returns None if
    the insert raises ``sqlite3.IntegrityError`` here.
    """
    try:
        new_user_id = _db_execute(
            "INSERT INTO users (username, password_hash) VALUES (?, ?)",
            (username, hash_password(password)),
            commit=True,
        )
    except sqlite3.IntegrityError:
        new_user_id = None
    return new_user_id
def get_user_by_username(username):
    """Fetch the single ``users`` row whose username equals ``username``."""
    lookup_sql = "SELECT * FROM users WHERE username = ?"
    return _db_execute(lookup_sql, (username,), fetchone=True)
def get_user_projects(user_id, limit=20):
    """Fetch up to ``limit`` projects of ``user_id``, newest first."""
    listing_sql = "SELECT * FROM projects WHERE user_id = ? ORDER BY created_at DESC LIMIT ?"
    bound_values = (user_id, limit)
    return _db_execute(listing_sql, bound_values, fetchall=True)
def create_project(user_id, title, description):
    """Insert a project row for ``user_id`` and return the ``_db_execute`` result."""
    insert_sql = "INSERT INTO projects (user_id, title, description) VALUES (?, ?, ?)"
    bound_values = (user_id, title, description)
    return _db_execute(insert_sql, bound_values, commit=True)
def update_project_status(project_id, status, logs=None, zip_path=None):
    """Set a project's status, optionally replacing its logs and zip path.

    ``logs`` and ``zip_path`` go through SQL ``COALESCE``, so passing None
    leaves the stored value untouched. Returns nothing.
    """
    update_sql = "UPDATE projects SET status=?, logs=COALESCE(?, logs), zip_path=COALESCE(?, zip_path) WHERE id=?"
    bound_values = (status, logs, zip_path, project_id)
    _db_execute(update_sql, bound_values, commit=True)
def get_project(project_id):
    """Fetch the single ``projects`` row whose id equals ``project_id``."""
    lookup_sql = "SELECT * FROM projects WHERE id = ?"
    return _db_execute(lookup_sql, (project_id,), fetchone=True)
# ------------------------------ MODEL LOADING & CACHING ------------------------------
# Checkpoint currently shared by every agent role; point an individual role
# at a different id below to give it its own model.
_DEFAULT_MODEL_ID = "Qwen/Qwen3-0.6B"

# Maps agent role name -> model id handed to ``load_model``.
MODEL_REGISTRY = {
    "ceo": _DEFAULT_MODEL_ID,
    "manager": _DEFAULT_MODEL_ID,
    "worker_coder": _DEFAULT_MODEL_ID,
    "worker_tester": _DEFAULT_MODEL_ID,
}
# Maps model name -> (tokenizer, model) for checkpoints already loaded.
_MODEL_CACHE = {}


def load_model(model_name):
    """Return ``(tokenizer, model)`` for ``model_name``, loading it once.

    The first call for a name loads the tokenizer and causal-LM weights and
    stores the pair in ``_MODEL_CACHE``; later calls return the cached pair.
    When CUDA is available the model is loaded with a 4-bit NF4
    ``BitsAndBytesConfig``; otherwise no quantization config is passed.
    """
    try:
        return _MODEL_CACHE[model_name]
    except KeyError:
        pass  # Not loaded yet; fall through to the slow path.

    load_options = {
        "device_map": "auto",
        "trust_remote_code": True,
        "attn_implementation": "eager",
    }
    if not torch.cuda.is_available():
        print(f"CUDA not found. Loading '{model_name}' on CPU.")
    else:
        print(f"CUDA found. Loading '{model_name}' in 4-bit.")
        load_options["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)
    loaded_pair = (tokenizer, model)
    _MODEL_CACHE[model_name] = loaded_pair
    print(f"Model {model_name} loaded and cached.")
    return loaded_pair
# ------------------------------ AGENT PROMPTS FOR DYNAMIC FLOW ------------------------------
# System prompts keyed by agent role. ``generate_with_model`` sends
# ``ROLE_PROMPTS[role]`` as the system message for every generation.
# The strings below are runtime text read by the model: edit with care.
ROLE_PROMPTS = {
# CEO: asked for a JSON object; ``run_agent_chain`` reads its "plan" and
# "initial_task" keys.
"ceo": """You are the CEO. Your job is to create a high-level plan and delegate the first task to your Manager.
Analyze the user's request and provide a list of files to be created.
Then, create the very first task for the Manager to execute.
Respond ONLY with a JSON object.
Example: {"plan": {"files": ["app.py", "backend.py", "tests/test_app.py"]}, "initial_task": "Start by writing the backend.py file with database functions."}""",
# Manager: asked for a single tool call, which ``run_agent_chain`` parses
# with ``_extract_tool_call``.
"manager": """You are the Manager. You receive tasks and questions. Your job is to:
1. Break down work into small steps for workers
2. After a worker completes a task, assign the NEXT logical task (code another file OR write tests)
3. Only finish when ALL code and tests are complete
4. If unsure, ask the CEO
Available actions: `delegate_coder`, `delegate_tester`, `answer_worker`, `ask_ceo`, `finish_project`
Respond ONLY with a single tool call on a new line.
Example Actions:
To delegate a task to a Coder:
`delegate_coder("Write the full Python code for app.py")`
To delegate a task to a Tester:
`delegate_tester("Write unit tests for backend.py")`
To answer a worker's question:
`answer_worker("The main file is main.py, and it should use Pygame.")`
To ask a question to the CEO (e.g., if you are confused):
`ask_ceo("Should we use a relational database or a NoSQL database for this project?")`
To finish the entire project:
`finish_project()`""",
# Coder worker: same single-tool-call reply format as the Manager.
"worker_coder": """You are a Coder. Your only job is to write code based on the Manager's task.
If you need more information or documentation to complete a task, you can use the `scrape_web` tool.
After completing a task, report back using the `task_complete` tool. DO NOT decide what to do next.
The Manager will assign the next file to code.
Available actions: `scrape_web`, `write_code`, `read_file`, `ask_manager`, `task_complete`
Respond ONLY with a single tool call on a new line.
Example Actions:
To scrape a website for information:
`scrape_web("https://docs.python.org/3/library/sqlite3.html")`
To write code to a file. The second argument is a raw string of the code:
`write_code("backend.py", "import sqlite3\\n\\ndef init_db():\\n # ...")`
To read the contents of an existing file:
`read_file("main.py")`
To ask the Manager a question:
`ask_manager("What testing framework should I use for this project?")`
To report that you have finished your task:
`task_complete()`""",
# Tester worker: same single-tool-call reply format as the Manager.
"worker_tester": """You are a Tester. Your only job is to write tests for existing code.
If you need to look up testing libraries or best practices, use the `scrape_web` tool.
After completing test writing, report `task_complete`.
The Manager will decide if more tests are needed or if the project can finish.
Available actions: `scrape_web`, `write_test`, `read_file`, `ask_manager`, `task_complete`
Respond ONLY with a single tool call on a new line.
Example Actions:
To scrape a website for information:
`scrape_web("https://docs.pytest.org/en/stable/")`
To write tests to a file. The second argument is a raw string of the test code:
`write_test("tests/test_backend.py", "import pytest\\n\\ndef test_db_connection():\\n # ...")`
To read the contents of an existing file:
`read_file("app.py")`
To ask the Manager a question:
`ask_manager("Are there any specific edge cases I should focus on for the user login tests?")`
To report that you have finished your task:
`task_complete()`"""
}
# --------------------------------- NEW PARSER ---------------------------------
def _parse_tool_args(args_str: str) -> List[str]:
    """Split the text between a tool call's parentheses into string arguments.

    The text is first parsed as Python call arguments made of literals, so
    quoted strings may contain commas, the other quote character and escape
    sequences (``\\n`` becomes a real newline). Nothing is executed. If the
    text is not a clean list of literals, fall back to a lenient comma
    split with surrounding quotes stripped.
    """
    # Local import keeps this block self-contained.
    import ast

    if not args_str.strip():
        return []  # ``tool()`` has no arguments, not one empty argument.
    try:
        call = ast.parse(f"_tool({args_str})", mode="eval").body
        is_plain_call = (
            isinstance(call, ast.Call)
            and isinstance(call.func, ast.Name)
            and call.func.id == "_tool"
            and not call.keywords
        )
        if is_plain_call:
            values = [ast.literal_eval(node) for node in call.args]
            # Callers expect strings; stringify numbers and other literals.
            return [v if isinstance(v, str) else str(v) for v in values]
    except (SyntaxError, ValueError, TypeError, RecursionError, MemoryError):
        pass  # Not valid literal syntax; use the lenient fallback below.
    return [part.strip().strip("'\"") for part in args_str.split(",")]


def _extract_tool_call(text: str) -> Optional[Dict[str, Any]]:
    """
    Parses a string for a tool-call like `tool_name("arg1", "arg2")` and extracts
    the function name and its arguments.

    Only the first line of ``text`` is inspected. Returns
    ``{"tool_name": str, "args": List[str]}``, or None when that line holds
    no ``name(...)`` pattern (including empty or missing text).
    """
    if not text or not text.strip():
        return None
    first_line = text.strip().split("\n")[0]  # Only consider the first line
    # search() rather than match(): tolerates wrappers such as backticks.
    match = re.search(r'(\w+)\((.*)\)', first_line, re.DOTALL)
    if not match:
        return None
    return {
        "tool_name": match.group(1),
        "args": _parse_tool_args(match.group(2)),
    }
# ------------------------------ FILE SYSTEM & AI TOOLS ------------------------------
def get_project_dir(user_id, project_id):
    """Return ``PROJECT_ROOT/<user_id>/<project_id>``, creating it if needed."""
    segments = (PROJECT_ROOT, str(user_id), str(project_id))
    target_dir = os.path.join(*segments)
    os.makedirs(target_dir, exist_ok=True)
    return target_dir
def create_file(project_dir, path, content):
    """Write ``content`` (UTF-8) to ``path`` inside ``project_dir``.

    Missing parent directories are created. ``path`` originates from model
    output, so it is confined to ``project_dir``.

    Raises:
        ValueError: if ``path`` resolves outside ``project_dir`` (an
            absolute path, ``..`` segments, or a symlink pointing out).
    """
    root = os.path.realpath(project_dir)
    full_path = os.path.realpath(os.path.join(root, path))
    # commonpath compares whole components, so "/proj-evil" does not pass
    # as being inside "/proj" the way a plain prefix test would allow.
    if os.path.commonpath([root, full_path]) != root:
        raise ValueError(f"Refusing to write outside the project directory: {path!r}")
    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    with open(full_path, 'w', encoding='utf-8') as f:
        f.write(content)
def read_file(project_dir, path):
    """Return the UTF-8 text of ``path`` inside ``project_dir``.

    Failures are reported as strings starting with ``"Error:"`` instead of
    being raised: a missing file, a path that resolves outside
    ``project_dir``, or a file that cannot be read or decoded.
    """
    root = os.path.realpath(project_dir)
    try:
        full_path = os.path.realpath(os.path.join(root, path))
        inside_project = os.path.commonpath([root, full_path]) == root
    except ValueError:
        # e.g. embedded NUL byte, or paths on different drives on Windows.
        inside_project = False
    if not inside_project:
        return "Error: Access outside the project directory is not allowed."
    try:
        with open(full_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        return "Error: File not found."
    except (OSError, UnicodeDecodeError) as e:
        # Directories, permission problems, binary files, ...
        return f"Error: Could not read file. {e}"
def zip_project(project_dir, project_id):
    """Zip every file under ``project_dir`` and return the archive path.

    The archive is named ``project_<project_id>.zip`` and is written next to
    ``project_dir`` (in its parent directory). Entries are stored with paths
    relative to ``project_dir`` using DEFLATE compression.
    """
    archive_path = os.path.join(
        os.path.dirname(project_dir), f"project_{project_id}.zip"
    )
    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for folder, _subdirs, filenames in os.walk(project_dir):
            for filename in filenames:
                source = os.path.join(folder, filename)
                archive.write(source, os.path.relpath(source, project_dir))
    return archive_path
def scrape_web(url: str) -> str:
    """Scrapes a URL and returns the clean text content, limited to 5000 chars.

    ``<script>`` and ``<style>`` elements are removed before text
    extraction. Failures are returned as a string starting with ``"Error:"``
    rather than raised, so the result can be fed straight back to an agent.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        for script_or_style in soup(["script", "style"]):
            script_or_style.decompose()
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        # Break on runs of two spaces (side-by-side headline fragments).
        # Splitting on a single space would put every word on its own line.
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = '\n'.join(chunk for chunk in chunks if chunk)
        return text[:5000]
    except requests.exceptions.RequestException as e:
        return f"Error: Could not retrieve content from URL. {e}"
    except Exception as e:
        # Tool boundary: report anything unexpected to the agent as text.
        return f"Error: An unexpected error occurred during web scraping. {e}"
def generate_with_model(role: str, prompt: str) -> str:
    """Generate the reply of agent ``role`` to ``prompt``.

    The role's entry in ``ROLE_PROMPTS`` is the system message and ``prompt``
    the user message. Only the newly generated tokens are decoded. Any
    exception is printed and reported as the string ``"error(<message>)"``
    instead of being raised.
    """
    try:
        model_name = MODEL_REGISTRY[role]
        tokenizer, model = load_model(model_name)
        conversation = [
            {"role": "system", "content": ROLE_PROMPTS[role]},
            {"role": "user", "content": prompt},
        ]
        rendered = tokenizer.apply_chat_template(
            conversation, tokenize=False, add_generation_prompt=True
        )
        inputs = tokenizer(rendered, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=3072,
            pad_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )
        # Drop the echoed prompt tokens; keep only the continuation.
        prompt_length = len(inputs.input_ids[0])
        new_tokens = outputs[0][prompt_length:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        print(f"Error during model generation for role {role}: {e}")
        return f"error({e})"
# ------------------------------ HIERARCHICAL AGENT ORCHESTRATOR ------------------------------
def _parse_ceo_plan(response_text):
    """Turn the CEO's raw reply into a validated plan dict.

    Accepts a bare JSON object, or one wrapped in extra text such as
    Markdown fences (the outermost ``{...}`` span is retried). The result
    has a dict under ``"plan"`` and a non-empty ``"initial_task"``.

    Raises:
        ValueError: if no usable plan can be extracted.
    """
    try:
        plan = json.loads(response_text)
    except ValueError:
        start, end = response_text.find("{"), response_text.rfind("}")
        if start == -1 or end <= start:
            raise ValueError("CEO failed to provide an initial plan.")
        try:
            plan = json.loads(response_text[start:end + 1])
        except ValueError as exc:
            raise ValueError("CEO failed to provide an initial plan.") from exc
    if not isinstance(plan, dict) or not isinstance(plan.get("plan"), dict):
        raise ValueError("CEO failed to provide an initial plan.")
    if not plan.get("initial_task"):
        raise ValueError("CEO plan is missing 'initial_task'.")
    return plan


def _build_agent_prompt(project_context):
    """Render the per-turn user prompt from the shared project context."""
    return (
        "\nProject context:\n"
        f"- User's Goal: {project_context['initial_prompt']}\n"
        f"- File Structure: {project_context['file_structure']}\n"
        f"- Current Task: {project_context['current_task']}\n"
        f"- Result of last action: {project_context['last_action_result']}\n"
        "Based on the context, decide your next action.\n"
    )


def run_agent_chain(project_id, user_id, initial_prompt):
    """Run the CEO -> Manager -> Worker loop for one project.

    The CEO produces a plan and first task; after that the active agent is
    prompted with the shared context and its single tool call is applied,
    until ``finish_project`` is called or ``max_turns`` model calls have
    been made. On success the project directory is zipped and the project
    is marked ``completed``. Any exception marks it ``failed`` and appends
    the error to the logs; nothing is raised from inside that guarded
    section.

    Args:
        project_id: Row id of the project; used for status/log updates.
        user_id: Owner id; used to locate the project directory.
        initial_prompt: The user's request, passed to the CEO.
    """
    project_dir = get_project_dir(user_id, project_id)
    log_entries = []

    def log_step(agent, thought, action_result="", status="running", zip_path=None):
        """Append one Markdown log entry and persist the full log."""
        log_entry = f"**[{agent.upper()}]**\n> {thought}\n"
        if action_result:
            log_entry += f"_{action_result}_\n"
        log_entry += "\n---\n"
        log_entries.append(log_entry)
        update_project_status(project_id, status, logs="".join(log_entries), zip_path=zip_path)

    # Worker role each delegation action hands control to.
    delegate_targets = {"delegate_coder": "worker_coder", "delegate_tester": "worker_tester"}
    # Noun used in the success message of each file-writing action.
    write_labels = {"write_code": "code", "write_test": "tests"}
    next_step_task = "Worker finished task. What is the next high-level step?"

    try:
        project_context = {
            "initial_prompt": initial_prompt,
            "file_structure": [],
            "current_task": "Start the project.",
            "last_action_result": None,
            "turn": 0,
            "max_turns": 30,
        }
        log_step("ORCHESTRATOR", "Briefing the CEO with the user's request.")
        # CEO still uses JSON for its specific output.
        ceo_action = _parse_ceo_plan(generate_with_model("ceo", initial_prompt))
        project_context["file_structure"] = ceo_action["plan"].get("files", [])
        project_context["current_task"] = ceo_action["initial_task"]
        log_step("CEO", f"Initial plan created. First task for Manager: {project_context['current_task']}", f"Files to create: {project_context['file_structure']}")

        next_agent = "manager"
        finished = False
        while project_context["turn"] < project_context["max_turns"]:
            # Every model call consumes a turn, including scrapes, reads and
            # rejected calls, so the loop always terminates.
            project_context["turn"] += 1
            response_text = generate_with_model(next_agent, _build_agent_prompt(project_context))
            tool_call = _extract_tool_call(response_text)
            if not tool_call:
                project_context["last_action_result"] = f"Error: Agent {next_agent} returned invalid response. Retrying."
                log_step("ORCHESTRATOR", f"Invalid response from {next_agent}. Retrying task.", response_text)
                continue

            action = tool_call.get("tool_name")
            args = tool_call.get("args", [])
            # An empty-string first argument counts as missing.
            has_first_arg = bool(args) and bool(str(args[0]).strip())
            log_step(next_agent.replace("_", " "), f"Decided to perform action: `{action}` with args: `{args}`")

            if action in delegate_targets:
                if not has_first_arg:
                    project_context["last_action_result"] = f"Error: Missing task for {action}."
                    next_agent = "manager"
                else:
                    next_agent = delegate_targets[action]
                    project_context["current_task"] = args[0]
                    project_context["last_action_result"] = f"Task delegated to {next_agent.replace('_', ' ')}."
            elif action in write_labels:
                if len(args) < 2:
                    project_context["last_action_result"] = "Error: Missing file_path or code in data."
                else:
                    file_path, code = args[0], args[1]
                    try:
                        create_file(project_dir, file_path, code)
                    except (OSError, ValueError) as exc:
                        # Report a bad path to the agents instead of
                        # failing the whole project.
                        project_context["last_action_result"] = f"Error: Could not write {file_path}. {exc}"
                    else:
                        project_context["last_action_result"] = f"Successfully wrote {write_labels[action]} to {file_path}."
                next_agent = "manager"
                project_context["current_task"] = next_step_task
            elif action == "scrape_web":
                if not has_first_arg:
                    project_context["last_action_result"] = "Error: Missing URL for scrape_web action."
                else:
                    url = args[0]
                    log_step(next_agent.replace("_", " "), f"Scraping {url}...")
                    scraped_content = scrape_web(url)
                    project_context["last_action_result"] = f"Scraping result from {url}:\n\n{scraped_content}"
                # Same agent goes again so it can use the scraped text.
            elif action == "read_file":
                if not has_first_arg:
                    project_context["last_action_result"] = "Error: Missing file path for read_file action."
                else:
                    file_path = args[0]
                    try:
                        file_content = read_file(project_dir, file_path)
                    except (OSError, ValueError) as exc:
                        file_content = f"Error: Could not read file. {exc}"
                    project_context["last_action_result"] = f"Contents of {file_path}:\n\n{file_content}"
                # Same agent goes again so it can use the file contents.
            elif action == "ask_manager":
                if not has_first_arg:
                    project_context["last_action_result"] = "Error: Missing question for ask_manager."
                else:
                    project_context["last_action_result"] = "A worker has a question."
                    project_context["current_task"] = args[0]
                next_agent = "manager"
            elif action == "answer_worker":
                if not has_first_arg:
                    project_context["last_action_result"] = "Error: Missing answer for answer_worker."
                else:
                    project_context["last_action_result"] = f"Manager answered: {args[0]}"
                    project_context["current_task"] = "The worker's question has been answered. What is the next task for a worker?"
                next_agent = "manager"
            elif action == "task_complete":
                project_context["last_action_result"] = "Worker reported task is complete."
                project_context["current_task"] = next_step_task
                next_agent = "manager"
            elif action == "finish_project":
                log_step("MANAGER", "Project is ready for final packaging.")
                finished = True
                break
            else:
                project_context["last_action_result"] = f"Error: Unknown action '{action}'."
                log_step("ORCHESTRATOR", project_context['last_action_result'])

        # A flag, not the turn counter, tells "finished on the last turn"
        # apart from "ran out of turns".
        if not finished:
            raise ValueError("Project failed to complete within the maximum number of turns.")

        log_step("SYSTEM", "Packaging project...")
        zip_path = zip_project(project_dir, project_id)
        # Single final write: the status goes straight to "completed".
        log_step("SYSTEM", "Project completed successfully!", status="completed", zip_path=zip_path)
    except Exception as e:
        # Top-level boundary of the background job: record and stop.
        tb_str = traceback.format_exc()
        print(f"--- AGENT CHAIN FAILED ---\n{tb_str}\n---")
        error_log = "".join(log_entries) + f"\n\n❌ **CRITICAL ERROR:**\n```{str(e)}```"
        update_project_status(project_id, "failed", logs=error_log)
# ------------------------------ JOB QUEUE ------------------------------
# One worker thread: queued projects run one at a time, in submission order.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)


def _report_job_failure(future):
    """Print the traceback of a background job that raised."""
    if future.cancelled():
        return
    exc = future.exception()
    if exc is not None:
        print("--- BACKGROUND JOB FAILED ---")
        traceback.print_exception(type(exc), exc, exc.__traceback__)


def queue_job(project_id, user_id, prompt):
    """Schedule ``run_agent_chain`` for a project on the background executor.

    Returns immediately with no return value. An exception that escapes the
    job is printed by a done-callback instead of being lost with the
    discarded Future.
    """
    print(f"Queuing job for project: {project_id}")
    future = executor.submit(run_agent_chain, project_id, user_id, prompt)
    future.add_done_callback(_report_job_failure)