Keeby-smilyai committed on
Commit
1c29bb4
·
verified ·
1 Parent(s): 8fca3fa

Update backend.py

Browse files
Files changed (1) hide show
  1. backend.py +59 -48
backend.py CHANGED
@@ -1,4 +1,4 @@
1
- # backend.py — FINAL HARDENED VERSION v1.2
2
  import sqlite3
3
  import os
4
  import json
@@ -11,7 +11,7 @@ from typing import Optional, Dict, Any
11
 
12
  import torch
13
  import psutil
14
- from transformers import AutoTokenizer, AutoModelForCausalLM
15
 
16
  # --- CONFIGURATION ---
17
  DB_PATH = "code_agents_pro.db"
@@ -27,7 +27,6 @@ def init_db():
27
  CREATE TABLE IF NOT EXISTS projects (id INTEGER PRIMARY KEY, user_id INTEGER, title TEXT, description TEXT, status TEXT DEFAULT 'queued', zip_path TEXT, logs TEXT DEFAULT '', created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (user_id) REFERENCES users(id));
28
  CREATE INDEX IF NOT EXISTS idx_user_status ON projects(user_id, status);
29
  """)
30
-
31
  init_db()
32
 
33
  def _db_execute(query, params=(), fetchone=False, fetchall=False, commit=False):
@@ -47,12 +46,10 @@ def _db_execute(query, params=(), fetchone=False, fetchall=False, commit=False):
47
 
48
  def hash_password(password): return hashlib.sha256(password.encode()).hexdigest()
49
  def verify_password(password, stored_hash): return hash_password(password) == stored_hash
50
-
51
  def create_user(username, password):
52
  try:
53
  return _db_execute("INSERT INTO users (username, password_hash) VALUES (?, ?)", (username, hash_password(password)), commit=True)
54
  except sqlite3.IntegrityError: return None
55
-
56
  def get_user_by_username(username): return _db_execute("SELECT * FROM users WHERE username = ?", (username,), fetchone=True)
57
  def get_user_projects(user_id, limit=20): return _db_execute("SELECT * FROM projects WHERE user_id = ? ORDER BY created_at DESC LIMIT ?", (user_id, limit), fetchall=True)
58
  def create_project(user_id, title, description): return _db_execute("INSERT INTO projects (user_id, title, description) VALUES (?, ?, ?)", (user_id, title, description), commit=True)
@@ -62,37 +59,56 @@ def get_project(project_id): return _db_execute("SELECT * FROM projects WHERE id
62
  # ------------------------------ MODEL LOADING & CACHING ------------------------------
63
  MODEL_REGISTRY = {
64
  "planner": "microsoft/Phi-3-mini-4k-instruct",
65
- "architect": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
66
- "coder": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
 
67
  "reviewer": "microsoft/Phi-3-mini-4k-instruct",
68
- "tester": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
69
  "publisher": "microsoft/Phi-3-mini-4k-instruct",
70
  }
71
  _MODEL_CACHE = {}
72
 
73
  def load_model(model_name):
74
- if model_name in _MODEL_CACHE: return _MODEL_CACHE[model_name]
75
- print(f"Loading model: {model_name}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  tokenizer = AutoTokenizer.from_pretrained(model_name)
77
- model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", device_map="auto", trust_remote_code=True, attn_implementation="eager")
 
78
  _MODEL_CACHE[model_name] = (tokenizer, model)
79
  print(f"Model {model_name} loaded and cached.")
80
  return tokenizer, model
81
 
82
- # ------------------------------ AGENT PROMPTS (SIMPLIFIED & ROBUST) ------------------------------
83
  ROLE_PROMPTS = {
84
- "planner": """You are an expert file planner. Based on the user's request, determine the necessary file structure.
85
- Output ONLY a single JSON object with a single key: "files".
86
- The "files" key MUST be an array of strings representing complete file paths (e.g., ["src/main.py", "tests/test_main.py", "requirements.txt"]).""",
87
- "architect": """You are a software architect. Create initial placeholder content for a list of files.
88
- Output ONLY a single JSON object where keys are file paths and values are the initial content (e.g., a comment like '# Main application logic here').""",
89
  "coder": "You are a professional programmer. Your ONLY job is to write the complete, clean, and functional code for the single file requested. Do NOT add any explanations, introductions, or markdown formatting. Output ONLY the raw source code.",
90
- "reviewer": """You are a meticulous code reviewer. Analyze the given code for bugs, style issues, and security vulnerabilities.
91
- Output ONLY a single JSON object with two keys: "has_issues" (boolean) and "suggestions" (a string containing a bulleted list of required changes).""",
92
  "tester": "You are a QA engineer. Write a complete pytest test file for the given source code. Cover main functionality and edge cases. Output ONLY the raw source code for the test file.",
93
- "publisher": """You are a release manager. Create final documentation and configuration files.
94
- Output ONLY a single JSON object where keys are the filenames ("README.md", ".gitignore", "Dockerfile") and values are their complete string content."""
95
  }
 
96
  # ------------------------------ FILE SYSTEM & AI TOOLS ------------------------------
97
  def get_project_dir(user_id, project_id):
98
  path = os.path.join(PROJECT_ROOT, str(user_id), str(project_id))
@@ -102,20 +118,26 @@ def get_project_dir(user_id, project_id):
102
  def create_file(project_dir, path, content):
103
  full_path = os.path.join(project_dir, path)
104
  os.makedirs(os.path.dirname(full_path), exist_ok=True)
105
- with open(full_path, 'w', encoding='utf-8') as f: f.write(content)
 
106
 
107
  def read_file(project_dir, path):
108
  full_path = os.path.join(project_dir, path)
109
  try:
110
- with open(full_path, 'r', encoding='utf-8') as f: return f.read()
111
- except FileNotFoundError: return None
 
 
112
 
113
  def zip_project(project_dir, project_id):
114
  zip_filename = f"project_{project_id}.zip"
115
  zip_path = os.path.join(os.path.dirname(project_dir), zip_filename)
116
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
117
  for root, _, files in os.walk(project_dir):
118
- for file in files: zf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), project_dir))
 
 
 
119
  return zip_path
120
 
121
  def _extract_json(text: str) -> Optional[Dict[str, Any]]:
@@ -135,7 +157,7 @@ def generate_with_model(role: str, prompt: str) -> str:
135
  input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
136
  inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
137
 
138
- outputs = model.generate(**inputs, max_new_tokens=2048, pad_token_id=tokenizer.eos_token_id, use_cache=False)
139
 
140
  return tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True).strip()
141
  except Exception as e:
@@ -156,24 +178,20 @@ def run_agent_chain(project_id, user_id, initial_prompt):
156
  try:
157
  log_step("SYSTEM", "Initializing project...")
158
 
159
- # 1. PLANNER
160
- log_step("PLANNER", "Determining file structure from user request...")
161
  plan_response = generate_with_model("planner", initial_prompt)
162
  plan_data = _extract_json(plan_response)
163
- if not plan_data or "files" not in plan_data: raise ValueError("Planner failed to create a valid JSON plan with a 'files' key.")
164
-
165
  log_step("PLANNER", "File structure plan created.", json.dumps(plan_data, indent=2))
166
-
167
- # 2. ARCHITECT
168
  log_step("ARCHITECT", "Creating initial file skeletons...")
169
  arch_prompt = f"Create initial content for these files:\n{json.dumps(plan_data['files'])}"
170
  arch_response = generate_with_model("architect", arch_prompt)
171
  arch_data = _extract_json(arch_response)
172
- if not arch_data: raise ValueError("Architect failed to create valid JSON file structures.")
173
  for path, content in arch_data.items(): create_file(project_dir, path, content)
174
  log_step("ARCHITECT", "File skeletons created.", "\n".join(arch_data.keys()))
175
 
176
- # 3. CODER
177
  source_files = [f for f in plan_data['files'] if f.startswith('src/') and f.endswith('.py')]
178
  for file_path in source_files:
179
  log_step("CODER", f"Writing complete code for `{file_path}`...")
@@ -182,37 +200,30 @@ def run_agent_chain(project_id, user_id, initial_prompt):
182
  create_file(project_dir, file_path, code)
183
  log_step("CODER", f"Finished writing `{file_path}`.", code)
184
 
185
- # 4. REVIEWER
186
- log_step("REVIEWER", "Reviewing all generated source code...")
187
  for file_path in source_files:
188
  code_content = read_file(project_dir, file_path)
189
  if not code_content: continue
190
- review_prompt = f"Review this code from `{file_path}`:\n\n{code_content}"
191
- review_response = generate_with_model("reviewer", review_prompt)
192
  log_step("REVIEWER", f"Review of `{file_path}` complete.", review_response)
193
 
194
- # 5. TESTER
195
- log_step("TESTER", "Writing unit tests...")
196
  for file_path in source_files:
197
  code_content = read_file(project_dir, file_path)
198
  if not code_content: continue
199
  test_file_path = os.path.join("tests", f"test_{os.path.basename(file_path)}")
200
- tester_prompt = f"Write a pytest test file (`{test_file_path}`) for this code from `{file_path}`:\n\n{code_content}"
201
- test_code = generate_with_model("tester", tester_prompt)
202
  create_file(project_dir, test_file_path, test_code)
203
  log_step("TESTER", f"Generated test `{test_file_path}`.", test_code)
204
 
205
- # 6. PUBLISHER
206
  log_step("PUBLISHER", "Generating final documentation and configuration...")
207
- all_files = [os.path.join(r, f).replace(project_dir, '', 1) for r, d, fs in os.walk(project_dir) for f in fs]
208
- pub_prompt = f"Project file structure: {json.dumps(all_files)}. Generate README.md, .gitignore, and Dockerfile."
209
- pub_response = generate_with_model("publisher", pub_prompt)
210
  pub_data = _extract_json(pub_response)
211
  if not pub_data: raise ValueError("Publisher failed to create valid final assets.")
212
  for path, content in pub_data.items(): create_file(project_dir, path, content)
213
  log_step("PUBLISHER", "Final assets created.", json.dumps(pub_data, indent=2))
214
 
215
- # 7. FINALIZATION
216
  log_step("SYSTEM", "Packaging project into a ZIP file...")
217
  zip_path = zip_project(project_dir, project_id)
218
  update_project_status(project_id, "completed", logs="".join(log_entries), zip_path=zip_path)
@@ -228,5 +239,5 @@ def run_agent_chain(project_id, user_id, initial_prompt):
228
  executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
229
 
230
  def queue_job(project_id, user_id, prompt):
231
- print(f"Queuing job for project_id: {project_id}, user_id: {user_id}")
232
  executor.submit(run_agent_chain, project_id, user_id, prompt)
 
1
+ # backend.py — FINAL, STABLE & OPTIMIZED VERSION v1.6
2
  import sqlite3
3
  import os
4
  import json
 
11
 
12
  import torch
13
  import psutil
14
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
15
 
16
  # --- CONFIGURATION ---
17
  DB_PATH = "code_agents_pro.db"
 
27
  CREATE TABLE IF NOT EXISTS projects (id INTEGER PRIMARY KEY, user_id INTEGER, title TEXT, description TEXT, status TEXT DEFAULT 'queued', zip_path TEXT, logs TEXT DEFAULT '', created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (user_id) REFERENCES users(id));
28
  CREATE INDEX IF NOT EXISTS idx_user_status ON projects(user_id, status);
29
  """)
 
30
  init_db()
31
 
32
  def _db_execute(query, params=(), fetchone=False, fetchall=False, commit=False):
 
46
 
47
  def hash_password(password): return hashlib.sha256(password.encode()).hexdigest()
48
  def verify_password(password, stored_hash): return hash_password(password) == stored_hash
 
49
  def create_user(username, password):
50
  try:
51
  return _db_execute("INSERT INTO users (username, password_hash) VALUES (?, ?)", (username, hash_password(password)), commit=True)
52
  except sqlite3.IntegrityError: return None
 
53
  def get_user_by_username(username): return _db_execute("SELECT * FROM users WHERE username = ?", (username,), fetchone=True)
54
  def get_user_projects(user_id, limit=20): return _db_execute("SELECT * FROM projects WHERE user_id = ? ORDER BY created_at DESC LIMIT ?", (user_id, limit), fetchall=True)
55
  def create_project(user_id, title, description): return _db_execute("INSERT INTO projects (user_id, title, description) VALUES (?, ?, ?)", (user_id, title, description), commit=True)
 
59
  # ------------------------------ MODEL LOADING & CACHING ------------------------------
60
  MODEL_REGISTRY = {
61
  "planner": "microsoft/Phi-3-mini-4k-instruct",
62
+ # Model id corrected from "0.6B" to the "0.5B" Qwen2.5-Coder instruct checkpoint.
63
+ "architect": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
64
+ "coder": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
65
  "reviewer": "microsoft/Phi-3-mini-4k-instruct",
66
+ "tester": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
67
  "publisher": "microsoft/Phi-3-mini-4k-instruct",
68
  }
69
  _MODEL_CACHE = {}
70
 
71
  def load_model(model_name):
72
+ if model_name in _MODEL_CACHE:
73
+ return _MODEL_CACHE[model_name]
74
+
75
+ # --- THIS IS THE FIX: Conditional Quantization for CPU/GPU compatibility ---
76
+ model_kwargs = {
77
+ "device_map": "auto",
78
+ "trust_remote_code": True,
79
+ "attn_implementation": "eager",
80
+ }
81
+
82
+ if torch.cuda.is_available():
83
+ print(f"CUDA is available. Loading model '{model_name}' in 4-bit for GPU acceleration.")
84
+ bnb_config = BitsAndBytesConfig(
85
+ load_in_4bit=True,
86
+ bnb_4bit_use_double_quant=True,
87
+ bnb_4bit_quant_type="nf4",
88
+ bnb_4bit_compute_dtype=torch.bfloat16
89
+ )
90
+ model_kwargs["quantization_config"] = bnb_config
91
+ else:
92
+ print(f"CUDA not available. Loading model '{model_name}' on CPU in default precision.")
93
+ # No quantization on CPU
94
+
95
  tokenizer = AutoTokenizer.from_pretrained(model_name)
96
+ model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
97
+
98
  _MODEL_CACHE[model_name] = (tokenizer, model)
99
  print(f"Model {model_name} loaded and cached.")
100
  return tokenizer, model
101
 
102
+ # ------------------------------ AGENT PROMPTS ------------------------------
103
  ROLE_PROMPTS = {
104
+ "planner": """You are an expert file planner. Based on the user's request, determine the necessary file structure. Output ONLY a single JSON object with a single key: "files". The "files" key MUST be an array of strings representing complete file paths (e.g., ["src/main.py", "tests/test_main.py", "requirements.txt"]).""",
105
+ "architect": """You are a software architect. Create initial placeholder content for a list of files. Output ONLY a single JSON object where keys are file paths and values are the initial content (e.g., a comment like '# Main application logic here').""",
 
 
 
106
  "coder": "You are a professional programmer. Your ONLY job is to write the complete, clean, and functional code for the single file requested. Do NOT add any explanations, introductions, or markdown formatting. Output ONLY the raw source code.",
107
+ "reviewer": """You are a meticulous code reviewer. Analyze the given code for bugs, style issues, and security vulnerabilities. Output ONLY a single JSON object with two keys: "has_issues" (boolean) and "suggestions" (a string containing a bulleted list of required changes).""",
 
108
  "tester": "You are a QA engineer. Write a complete pytest test file for the given source code. Cover main functionality and edge cases. Output ONLY the raw source code for the test file.",
109
+ "publisher": """You are a release manager. Create final documentation and configuration files. Output ONLY a single JSON object where keys are the filenames ("README.md", ".gitignore", "Dockerfile") and values are their complete string content."""
 
110
  }
111
+
112
  # ------------------------------ FILE SYSTEM & AI TOOLS ------------------------------
113
  def get_project_dir(user_id, project_id):
114
  path = os.path.join(PROJECT_ROOT, str(user_id), str(project_id))
 
118
  def create_file(project_dir, path, content):
119
  full_path = os.path.join(project_dir, path)
120
  os.makedirs(os.path.dirname(full_path), exist_ok=True)
121
+ with open(full_path, 'w', encoding='utf-8') as f:
122
+ f.write(content)
123
 
124
  def read_file(project_dir, path):
125
  full_path = os.path.join(project_dir, path)
126
  try:
127
+ with open(full_path, 'r', encoding='utf-8') as f:
128
+ return f.read()
129
+ except FileNotFoundError:
130
+ return None
131
 
132
  def zip_project(project_dir, project_id):
133
  zip_filename = f"project_{project_id}.zip"
134
  zip_path = os.path.join(os.path.dirname(project_dir), zip_filename)
135
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
136
  for root, _, files in os.walk(project_dir):
137
+ for file in files:
138
+ full_path = os.path.join(root, file)
139
+ arcname = os.path.relpath(full_path, project_dir)
140
+ zf.write(full_path, arcname)
141
  return zip_path
142
 
143
  def _extract_json(text: str) -> Optional[Dict[str, Any]]:
 
157
  input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
158
  inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
159
 
160
+ outputs = model.generate(**inputs, max_new_tokens=2048, pad_token_id=tokenizer.eos_token_id, use_cache=True)
161
 
162
  return tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True).strip()
163
  except Exception as e:
 
178
  try:
179
  log_step("SYSTEM", "Initializing project...")
180
 
181
+ log_step("PLANNER", "Determining file structure...")
 
182
  plan_response = generate_with_model("planner", initial_prompt)
183
  plan_data = _extract_json(plan_response)
184
+ if not plan_data or "files" not in plan_data: raise ValueError("Planner failed to create a valid JSON plan.")
 
185
  log_step("PLANNER", "File structure plan created.", json.dumps(plan_data, indent=2))
186
+
 
187
  log_step("ARCHITECT", "Creating initial file skeletons...")
188
  arch_prompt = f"Create initial content for these files:\n{json.dumps(plan_data['files'])}"
189
  arch_response = generate_with_model("architect", arch_prompt)
190
  arch_data = _extract_json(arch_response)
191
+ if not arch_data: raise ValueError("Architect failed to create valid JSON skeletons.")
192
  for path, content in arch_data.items(): create_file(project_dir, path, content)
193
  log_step("ARCHITECT", "File skeletons created.", "\n".join(arch_data.keys()))
194
 
 
195
  source_files = [f for f in plan_data['files'] if f.startswith('src/') and f.endswith('.py')]
196
  for file_path in source_files:
197
  log_step("CODER", f"Writing complete code for `{file_path}`...")
 
200
  create_file(project_dir, file_path, code)
201
  log_step("CODER", f"Finished writing `{file_path}`.", code)
202
 
 
 
203
  for file_path in source_files:
204
  code_content = read_file(project_dir, file_path)
205
  if not code_content: continue
206
+ log_step("REVIEWER", f"Reviewing `{file_path}`...")
207
+ review_response = generate_with_model("reviewer", f"Review this code from `{file_path}`:\n\n{code_content}")
208
  log_step("REVIEWER", f"Review of `{file_path}` complete.", review_response)
209
 
 
 
210
  for file_path in source_files:
211
  code_content = read_file(project_dir, file_path)
212
  if not code_content: continue
213
  test_file_path = os.path.join("tests", f"test_{os.path.basename(file_path)}")
214
+ log_step("TESTER", f"Writing test for `{file_path}`...")
215
+ test_code = generate_with_model("tester", f"Write a pytest test file (`{test_file_path}`) for this code from `{file_path}`:\n\n{code_content}")
216
  create_file(project_dir, test_file_path, test_code)
217
  log_step("TESTER", f"Generated test `{test_file_path}`.", test_code)
218
 
219
+ all_files = [os.path.join(r, f).replace(project_dir, '', 1) for r, _, fs in os.walk(project_dir) for f in fs]
220
  log_step("PUBLISHER", "Generating final documentation and configuration...")
221
+ pub_response = generate_with_model("publisher", f"Project file structure: {json.dumps(all_files)}. Generate README.md, .gitignore, and Dockerfile.")
 
 
222
  pub_data = _extract_json(pub_response)
223
  if not pub_data: raise ValueError("Publisher failed to create valid final assets.")
224
  for path, content in pub_data.items(): create_file(project_dir, path, content)
225
  log_step("PUBLISHER", "Final assets created.", json.dumps(pub_data, indent=2))
226
 
 
227
  log_step("SYSTEM", "Packaging project into a ZIP file...")
228
  zip_path = zip_project(project_dir, project_id)
229
  update_project_status(project_id, "completed", logs="".join(log_entries), zip_path=zip_path)
 
239
  executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
240
 
241
  def queue_job(project_id, user_id, prompt):
242
+ print(f"Queuing job for project: {project_id}")
243
  executor.submit(run_agent_chain, project_id, user_id, prompt)