Spaces:
Running
Running
Merge pull request #10 from CintraAI/enhancement/code-parser-and-test-suite-fix
Browse files
- CodeParser.py +16 -12
- test_code_chunker.py +5 -5
CodeParser.py
CHANGED
|
@@ -2,16 +2,8 @@ import os
|
|
| 2 |
import subprocess
|
| 3 |
from typing import List, Dict, Union, Tuple
|
| 4 |
from tree_sitter import Language, Parser, Node
|
| 5 |
-
from typing import Union, List
|
| 6 |
import logging
|
| 7 |
|
| 8 |
-
def return_simple_line_numbers_with_code(code: str) -> str:
|
| 9 |
-
code_lines = code.split('\n')
|
| 10 |
-
code_with_line_numbers = [f"Line {i + 1}: {line}" for i, line in enumerate(code_lines)]
|
| 11 |
-
joined_lines = "\n".join(code_with_line_numbers)
|
| 12 |
-
return joined_lines
|
| 13 |
-
|
| 14 |
-
|
| 15 |
class CodeParser:
|
| 16 |
# Added a CACHE_DIR class attribute for caching
|
| 17 |
CACHE_DIR = os.path.expanduser("~/.code_parser_cache")
|
|
@@ -53,11 +45,11 @@ class CodeParser:
|
|
| 53 |
try:
|
| 54 |
if os.path.exists(repo_path):
|
| 55 |
logging.info(f"Updating existing repository for {language}")
|
| 56 |
-
update_command = f
|
| 57 |
subprocess.run(update_command, shell=True, check=True)
|
| 58 |
else:
|
| 59 |
logging.info(f"Cloning repository for {language}")
|
| 60 |
-
clone_command = f
|
| 61 |
subprocess.run(clone_command, shell=True, check=True)
|
| 62 |
except subprocess.CalledProcessError as e:
|
| 63 |
logging.error(f"Failed to clone/update repository for {language}. Error: {e}")
|
|
@@ -74,9 +66,12 @@ class CodeParser:
|
|
| 74 |
Language.build_library(build_path, [ts_dir, tsx_dir])
|
| 75 |
else:
|
| 76 |
raise FileNotFoundError(f"TypeScript or TSX directory not found in {repo_path}")
|
| 77 |
-
|
| 78 |
php_dir = os.path.join(repo_path, 'php')
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
| 80 |
else:
|
| 81 |
Language.build_library(build_path, [repo_path])
|
| 82 |
|
|
@@ -84,6 +79,13 @@ class CodeParser:
|
|
| 84 |
logging.info(f"Successfully built and loaded {language} parser")
|
| 85 |
except Exception as e:
|
| 86 |
logging.error(f"Failed to build or load language {language}. Error: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
except Exception as e:
|
| 89 |
logging.error(f"An unexpected error occurred during parser installation: {str(e)}")
|
|
@@ -218,6 +220,8 @@ class CodeParser:
|
|
| 218 |
return node_types[file_extension]
|
| 219 |
elif file_extension == "jsx":
|
| 220 |
return node_types["js"]
|
|
|
|
|
|
|
| 221 |
else:
|
| 222 |
raise ValueError("Unsupported file type")
|
| 223 |
|
|
|
|
| 2 |
import subprocess
|
| 3 |
from typing import List, Dict, Union, Tuple
|
| 4 |
from tree_sitter import Language, Parser, Node
|
|
|
|
| 5 |
import logging
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
class CodeParser:
|
| 8 |
# Added a CACHE_DIR class attribute for caching
|
| 9 |
CACHE_DIR = os.path.expanduser("~/.code_parser_cache")
|
|
|
|
| 45 |
try:
|
| 46 |
if os.path.exists(repo_path):
|
| 47 |
logging.info(f"Updating existing repository for {language}")
|
| 48 |
+
update_command = f"cd {repo_path} && git pull"
|
| 49 |
subprocess.run(update_command, shell=True, check=True)
|
| 50 |
else:
|
| 51 |
logging.info(f"Cloning repository for {language}")
|
| 52 |
+
clone_command = f"git clone https://github.com/tree-sitter/tree-sitter-{language} {repo_path}"
|
| 53 |
subprocess.run(clone_command, shell=True, check=True)
|
| 54 |
except subprocess.CalledProcessError as e:
|
| 55 |
logging.error(f"Failed to clone/update repository for {language}. Error: {e}")
|
|
|
|
| 66 |
Language.build_library(build_path, [ts_dir, tsx_dir])
|
| 67 |
else:
|
| 68 |
raise FileNotFoundError(f"TypeScript or TSX directory not found in {repo_path}")
|
| 69 |
+
elif language == 'php':
|
| 70 |
php_dir = os.path.join(repo_path, 'php')
|
| 71 |
+
if os.path.exists(php_dir):
|
| 72 |
+
Language.build_library(build_path, [php_dir])
|
| 73 |
+
else:
|
| 74 |
+
raise FileNotFoundError(f"PHP directory not found in {repo_path}")
|
| 75 |
else:
|
| 76 |
Language.build_library(build_path, [repo_path])
|
| 77 |
|
|
|
|
| 79 |
logging.info(f"Successfully built and loaded {language} parser")
|
| 80 |
except Exception as e:
|
| 81 |
logging.error(f"Failed to build or load language {language}. Error: {str(e)}")
|
| 82 |
+
logging.error(f"Repository path: {repo_path}")
|
| 83 |
+
logging.error(f"Build path: {build_path}")
|
| 84 |
+
if language == 'typescript':
|
| 85 |
+
logging.error(f"TypeScript dir exists: {os.path.exists(ts_dir)}")
|
| 86 |
+
logging.error(f"TSX dir exists: {os.path.exists(tsx_dir)}")
|
| 87 |
+
elif language == 'php':
|
| 88 |
+
logging.error(f"PHP dir exists: {os.path.exists(php_dir)}")
|
| 89 |
|
| 90 |
except Exception as e:
|
| 91 |
logging.error(f"An unexpected error occurred during parser installation: {str(e)}")
|
|
|
|
| 220 |
return node_types[file_extension]
|
| 221 |
elif file_extension == "jsx":
|
| 222 |
return node_types["js"]
|
| 223 |
+
elif file_extension == "tsx":
|
| 224 |
+
return node_types["ts"]
|
| 225 |
else:
|
| 226 |
raise ValueError("Unsupported file type")
|
| 227 |
|
test_code_chunker.py
CHANGED
|
@@ -104,7 +104,7 @@ class TestCodeChunkerPython(unittest.TestCase):
|
|
| 104 |
class TestCodeChunkerJavaScript(unittest.TestCase):
|
| 105 |
|
| 106 |
def setUp(self):
|
| 107 |
-
self.patcher = patch('
|
| 108 |
self.mock_count_tokens = self.patcher.start()
|
| 109 |
self.code_chunker = CodeChunker(file_extension='js')
|
| 110 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
@@ -181,7 +181,7 @@ class TestCodeChunkerJavaScript(unittest.TestCase):
|
|
| 181 |
class TestCodeChunkerCSS(unittest.TestCase):
|
| 182 |
|
| 183 |
def setUp(self):
|
| 184 |
-
self.patcher = patch('
|
| 185 |
self.mock_count_tokens = self.patcher.start()
|
| 186 |
self.code_chunker = CodeChunker(file_extension='css')
|
| 187 |
#Load the JSON data
|
|
@@ -214,7 +214,7 @@ class TestCodeChunkerCSS(unittest.TestCase):
|
|
| 214 |
class TestCodeChunkerTypeScript(unittest.TestCase):
|
| 215 |
|
| 216 |
def setUp(self):
|
| 217 |
-
self.patcher = patch('
|
| 218 |
self.mock_count_tokens = self.patcher.start()
|
| 219 |
self.code_chunker = CodeChunker(file_extension='ts')
|
| 220 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
@@ -237,7 +237,7 @@ class TestCodeChunkerTypeScript(unittest.TestCase):
|
|
| 237 |
class TestCodeChunkerRuby(unittest.TestCase):
|
| 238 |
|
| 239 |
def setUp(self):
|
| 240 |
-
self.patcher = patch('
|
| 241 |
self.mock_count_tokens = self.patcher.start()
|
| 242 |
self.code_chunker = CodeChunker(file_extension='rb')
|
| 243 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
@@ -260,7 +260,7 @@ class TestCodeChunkerRuby(unittest.TestCase):
|
|
| 260 |
class TestCodeChunkerPHP(unittest.TestCase):
|
| 261 |
|
| 262 |
def setUp(self):
|
| 263 |
-
self.patcher = patch('
|
| 264 |
self.mock_count_tokens = self.patcher.start()
|
| 265 |
self.code_chunker = CodeChunker(file_extension='php')
|
| 266 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
|
|
| 104 |
class TestCodeChunkerJavaScript(unittest.TestCase):
|
| 105 |
|
| 106 |
def setUp(self):
|
| 107 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
| 108 |
self.mock_count_tokens = self.patcher.start()
|
| 109 |
self.code_chunker = CodeChunker(file_extension='js')
|
| 110 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
|
|
| 181 |
class TestCodeChunkerCSS(unittest.TestCase):
|
| 182 |
|
| 183 |
def setUp(self):
|
| 184 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
| 185 |
self.mock_count_tokens = self.patcher.start()
|
| 186 |
self.code_chunker = CodeChunker(file_extension='css')
|
| 187 |
#Load the JSON data
|
|
|
|
| 214 |
class TestCodeChunkerTypeScript(unittest.TestCase):
|
| 215 |
|
| 216 |
def setUp(self):
|
| 217 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
| 218 |
self.mock_count_tokens = self.patcher.start()
|
| 219 |
self.code_chunker = CodeChunker(file_extension='ts')
|
| 220 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
|
|
| 237 |
class TestCodeChunkerRuby(unittest.TestCase):
|
| 238 |
|
| 239 |
def setUp(self):
|
| 240 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
| 241 |
self.mock_count_tokens = self.patcher.start()
|
| 242 |
self.code_chunker = CodeChunker(file_extension='rb')
|
| 243 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
|
|
| 260 |
class TestCodeChunkerPHP(unittest.TestCase):
|
| 261 |
|
| 262 |
def setUp(self):
|
| 263 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
| 264 |
self.mock_count_tokens = self.patcher.start()
|
| 265 |
self.code_chunker = CodeChunker(file_extension='php')
|
| 266 |
self.mock_codebase = load_json('mock_codefiles.json')
|