Spaces:
Running
Running
| import unittest | |
| from unittest.mock import patch | |
| from Chunker import Chunker, CodeChunker | |
| import tiktoken | |
| from utils import load_json | |
| # Mocking the count_tokens function as it's external and not the focus of these tests | |
| def mock_count_tokens(string: str, encoding_name='gpt-4') -> int: | |
| """Returns the number of tokens in a text string.""" | |
| encoding = tiktoken.encoding_for_model(encoding_name) | |
| num_tokens = len(encoding.encode(string)) | |
| return num_tokens | |
| # Python Test Class | |
| class TestCodeChunkerPython(unittest.TestCase): | |
| def setUp(self): | |
| self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens) | |
| self.mock_count_tokens = self.patcher.start() | |
| self.code_chunker = CodeChunker(file_extension='py') | |
| self.mock_codebase = load_json('mock_codefiles.json') | |
| def tearDown(self): | |
| self.patcher.stop() | |
| def test_chunk_simple_code(self): | |
| py_code = self.mock_codebase['simple.py'] | |
| first_chunk_token_limit = mock_count_tokens("import sys") | |
| print(f"first_chunk_token_limit = {first_chunk_token_limit}") | |
| chunks = self.code_chunker.chunk(py_code, token_limit=25) | |
| token_count = self.mock_count_tokens(py_code) | |
| print(f"token_count = {token_count}") | |
| print(f"original code:\n {py_code}") | |
| Chunker.print_chunks(chunks) | |
| full_code = Chunker.consolidate_chunks_into_file(chunks) | |
| print(f"code after consolidation:\n {full_code}") | |
| num_lines = Chunker.count_lines(full_code) | |
| self.assertEqual(num_lines, len(py_code.split("\n"))) # The number of lines should be the same | |
| self.assertIn(full_code, py_code) # The full code should be in the original code | |
| self.assertEqual(len(chunks), 2) # There should be 2 chunks | |
| self.assertIn("import sys", chunks[1]) # The first chunk should contain the import statement | |
| self.assertIn("print('Hello, world!')", chunks[2]) # The second chunk should contain the print statement | |
| def test_chunk_code_text_only(self): | |
| py_code = self.mock_codebase['text_only.py'] | |
| chunks = self.code_chunker.chunk(py_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(py_code.split("\n"))) # The number of lines should be the same | |
| self.assertIn(py_code, final_code) # The full code should be in the original code | |
| self.assertEqual(len(chunks), 1) | |
| self.assertIn("This file is empty and should test the chunker's ability to handle empty files", chunks[1]) | |
| def test_chunk_code_with_routes(self): | |
| py_code = self.mock_codebase['routes.py'] | |
| chunks = self.code_chunker.chunk(py_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(py_code.split("\n"))) # The number of lines should be the same | |
| self.assertIn(py_code, final_code) # The full code should be in the original code | |
| def test_chunk_code_with_models(self): | |
| py_code = self.mock_codebase['models.py'] | |
| chunks = self.code_chunker.chunk(py_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(py_code.split("\n"))) | |
| self.assertIn(py_code, final_code) | |
| def test_chunk_code_with_main(self): | |
| py_code = self.mock_codebase['main.py'] | |
| chunks = self.code_chunker.chunk(py_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(py_code.split("\n"))) | |
| self.assertIn(py_code, final_code) | |
| def test_chunk_code_with_utilities(self): | |
| py_code = self.mock_codebase['utilities.py'] | |
| chunks = self.code_chunker.chunk(py_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(py_code.split("\n"))) | |
| self.assertIn(py_code, final_code) | |
| def test_chunk_code_with_big_class(self): | |
| py_code = self.mock_codebase['big_class.py'] | |
| chunks = self.code_chunker.chunk(py_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(py_code.split("\n"))) | |
| self.assertIn(py_code, final_code) | |
| # JavaScript Test Class | |
| class TestCodeChunkerJavaScript(unittest.TestCase): | |
| def setUp(self): | |
| self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens) | |
| self.mock_count_tokens = self.patcher.start() | |
| self.code_chunker = CodeChunker(file_extension='js') | |
| self.mock_codebase = load_json('mock_codefiles.json') | |
| def tearDown(self): | |
| self.patcher.stop() | |
| def test_chunk_javascript_simple_code(self): | |
| js_code = self.mock_codebase['simple.js'] | |
| chunks = self.code_chunker.chunk(js_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(js_code.split("\n"))) | |
| self.assertIn(js_code, final_code) | |
| def test_chunk_javascript_with_routes(self): | |
| js_code = self.mock_codebase['routes.js'] | |
| chunks = self.code_chunker.chunk(js_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(js_code.split("\n"))) | |
| self.assertIn(js_code, final_code) | |
| def test_chunk_javascript_with_models(self): | |
| js_code = self.mock_codebase['models.js'] | |
| chunks = self.code_chunker.chunk(js_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(js_code.split("\n"))) | |
| self.assertIn(js_code, final_code) | |
| def test_chunk_javascript_with_main(self): | |
| js_code = self.mock_codebase['main.js'] | |
| chunks = self.code_chunker.chunk(js_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(js_code.split("\n"))) | |
| self.assertIn(js_code, final_code) | |
| def test_chunk_javascript_with_utilities(self): | |
| js_code = self.mock_codebase['utilities.js'] | |
| chunks = self.code_chunker.chunk(js_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(js_code.split("\n"))) | |
| self.assertIn(js_code, final_code) | |
| def test_chunk_javascript_with_big_class(self): | |
| js_code = self.mock_codebase['big_class.js'] | |
| chunks = self.code_chunker.chunk(js_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(js_code.split("\n"))) | |
| self.assertIn(js_code, final_code) | |
| def test_chunk_javascript_with_react_component(self): | |
| js_code = self.mock_codebase['react_component.js'] | |
| chunks = self.code_chunker.chunk(js_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(js_code.split("\n"))) | |
| self.assertIn(js_code, final_code) | |
| # CSS Test Class | |
| class TestCodeChunkerCSS(unittest.TestCase): | |
| def setUp(self): | |
| self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens) | |
| self.mock_count_tokens = self.patcher.start() | |
| self.code_chunker = CodeChunker(file_extension='css') | |
| #Load the JSON data | |
| self.mock_codebase = load_json('mock_codefiles.json') | |
| def tearDown(self): | |
| self.patcher.stop() | |
| def test_chunk_css_with_media_query(self): | |
| css_code = self.mock_codebase['media_queries.css'] | |
| chunks = self.code_chunker.chunk(css_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(css_code.split("\n"))) | |
| self.assertIn(css_code, final_code) | |
| def test_chunk_css_with_simple_css(self): | |
| css_code = self.mock_codebase['simple_styles.css'] | |
| chunks = self.code_chunker.chunk(css_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(Chunker.consolidate_chunks_into_file(chunks)) | |
| self.assertEqual(num_lines, len(css_code.split("\n"))) | |
| self.assertIn(css_code, final_code) | |
| # TypeScript Test Class | |
| class TestCodeChunkerTypeScript(unittest.TestCase): | |
| def setUp(self): | |
| self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens) | |
| self.mock_count_tokens = self.patcher.start() | |
| self.code_chunker = CodeChunker(file_extension='ts') | |
| self.mock_codebase = load_json('mock_codefiles.json') | |
| def tearDown(self): | |
| self.patcher.stop() | |
| def test_chunk_typescript_code(self): | |
| ts_code = self.mock_codebase['example.ts'] | |
| chunks = self.code_chunker.chunk(ts_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(final_code) | |
| self.assertEqual(num_lines, len(ts_code.split("\n"))) | |
| self.assertIn(ts_code, final_code) | |
| self.assertGreater(len(chunks), 1) # Ensure the code is actually chunked | |
| # Ruby Test Class | |
| class TestCodeChunkerRuby(unittest.TestCase): | |
| def setUp(self): | |
| self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens) | |
| self.mock_count_tokens = self.patcher.start() | |
| self.code_chunker = CodeChunker(file_extension='rb') | |
| self.mock_codebase = load_json('mock_codefiles.json') | |
| def tearDown(self): | |
| self.patcher.stop() | |
| def test_chunk_ruby_code(self): | |
| rb_code = self.mock_codebase['example.rb'] | |
| chunks = self.code_chunker.chunk(rb_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(final_code) | |
| self.assertEqual(num_lines, len(rb_code.split("\n"))) | |
| self.assertIn(rb_code, final_code) | |
| self.assertGreater(len(chunks), 1) # Ensure the code is actually chunked | |
| # PHP Test Class | |
| class TestCodeChunkerPHP(unittest.TestCase): | |
| def setUp(self): | |
| self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens) | |
| self.mock_count_tokens = self.patcher.start() | |
| self.code_chunker = CodeChunker(file_extension='php') | |
| self.mock_codebase = load_json('mock_codefiles.json') | |
| def tearDown(self): | |
| self.patcher.stop() | |
| def test_chunk_php_code(self): | |
| php_code = self.mock_codebase['example.php'] | |
| chunks = self.code_chunker.chunk(php_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(final_code) | |
| self.assertEqual(num_lines, len(php_code.split("\n"))) | |
| self.assertIn(php_code, final_code) | |
| self.assertGreater(len(chunks), 1) # Ensure the code is actually chunked | |
| # Golang Test Class | |
| class TestCodeChunkerGolang(unittest.TestCase): | |
| def setUp(self): | |
| self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens) | |
| self.mock_count_tokens = self.patcher.start() | |
| self.code_chunker = CodeChunker(file_extension='go') | |
| self.mock_codebase = load_json('mock_codefiles.json') | |
| def tearDown(self): | |
| self.patcher.stop() | |
| def test_chunk_golang_simple_code(self): | |
| go_code = self.mock_codebase['simple.go'] | |
| chunks = self.code_chunker.chunk(go_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(final_code) | |
| self.assertEqual(num_lines, len(go_code.split("\n"))) | |
| self.assertIn(go_code, final_code) | |
| self.assertGreater(len(chunks), 1) # Ensure the code is actually chunked | |
| def test_chunk_golang_with_structs(self): | |
| go_code = self.mock_codebase['structs.go'] | |
| chunks = self.code_chunker.chunk(go_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(final_code) | |
| self.assertEqual(num_lines, len(go_code.split("\n"))) | |
| self.assertIn(go_code, final_code) | |
| self.assertGreater(len(chunks), 1) | |
| def test_chunk_golang_with_interfaces(self): | |
| go_code = self.mock_codebase['interfaces.go'] | |
| chunks = self.code_chunker.chunk(go_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(final_code) | |
| self.assertEqual(num_lines, len(go_code.split("\n"))) | |
| self.assertIn(go_code, final_code) | |
| self.assertGreater(len(chunks), 1) | |
| def test_chunk_golang_with_goroutines(self): | |
| go_code = self.mock_codebase['goroutines.go'] | |
| chunks = self.code_chunker.chunk(go_code, token_limit=20) | |
| Chunker.print_chunks(chunks) | |
| final_code = Chunker.consolidate_chunks_into_file(chunks) | |
| num_lines = Chunker.count_lines(final_code) | |
| self.assertEqual(num_lines, len(go_code.split("\n"))) | |
| self.assertIn(go_code, final_code) | |
| self.assertGreater(len(chunks), 1) | |
| if __name__ == '__main__': | |
| unittest.main() | |