import unittest
from unittest.mock import patch
import tiktoken
import sys
import os

# Add parent directory to path to allow imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from Chunker import Chunker, CodeChunker
from utils import load_json
# Mock the count_tokens function, since it is external and not the focus of these tests
def mock_count_tokens(string: str, encoding_name='gpt-4') -> int:
    """Returns the number of tokens in a text string."""
    # Note: 'gpt-4' here is a model name; tiktoken maps it to the matching encoding.
    encoding = tiktoken.encoding_for_model(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens
class BaseChunkerTest(unittest.TestCase):
    """Base class for all code chunker tests with common setup and utilities.

    Subclasses are expected to assign a configured chunker to self.code_chunker
    in their own setUp before calling run_chunker_test.
    """

    def setUp(self):
        self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
        self.mock_count_tokens = self.patcher.start()
        self.mock_codebase = load_json('mock_codefiles.json')

    def tearDown(self):
        self.patcher.stop()

    def run_chunker_test(self, code, token_limit=20):
        """Helper method to run standard chunker tests."""
        chunks = self.code_chunker.chunk(code, token_limit=token_limit)
        Chunker.print_chunks(chunks)
        final_code = Chunker.consolidate_chunks_into_file(chunks)
        num_lines = Chunker.count_lines(final_code)

        # Common assertions: the consolidated output keeps the original line count
        # and still contains the input code verbatim.
        self.assertEqual(num_lines, len(code.split("\n")))
        self.assertIn(code, final_code)
        return chunks, final_code
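

# --- Illustrative sketch, not part of the original test suite ---
# A concrete test case would subclass BaseChunkerTest, construct a CodeChunker for
# the language under test, and feed it a snippet (e.g. from the mock codebase).
# The CodeChunker constructor arguments and the 'simple.py' key below are
# assumptions about this project's API, shown only to illustrate the pattern.
class ExamplePythonChunkerTest(BaseChunkerTest):
    def setUp(self):
        super().setUp()
        # Assumed constructor signature; adjust to the real CodeChunker API.
        self.code_chunker = CodeChunker(file_extension='py')

    def test_chunk_simple_python_code(self):
        # Fall back to an inline snippet if the assumed key is absent from the mock data.
        py_code = self.mock_codebase.get('simple.py', "print('hello')\nprint('world')")
        chunks, final_code = self.run_chunker_test(py_code, token_limit=20)
        self.assertGreater(len(chunks), 0)


if __name__ == '__main__':
    unittest.main()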