| from typing import Dict | |
| from transformers import PreTrainedTokenizer, AddedToken | |
class CustomTokenizer(PreTrainedTokenizer):
    """Minimal whitespace tokenizer built on ``PreTrainedTokenizer``.

    The class defines no vocabulary of its own: ``get_vocab`` returns an
    empty mapping and ``tokenize`` simply splits the input on whitespace.
    Both ``__init__`` and ``tokenize`` print a trace line to stdout.
    """

    def __init__(self, **kwargs):
        """Initialise the base tokenizer, then print a trace message.

        Args:
            **kwargs: Forwarded unchanged to ``PreTrainedTokenizer.__init__``.
        """
        super().__init__(**kwargs)
        print("Initializing CustomTokenizer")

    def tokenize(self, text, **kwargs):
        """Split *text* on runs of whitespace.

        Args:
            text: The string to tokenize.
            **kwargs: Accepted for signature compatibility with the base
                class ``tokenize(text, **kwargs)``; ignored here.

        Returns:
            The list of whitespace-separated pieces of *text* (an empty
            list when *text* is empty or contains only whitespace).
        """
        # Extra keyword arguments are swallowed on purpose: callers written
        # against the base-class signature may forward options, and raising
        # TypeError for them would make this subclass non-substitutable.
        print("Tokenizing text", text)
        return text.split()

    def get_vocab(self) -> Dict[str, int]:
        """Return the token-to-id mapping, which is always empty here."""
        return {}