#!/usr/bin/env bash
#
# Runs src/tokenizer/train.py with a fixed set of command-line options
# (presumably to train a tokenizer -- confirm against train.py).
#
# Usage: run from the directory that contains src/, because the script path
# below is relative to the current working directory.
#
# Options passed through:
#   --dataset_path        data/raw_dataset/VirusComment.py
#   --template_tokenizer  BAAI/bge-reranker-v2-m3
#   --vocab_size          30000
#   --output_dir          data/tokenizer
set -euo pipefail

# Each backslash must be the LAST character on its line: backslash-newline is
# a line continuation, whereas backslash-space is an escaped literal space
# that would be glued onto the front of the next option name.
python src/tokenizer/train.py \
  --dataset_path "data/raw_dataset/VirusComment.py" \
  --template_tokenizer "BAAI/bge-reranker-v2-m3" \
  --vocab_size 30000 \
  --output_dir "data/tokenizer"