Ting Pan, Lulu Tang, Xinlong Wang, Xin Liu, Shiguang Shan. Consistent multimodal pre-training for visual tokenization. Science in China Series F: Information Sciences, 68(10), 2025. doi:10.1007/s11432-024-4603-x
% Journal article record; the citation key matches the researchr.org publication id.
% Fields researchr, cites, citedby and author-note are not standard BibTeX fields
% and are ignored by standard bibliography styles.
@article{PanTWLS25,
  author      = {Pan, Ting and Tang, Lulu and Wang, Xinlong and Liu, Xin and Shan, Shiguang},
  title       = {Consistent multimodal pre-training for visual tokenization},
  journal     = {Science in China Series F: Information Sciences},
  volume      = {68},
  number      = {10},
  year        = {2025},
  doi         = {10.1007/s11432-024-4603-x},
  url         = {https://doi.org/10.1007/s11432-024-4603-x},
  researchr   = {https://researchr.org/publication/PanTWLS25},
  cites       = {0},
  citedby     = {0},
  author-note = {Source record listed the fourth author as "Xin Liu 0044"; the numeric suffix appears to be a homonym-disambiguation id, not part of the name, and was removed from the author field so it is not parsed as a surname},
}