Tong Zhao, Junping Du, Zhe Xue, MeiYu Liang, Aijing Li, Xiaolong Meng, Dandan Liu. ST-VLM: A Spatial-to-Image Multimodal Spatial-Temporal Prediction Framework with Vision-Language Model. In Sven Koenig, Chad Jenkins, Matthew E. Taylor, editors, Fortieth AAAI Conference on Artificial Intelligence, Thirty-Eighth Conference on Innovative Applications of Artificial Intelligence, Sixteenth Symposium on Educational Advances in Artificial Intelligence, AAAI 2026, Singapore, January 20-27, 2026, pages 16441-16449. AAAI Press, 2026. doi:10.1609/aaai.v40i19.38683
@comment{Conference paper entry. Names are stored in "Last, First" form; the
  numeric disambiguation suffix "0001" that followed "Junping Du" in the
  export is dropped because BibTeX would otherwise parse it as the surname.
  The fields researchr, cites and citedby are non-standard and are ignored
  by standard bibliography styles.}
@inproceedings{ZhaoDXLLML26,
  author    = {Zhao, Tong and Du, Junping and Xue, Zhe and Liang, MeiYu and Li, Aijing and Meng, Xiaolong and Liu, Dandan},
  title     = {{ST-VLM}: A Spatial-to-Image Multimodal Spatial-Temporal Prediction Framework with Vision-Language Model},
  booktitle = {Fortieth {AAAI} Conference on Artificial Intelligence, Thirty-Eighth Conference on Innovative Applications of Artificial Intelligence, Sixteenth Symposium on Educational Advances in Artificial Intelligence, {AAAI} 2026, Singapore, January 20-27, 2026},
  editor    = {Koenig, Sven and Jenkins, Chad and Taylor, Matthew E.},
  pages     = {16441--16449},
  publisher = {AAAI Press},
  year      = {2026},
  isbn      = {978-1-57735-906-7},
  doi       = {10.1609/aaai.v40i19.38683},
  url       = {https://doi.org/10.1609/aaai.v40i19.38683},
  researchr = {https://researchr.org/publication/ZhaoDXLLML26},
  cites     = {0},
  citedby   = {0},
}