Haoyu Lu, Guoxing Yang, Nanyi Fei, Yuqi Huo, Zhiwu Lu, Ping Luo, Mingyu Ding. VDT: General-purpose Video Diffusion Transformers via Mask Modeling. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net, 2024. https://openreview.net/forum?id=Un0rgm9f04
% ICLR 2024 conference paper. Author names are given in "Last, First" form;
% the DBLP homonym suffixes (0001, 0002) that followed two of the names were
% dropped because BibTeX would otherwise parse the number as the surname.
@inproceedings{LuYFH00D24,
  author     = {Lu, Haoyu and Yang, Guoxing and Fei, Nanyi and Huo, Yuqi and Lu, Zhiwu and Luo, Ping and Ding, Mingyu},
  title      = {{VDT}: General-purpose Video Diffusion Transformers via Mask Modeling},
  booktitle  = {The Twelfth International Conference on Learning Representations, {ICLR} 2024, Vienna, Austria, May 7--11, 2024},
  publisher  = {OpenReview.net},
  year       = {2024},
  url        = {https://openreview.net/forum?id=Un0rgm9f04},
  researchr  = {https://researchr.org/publication/LuYFH00D24},
  cites      = {0},
  citedby    = {0},
}