Vaishnavi Himakunthala, Andy Ouyang, Daniel Rose, Ryan He, Alex Mei, Yujie Lu, Chinmay Sonar, Michael Saxon, and William Yang Wang. Let's Think Frame by Frame with VIP: A Video Infilling and Prediction Dataset for Evaluating Video Chain-of-Thought. In Houda Bouamor, Juan Pino, and Kalika Bali, editors, Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, EMNLP 2023, Singapore, December 6-10, 2023, pages 204-219. Association for Computational Linguistics, 2023. https://aclanthology.org/2023.emnlp-main.15
% Conference paper from the EMNLP 2023 proceedings; the url field points at the ACL Anthology record.
% The researchr, cites and citedby fields are non-standard export metadata that standard styles ignore.
@inproceedings{HimakunthalaORH23,
  author    = {Himakunthala, Vaishnavi and Ouyang, Andy and Rose, Daniel and He, Ryan and Mei, Alex and Lu, Yujie and Sonar, Chinmay and Saxon, Michael and Wang, William Yang},
  title     = {Let's Think Frame by Frame with {VIP}: A Video Infilling and Prediction Dataset for Evaluating Video Chain-of-Thought},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2023, Singapore, December 6--10, 2023},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  publisher = {Association for Computational Linguistics},
  year      = {2023},
  pages     = {204--219},
  isbn      = {979-8-89176-060-8},
  url       = {https://aclanthology.org/2023.emnlp-main.15},
  researchr = {https://researchr.org/publication/HimakunthalaORH23},
  cites     = {0},
  citedby   = {0},
}