Focus and Align: Learning Tube Tokens for Video-Language Pre-Training

Yongqing Zhu, Xiangyang Li, Mao Zheng, Jiahao Yang, Zihan Wang, Xiaoqian Guo, Zifeng Chai, Yuchen Yuan, Shuqiang Jiang. Focus and Align: Learning Tube Tokens for Video-Language Pre-Training. IEEE Transactions on Multimedia, 25:8036–8050, 2023. doi:10.1109/TMM.2022.3231108

@comment{
  Journal article in IEEE Transactions on Multimedia, volume 25 (2023).
  Author names are stored in the unambiguous "Last, First" form. The numeric
  homonym suffix "0002" that the export attached to Xiangyang Li is kept out
  of the author field, because BibTeX would otherwise parse it as the surname.
  The fields researchr, cites and citedby are non-standard bookkeeping fields;
  standard bibliography styles ignore them.
}
@article{ZhuLZYWGCYJ23,
  author    = {Zhu, Yongqing and Li, Xiangyang and Zheng, Mao and Yang, Jiahao and Wang, Zihan and Guo, Xiaoqian and Chai, Zifeng and Yuan, Yuchen and Jiang, Shuqiang},
  title     = {Focus and Align: Learning Tube Tokens for Video-Language Pre-Training},
  journal   = {IEEE Transactions on Multimedia},
  volume    = {25},
  pages     = {8036--8050},
  year      = {2023},
  doi       = {10.1109/TMM.2022.3231108},
  url       = {https://doi.org/10.1109/TMM.2022.3231108},
  researchr = {https://researchr.org/publication/ZhuLZYWGCYJ23},
  cites     = {0},
  citedby   = {0},
}