Multimodal Pretraining Unmasked: A Meta-Analysis and a Unified Framework of Vision-and-Language BERTs

Emanuele Bugliarello, Ryan Cotterell, Naoaki Okazaki, Desmond Elliott. Multimodal Pretraining Unmasked: A Meta-Analysis and a Unified Framework of Vision-and-Language BERTs. Transactions of the Association for Computational Linguistics (TACL), 9:978–994, 2021. https://doi.org/10.1162/tacl_a_00408

% Journal article: Bugliarello et al. (2021), TACL volume 9.
% Notes for maintainers:
%   - The acronym in the title is brace-protected so sentence-casing styles keep its capitals.
%   - The journal is stored under its full name; abbreviating is left to the bibliography style.
%   - researchr / cites / citedby are non-standard fields carried over from the researchr.org
%     export; standard styles ignore them.
@article{BugliarelloCOE21,
  author    = {Bugliarello, Emanuele and Cotterell, Ryan and Okazaki, Naoaki and Elliott, Desmond},
  title     = {Multimodal Pretraining Unmasked: A Meta-Analysis and a Unified Framework of Vision-and-Language {BERTs}},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  pages     = {978--994},
  year      = {2021},
  doi       = {10.1162/tacl_a_00408},
  url       = {https://doi.org/10.1162/tacl_a_00408},
  researchr = {https://researchr.org/publication/BugliarelloCOE21},
  cites     = {0},
  citedby   = {0},
}