Emanuele Bugliarello, Ryan Cotterell, Naoaki Okazaki, Desmond Elliott. Multimodal Pretraining Unmasked: A Meta-Analysis and a Unified Framework of Vision-and-Language BERTs. TACL, 9:978-994, 2021. doi:10.1162/tacl_a_00408
% Journal article in TACL, volume 9 (2021). The journal name is stored in full
% so that the bibliography style decides on any abbreviation; the acronym in
% the title is brace-protected against sentence-casing styles. The researchr,
% cites and citedby fields are exporter metadata that standard styles ignore.
@article{BugliarelloCOE21,
  author    = {Bugliarello, Emanuele and Cotterell, Ryan and Okazaki, Naoaki and Elliott, Desmond},
  title     = {Multimodal Pretraining Unmasked: A Meta-Analysis and a Unified Framework of Vision-and-Language {BERTs}},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  pages     = {978--994},
  year      = {2021},
  doi       = {10.1162/tacl_a_00408},
  url       = {https://doi.org/10.1162/tacl_a_00408},
  researchr = {https://researchr.org/publication/BugliarelloCOE21},
  cites     = {0},
  citedby   = {0},
}