Grzegorz Chrupala. Visually Grounded Models of Spoken Language: A Survey of Datasets, Architectures and Evaluation Techniques. Journal of Artificial Intelligence Research (JAIR), 73:673-707, 2022. doi:10.1613/jair.1.12967
% Journal survey article (JAIR vol. 73, 2022). The citation key and the
% researchr/cites/citedby bookkeeping fields are kept exactly as exported;
% standard styles ignore the unknown fields.
% NOTE(review): the author's surname may be written with a Polish
% l-with-stroke in the published paper ({\l} in LaTeX) -- confirm against the
% article before changing the spelling here.
@article{Chrupala22,
  author    = {Chrupala, Grzegorz},
  title     = {Visually Grounded Models of Spoken Language: A Survey of Datasets, Architectures and Evaluation Techniques},
  journal   = {Journal of Artificial Intelligence Research},
  volume    = {73},
  pages     = {673--707},
  year      = {2022},
  doi       = {10.1613/jair.1.12967},
  url       = {https://doi.org/10.1613/jair.1.12967},
  researchr = {https://researchr.org/publication/Chrupala22},
  cites     = {0},
  citedby   = {0},
}