Vladimir Iashin, Esa Rahtu. A Better Use of Audio-Visual Cues: Dense Video Captioning with Bi-modal Transformer. In 31st British Machine Vision Conference 2020, BMVC 2020, Virtual Event, UK, September 7-10, 2020. BMVA Press, 2020. [doi]
% Conference paper from BMVC 2020.
% The researchr, cites and citedby fields are not standard BibTeX fields
% (presumably bookkeeping from the researchr.org export); standard styles
% ignore unknown fields, so they are kept here unchanged.
@inproceedings{IashinR20-0,
  author    = {Iashin, Vladimir and Rahtu, Esa},
  title     = {A Better Use of Audio-Visual Cues: Dense Video Captioning with Bi-modal Transformer},
  booktitle = {31st British Machine Vision Conference 2020, {BMVC} 2020, Virtual Event, {UK}, September 7--10, 2020},
  publisher = {BMVA Press},
  year      = {2020},
  month     = sep,
  url       = {https://www.bmvc2020-conference.com/assets/papers/0111.pdf},
  researchr = {https://researchr.org/publication/IashinR20-0},
  cites     = {0},
  citedby   = {0},
}