Jenhao Hsiao, Yikang Li, Chiuman Ho. Language-guided Multi-Modal Fusion for Video Action Recognition. In IEEE/CVF International Conference on Computer Vision Workshops, ICCVW 2021, Montreal, BC, Canada, October 11-17, 2021, pages 3151-3155. IEEE, 2021. doi:10.1109/ICCVW54120.2021.00354
% Workshop paper in the ICCVW 2021 proceedings. The url field is the doi.org
% resolver form of the doi field; researchr/cites/citedby are non-standard
% fields that standard styles ignore.
@inproceedings{HsiaoLH21,
  author    = {Hsiao, Jenhao and Li, Yikang and Ho, Chiuman},
  title     = {Language-guided Multi-Modal Fusion for Video Action Recognition},
  booktitle = {{IEEE/CVF} International Conference on Computer Vision Workshops, {ICCVW} 2021, Montreal, BC, Canada, October 11--17, 2021},
  pages     = {3151--3155},
  publisher = {IEEE},
  year      = {2021},
  doi       = {10.1109/ICCVW54120.2021.00354},
  url       = {https://doi.org/10.1109/ICCVW54120.2021.00354},
  isbn      = {978-1-6654-0191-3},
  researchr = {https://researchr.org/publication/HsiaoLH21},
  cites     = {0},
  citedby   = {0},
}