Yuwu Tang, Ying Hu, Liang He, Hao Huang. A bimodal network based on Audio-Text-Interactional-Attention with ArcFace loss for speech emotion recognition. Speech Communication, 143:21-32, 2022. doi:10.1016/j.specom.2022.07.004
% Journal article in Speech Communication, volume 143 (2022).
% Case-sensitive title terms are brace-protected so sentence-casing styles keep
% them intact; the page range uses the double-hyphen (en-dash) form.
% researchr, cites and citedby are non-standard fields ignored by standard styles.
@article{TangHHH22,
  author    = {Tang, Yuwu and Hu, Ying and He, Liang and Huang, Hao},
  title     = {A bimodal network based on {Audio-Text-Interactional-Attention} with {ArcFace} loss for speech emotion recognition},
  journal   = {Speech Communication},
  volume    = {143},
  pages     = {21--32},
  year      = {2022},
  doi       = {10.1016/j.specom.2022.07.004},
  url       = {https://doi.org/10.1016/j.specom.2022.07.004},
  researchr = {https://researchr.org/publication/TangHHH22},
  cites     = {0},
  citedby   = {0},
}