Yi Meng, Xiang Li, Zhiyong Wu, Tingtian Li, Zixun Sun, Xinyu Xiao, Chi Sun, Hui Zhan, Helen Meng. CALM: Contrastive Cross-modal Speaking Style Modeling for Expressive Text-to-Speech Synthesis. In Hanseok Ko, John H. L. Hansen, editors, Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022. pages 5533-5537, ISCA, 2022. [doi]
% Interspeech 2022 conference paper (CALM, expressive text-to-speech).
% Names are stored in "Last, First" form so the parser cannot mis-split them.
% The numeric suffix "0001" that followed "Zhiyong Wu" was removed: it is not
% part of the name (presumably a bibliography-database homonym id; confirm)
% and would otherwise be parsed as the surname.
% The fields researchr, cites and citedby are non-standard; styles ignore them.
@inproceedings{MengL0LSXSZM22,
  author    = {Meng, Yi and Li, Xiang and Wu, Zhiyong and Li, Tingtian and Sun, Zixun and Xiao, Xinyu and Sun, Chi and Zhan, Hui and Meng, Helen},
  title     = {{CALM}: Contrastive Cross-modal Speaking Style Modeling for Expressive Text-to-Speech Synthesis},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  editor    = {Ko, Hanseok and Hansen, John H. L.},
  pages     = {5533--5537},
  publisher = {ISCA},
  year      = {2022},
  doi       = {10.21437/Interspeech.2022-11275},
  url       = {https://doi.org/10.21437/Interspeech.2022-11275},
  researchr = {https://researchr.org/publication/MengL0LSXSZM22},
  cites     = {0},
  citedby   = {0},
}