Learning to Speak from Text: Zero-Shot Multilingual Text-to-Speech with Unsupervised Text Pretraining

Takaaki Saeki, Soumi Maiti, Xinjian Li, Shinji Watanabe, Shinnosuke Takamichi, Hiroshi Saruwatari. Learning to Speak from Text: Zero-Shot Multilingual Text-to-Speech with Unsupervised Text Pretraining. In Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, IJCAI 2023, 19th-25th August 2023, Macao, SAR, China. pages 5179-5187, ijcai.org, 2023. doi:10.24963/ijcai.2023/575

% IJCAI 2023 conference paper (Saeki et al.).
% Authors are stored in "Last, First" form so name parsing is unambiguous.
% The researchr, cites and citedby fields are non-standard export metadata;
% standard bibliography styles ignore unknown fields.
@inproceedings{SaekiML0TS23,
  author    = {Saeki, Takaaki and Maiti, Soumi and Li, Xinjian and Watanabe, Shinji and Takamichi, Shinnosuke and Saruwatari, Hiroshi},
  title     = {Learning to Speak from Text: Zero-Shot Multilingual Text-to-Speech with Unsupervised Text Pretraining},
  booktitle = {Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, {IJCAI} 2023, 19th-25th August 2023, Macao, SAR, China},
  pages     = {5179--5187},
  publisher = {ijcai.org},
  year      = {2023},
  doi       = {10.24963/ijcai.2023/575},
  url       = {https://doi.org/10.24963/ijcai.2023/575},
  researchr = {https://researchr.org/publication/SaekiML0TS23},
  cites     = {0},
  citedby   = {0},
}