Skyler Seto, Maartje ter Hoeve, Maureen de Seyssel, David Grangier. Assessing the Role of Data Quality in Training Bilingual Language Models. In Christos Christodoulopoulos, Tanmoy Chakraborty, Carolyn Rose, Violet Peng, editors, Findings of the Association for Computational Linguistics: EMNLP 2025, Suzhou, China, November 4-9, 2025, pages 22694-22720. Association for Computational Linguistics, 2025.
% Findings-of-EMNLP 2025 paper (ACL Anthology record). The researchr, cites and
% citedby fields are non-standard export metadata; standard styles ignore them.
@inproceedings{SetoHSG25,
  author    = {Seto, Skyler and ter Hoeve, Maartje and de Seyssel, Maureen and Grangier, David},
  title     = {Assessing the Role of Data Quality in Training Bilingual Language Models},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025, Suzhou, China, November 4--9, 2025},
  editor    = {Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet},
  publisher = {Association for Computational Linguistics},
  year      = {2025},
  pages     = {22694--22720},
  isbn      = {979-8-89176-335-7},
  url       = {https://aclanthology.org/2025.findings-emnlp.1236/},
  researchr = {https://researchr.org/publication/SetoHSG25},
  cites     = {0},
  citedby   = {0},
}