Kair: A Statistical and Causal Approach to Pinpointing Stragglers in Distributed Model Training

Yitang Yang, Junhong Liu, Jiapeng Chen, Xiaoyang Sun, Tianyu Wo, Chunming Hu, Chengru Song, Jin Ouyang, Renyu Yang. Kair: A Statistical and Causal Approach to Pinpointing Stragglers in Distributed Model Training. In 40th IEEE/ACM International Conference on Automated Software Engineering, ASE 2025, Seoul, Republic of Korea, November 16–20, 2025, pages 3754–3759. IEEE, 2025. doi: 10.1109/ASE63991.2025.00311

% Conference paper (ASE 2025). Braced words in title and booktitle keep their capitalisation under sentence-casing styles; researchr, cites and citedby are non-standard fields.
@inproceedings{YangLCSWHSOY25,
  title = {{Kair}: A Statistical and Causal Approach to Pinpointing Stragglers in Distributed Model Training},
  author = {Yang, Yitang and Liu, Junhong and Chen, Jiapeng and Sun, Xiaoyang and Wo, Tianyu and Hu, Chunming and Song, Chengru and Ouyang, Jin and Yang, Renyu},
  year = {2025},
  doi = {10.1109/ASE63991.2025.00311},
  url = {https://doi.org/10.1109/ASE63991.2025.00311},
  researchr = {https://researchr.org/publication/YangLCSWHSOY25},
  cites = {0},
  citedby = {0},
  pages = {3754--3759},
  booktitle = {40th {IEEE/ACM} International Conference on Automated Software Engineering, {ASE} 2025, Seoul, Republic of Korea, November 16--20, 2025},
  publisher = {IEEE},
  isbn = {979-8-3503-5733-2},
}