Yanying Lin, Yanbo Li, Shijie Peng, Yingfei Tang, Shutian Luo, Haiying Shen, Cheng-Zhong Xu, Kejiang Ye. QUART: Latency-Aware FaaS System for Pipelining Large Model Inference. In 44th IEEE International Conference on Distributed Computing Systems, ICDCS 2024, Jersey City, NJ, USA, July 23-26, 2024. pages 1-12, IEEE, 2024. doi: https://doi.org/10.1109/ICDCS60910.2024.00010
@comment{QUART (ICDCS 2024) conference paper. The researchr, cites and citedby
  fields are non-standard export metadata; bibliography styles ignore them.}
@inproceedings{LinLPTLSXY24,
  author     = {Lin, Yanying and Li, Yanbo and Peng, Shijie and Tang, Yingfei and Luo, Shutian and Shen, Haiying and Xu, Cheng-Zhong and Ye, Kejiang},
  title      = {{QUART}: Latency-Aware {FaaS} System for Pipelining Large Model Inference},
  booktitle  = {44th {IEEE} International Conference on Distributed Computing Systems, {ICDCS} 2024, Jersey City, NJ, USA, July 23--26, 2024},
  pages      = {1--12},
  publisher  = {IEEE},
  year       = {2024},
  doi        = {10.1109/ICDCS60910.2024.00010},
  url        = {https://doi.org/10.1109/ICDCS60910.2024.00010},
  isbn       = {979-8-3503-8605-9},
  researchr  = {https://researchr.org/publication/LinLPTLSXY24},
  cites      = {0},
  citedby    = {0},
}