Shulai Zhang, Weihao Cui, Quan Chen, Zhengnian Zhang, Yue Guan, Jingwen Leng, Chao Li, Minyi Guo. PAME: precision-aware multi-exit DNN serving for reducing latencies of batched inferences. In Lawrence Rauchwerger, Kirk W. Cameron, Dimitrios S. Nikolopoulos, Dionisios N. Pnevmatikatos, editors, ICS '22: 2022 International Conference on Supercomputing, Virtual Event, June 28 - 30, 2022. ACM, 2022. doi:10.1145/3524059.3532366
@comment{
  ZhangCCZGLLG22: conference paper in the ICS '22 proceedings (ACM).
  Acronyms in title and booktitle are brace-protected so that styles which
  apply sentence casing keep their capitalisation. Names are stored in
  "Last, First" form. The researchr, cites and citedby fields are
  non-standard; standard styles ignore unknown fields.
}
@inproceedings{ZhangCCZGLLG22,
  title     = {{PAME}: precision-aware multi-exit {DNN} serving for reducing latencies of batched inferences},
  author    = {Zhang, Shulai and Cui, Weihao and Chen, Quan and Zhang, Zhengnian and Guan, Yue and Leng, Jingwen and Li, Chao and Guo, Minyi},
  year      = {2022},
  doi       = {10.1145/3524059.3532366},
  url       = {https://doi.org/10.1145/3524059.3532366},
  researchr = {https://researchr.org/publication/ZhangCCZGLLG22},
  cites     = {0},
  citedby   = {0},
  booktitle = {{ICS} '22: 2022 International Conference on Supercomputing, Virtual Event, June 28 - 30, 2022},
  editor    = {Rauchwerger, Lawrence and Cameron, Kirk W. and Nikolopoulos, Dimitrios S. and Pnevmatikatos, Dionisios N.},
  publisher = {ACM},
  isbn      = {978-1-4503-9281-5},
}