Branden Butler, Sixing Yu, Arya Mazaheri, Ali Jannesari. PipeInfer: Accelerating LLM Inference using Asynchronous Pipelined Speculation. In Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis, SC 2024, Atlanta, GA, USA, November 17-22, 2024. pages 40, IEEE, 2024. doi:10.1109/SC41406.2024.00046
@comment{ButlerYMJ24: conference paper record exported from researchr.org.
  The researchr, cites and citedby fields are non-standard export metadata and
  are ignored by standard styles. NOTE(review): pages holds the single value 40
  as exported; this looks like an article number rather than a page range --
  confirm against the publisher record before adding a range.}
@inproceedings{ButlerYMJ24,
  author     = {Butler, Branden and Yu, Sixing and Mazaheri, Arya and Jannesari, Ali},
  title      = {{PipeInfer}: Accelerating {LLM} Inference using Asynchronous Pipelined Speculation},
  booktitle  = {Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis, {SC} 2024, Atlanta, {GA}, {USA}, November 17--22, 2024},
  year       = {2024},
  pages      = {40},
  publisher  = {IEEE},
  doi        = {10.1109/SC41406.2024.00046},
  url        = {https://dl.acm.org/doi/10.1109/SC41406.2024.00046},
  researchr  = {https://researchr.org/publication/ButlerYMJ24},
  cites      = {0},
  citedby    = {0},
}