Jiangsu Du, Jiazhi Jiang, Jiang Zheng, Hongbin Zhang, Dan Huang, Yutong Lu. Improving Computation and Memory Efficiency for Real-world Transformer Inference on GPUs. ACM Transactions on Architecture and Code Optimization (TACO), 20(4), December 2023. doi:10.1145/3617689
% Journal article; entry exported from researchr.org.
% The researchr, cites, and citedby fields are non-standard export metadata;
% standard bibliography styles should ignore them (kept for provenance).
@article{DuJZZHL23,
  author    = {Du, Jiangsu and Jiang, Jiazhi and Zheng, Jiang and Zhang, Hongbin and Huang, Dan and Lu, Yutong},
  title     = {Improving Computation and Memory Efficiency for Real-world {Transformer} Inference on {GPUs}},
  journal   = {{ACM} Transactions on Architecture and Code Optimization},
  volume    = {20},
  number    = {4},
  year      = {2023},
  month     = dec,
  doi       = {10.1145/3617689},
  url       = {https://doi.org/10.1145/3617689},
  researchr = {https://researchr.org/publication/DuJZZHL23},
  cites     = {0},
  citedby   = {0},
}