Zichang Liu, Jue Wang, Tri Dao, Tianyi Zhou, Binhang Yuan, Zhao Song, Anshumali Shrivastava, Ce Zhang, Yuandong Tian, Christopher Ré, Beidi Chen. Deja Vu: Contextual Sparsity for Efficient LLMs at Inference Time. In Andreas Krause, Emma Brunskill, KyungHyun Cho, Barbara Engelhardt, Sivan Sabato, Jonathan Scarlett, editors, International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA. Volume 202 of Proceedings of Machine Learning Research, pages 22137-22176, PMLR, 2023. https://proceedings.mlr.press/v202/liu23am.html
% Conference paper in the ICML 2023 proceedings (PMLR volume 202).
% Names are stored as "Last, First"; DBLP homonym suffixes such as "0002" were
% removed because BibTeX would otherwise parse the number as the surname.
% researchr, cites and citedby are non-standard fields ignored by styles.
@inproceedings{LiuWDZY0S0TRC23,
  author    = {Liu, Zichang and Wang, Jue and Dao, Tri and Zhou, Tianyi and Yuan, Binhang and Song, Zhao and Shrivastava, Anshumali and Zhang, Ce and Tian, Yuandong and R{\'e}, Christopher and Chen, Beidi},
  title     = {{Deja Vu}: Contextual Sparsity for Efficient {LLMs} at Inference Time},
  booktitle = {International Conference on Machine Learning, {ICML} 2023, 23--29 July 2023, Honolulu, Hawaii, USA},
  editor    = {Krause, Andreas and Brunskill, Emma and Cho, KyungHyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
  series    = {Proceedings of Machine Learning Research},
  volume    = {202},
  pages     = {22137--22176},
  publisher = {PMLR},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v202/liu23am.html},
  researchr = {https://researchr.org/publication/LiuWDZY0S0TRC23},
  cites     = {0},
  citedby   = {0},
}