Aran Komatsuzaki, Joan Puigcerver, James Lee-Thorp, Carlos Riquelme Ruiz, Basil Mustafa, Joshua Ainslie, Yi Tay, Mostafa Dehghani, Neil Houlsby. Sparse Upcycling: Training Mixture-of-Experts from Dense Checkpoints. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net, 2023. [doi]
% Conference paper: "Sparse Upcycling: Training Mixture-of-Experts from Dense
% Checkpoints" (ICLR 2023).
% Author names are stored in "Last, First" form so that the compound surname
% "Riquelme Ruiz" is parsed as a single last name. The numeric suffix "0001"
% that followed "Mostafa Dehghani" was an author-disambiguation tag, not part
% of the name, and has been removed (BibTeX would have taken it as the surname).
% The researchr, cites and citedby fields are non-standard; bibliography styles
% that do not know them ignore them.
@inproceedings{KomatsuzakiPLRM23,
  author    = {Komatsuzaki, Aran and
               Puigcerver, Joan and
               Lee-Thorp, James and
               Riquelme Ruiz, Carlos and
               Mustafa, Basil and
               Ainslie, Joshua and
               Tay, Yi and
               Dehghani, Mostafa and
               Houlsby, Neil},
  title     = {Sparse Upcycling: Training Mixture-of-Experts from Dense Checkpoints},
  booktitle = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher = {OpenReview.net},
  year      = {2023},
  url       = {https://openreview.net/pdf?id=T5nUQDrM4u},
  researchr = {https://researchr.org/publication/KomatsuzakiPLRM23},
  cites     = {0},
  citedby   = {0},
}