Sho Takase, Shun Kiyono. Lessons on Parameter Sharing across Layers in Transformers. In Nafise Sadat Moosavi, Iryna Gurevych, Yufang Hou, Gyuwan Kim, Young-Jin Kim, Tal Schuster, Ameeta Agrawal, editors, Proceedings of The Fourth Workshop on Simple and Efficient Natural Language Processing, SustaiNLP 2023, Toronto, Canada (Hybrid), July 13, 2023, pages 78-90. Association for Computational Linguistics, 2023. https://aclanthology.org/2023.sustainlp-1.5
@inproceedings{TakaseK23,
  author     = {Takase, Sho and Kiyono, Shun},
  title      = {Lessons on Parameter Sharing across Layers in {Transformers}},
  booktitle  = {Proceedings of The Fourth Workshop on Simple and Efficient Natural Language Processing, {SustaiNLP} 2023, Toronto, Canada (Hybrid), July 13, 2023},
  editor     = {Moosavi, Nafise Sadat and Gurevych, Iryna and Hou, Yufang and Kim, Gyuwan and Kim, Young-Jin and Schuster, Tal and Agrawal, Ameeta},
  pages      = {78--90},
  publisher  = {Association for Computational Linguistics},
  year       = {2023},
  isbn       = {978-1-959429-79-1},
  url        = {https://aclanthology.org/2023.sustainlp-1.5},
  researchr  = {https://researchr.org/publication/TakaseK23},
  cites      = {0},
  citedby    = {0},
}