Angela Fan, Edouard Grave, Armand Joulin. Reducing Transformer Depth on Demand with Structured Dropout. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020. OpenReview.net, 2020. [doi]
% Conference paper entry (booktitle: ICLR 2020 proceedings).
% Braces in title/booktitle protect words that must keep their capitalisation
% under styles that apply sentence casing.
% The researchr, cites and citedby fields are non-standard; standard styles
% silently ignore unknown fields, so they are retained as bookkeeping metadata.
@inproceedings{FanGJ20,
  author    = {Fan, Angela and Grave, Edouard and Joulin, Armand},
  title     = {Reducing {Transformer} Depth on Demand with Structured Dropout},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=SylO2yStDr},
  researchr = {https://researchr.org/publication/FanGJ20},
  cites     = {0},
  citedby   = {0},
}