Nadezhda Chirkova, Sergey Troshin. CodeBPE: Investigating Subtokenization Options for Large Language Model Pretraining on Source Code. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net, 2023. [doi]
% Conference paper entry. "CodeBPE" is brace-protected in the title so that
% sentence-casing bibliography styles keep its capitalisation. The fields
% researchr, cites and citedby are not standard BibTeX fields; standard styles
% ignore them, and they are retained here as exported metadata.
@inproceedings{ChirkovaT23,
  author    = {Chirkova, Nadezhda and Troshin, Sergey},
  title     = {{CodeBPE}: Investigating Subtokenization Options for Large Language Model Pretraining on Source Code},
  booktitle = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher = {OpenReview.net},
  year      = {2023},
  url       = {https://openreview.net/pdf?id=htL4UZ344nF},
  researchr = {https://researchr.org/publication/ChirkovaT23},
  cites     = {0},
  citedby   = {0},
}