Kush Bhatia, Wenshuo Guo, Jacob Steinhardt. Reward Learning as Doubly Nonparametric Bandits: Optimal Design and Scaling Laws. In Francisco J. R. Ruiz, Jennifer G. Dy, Jan-Willem van de Meent, editors, International Conference on Artificial Intelligence and Statistics, 25-27 April 2023, Palau de Congressos, Valencia, Spain. Volume 206 of Proceedings of Machine Learning Research, pages 11149-11171, PMLR, 2023. URL: https://proceedings.mlr.press/v206/bhatia23a.html
@comment{Conference paper published in volume 206 of the Proceedings of
  Machine Learning Research. The fields researchr, cites and citedby are not
  standard BibTeX fields (presumably metadata from the researchr.org export);
  standard styles ignore unknown fields, so they are kept for provenance.}
@inproceedings{BhatiaGS23,
  author    = {Bhatia, Kush and Guo, Wenshuo and Steinhardt, Jacob},
  title     = {Reward Learning as Doubly Nonparametric Bandits: Optimal Design and Scaling Laws},
  booktitle = {International Conference on Artificial Intelligence and Statistics, 25-27 April 2023, Palau de Congressos, Valencia, Spain},
  editor    = {Ruiz, Francisco J. R. and Dy, Jennifer G. and van de Meent, Jan-Willem},
  series    = {Proceedings of Machine Learning Research},
  volume    = {206},
  pages     = {11149--11171},
  publisher = {PMLR},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v206/bhatia23a.html},
  researchr = {https://researchr.org/publication/BhatiaGS23},
  cites     = {0},
  citedby   = {0},
}