Heyang Zhao, Jiafan He, Dongruo Zhou, Tong Zhang, Quanquan Gu. Variance-Dependent Regret Bounds for Linear Bandits and Reinforcement Learning: Adaptivity and Computational Efficiency. In Gergely Neu, Lorenzo Rosasco, editors, The Thirty Sixth Annual Conference on Learning Theory, 12-15 July 2023, Bangalore, India. Volume 195 of Proceedings of Machine Learning Research, pages 4977-5020, PMLR, 2023. URL: https://proceedings.mlr.press/v195/zhao23a.html
% Conference on Learning Theory 2023 paper, published in PMLR volume 195.
% The researchr, cites and citedby fields are non-standard export metadata;
% standard bibliography styles ignore unknown fields.
@inproceedings{ZhaoHZZG23,
  author    = {Zhao, Heyang and He, Jiafan and Zhou, Dongruo and Zhang, Tong and Gu, Quanquan},
  title     = {Variance-Dependent Regret Bounds for Linear Bandits and Reinforcement Learning: Adaptivity and Computational Efficiency},
  booktitle = {The Thirty Sixth Annual Conference on Learning Theory, 12--15 July 2023, Bangalore, India},
  editor    = {Neu, Gergely and Rosasco, Lorenzo},
  series    = {Proceedings of Machine Learning Research},
  volume    = {195},
  pages     = {4977--5020},
  publisher = {PMLR},
  year      = {2023},
  url       = {https://proceedings.mlr.press/v195/zhao23a.html},
  researchr = {https://researchr.org/publication/ZhaoHZZG23},
  cites     = {0},
  citedby   = {0},
}