Zihan Zhang, Xiangyang Ji, and Simon S. Du. Horizon-Free Reinforcement Learning in Polynomial Time: the Power of Stationary Policies. In Po-Ling Loh and Maxim Raginsky, editors, Conference on Learning Theory, 2-5 July 2022, London, UK. Volume 178 of Proceedings of Machine Learning Research, pages 3858-3904. PMLR, 2022.
% Conference paper in volume 178 of Proceedings of Machine Learning Research
% (Conference on Learning Theory, 2022).
% NOTE(review): `researchr`, `cites` and `citedby` are non-standard fields,
% presumably carried over from a researchr.org export; standard styles ignore
% unknown fields, so they are kept as annotations.
@inproceedings{ZhangJD22-0,
  author    = {Zhang, Zihan and Ji, Xiangyang and Du, Simon S.},
  title     = {Horizon-Free Reinforcement Learning in Polynomial Time: the Power of Stationary Policies},
  booktitle = {Conference on Learning Theory, 2-5 July 2022, London, UK},
  editor    = {Loh, Po-Ling and Raginsky, Maxim},
  series    = {Proceedings of Machine Learning Research},
  volume    = {178},
  pages     = {3858--3904},
  publisher = {PMLR},
  year      = {2022},
  url       = {https://proceedings.mlr.press/v178/zhang22a.html},
  researchr = {https://researchr.org/publication/ZhangJD22-0},
  cites     = {0},
  citedby   = {0},
}