Yuanhao Wang, Kefan Dong, Xiaoyu Chen, Liwei Wang. Q-learning with UCB Exploration is Sample Efficient for Infinite-Horizon MDP. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020. OpenReview.net, 2020. URL: https://openreview.net/forum?id=BkglSTNFDB
% Conference paper entry. Braces inside title/booktitle protect acronyms from style recasing;
% researchr, cites and citedby are non-standard export metadata that standard styles ignore.
@inproceedings{WangDCW20,
  author    = {Wang, Yuanhao and Dong, Kefan and Chen, Xiaoyu and Wang, Liwei},
  title     = {{Q-learning} with {UCB} Exploration is Sample Efficient for Infinite-Horizon {MDP}},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=BkglSTNFDB},
  researchr = {https://researchr.org/publication/WangDCW20},
  cites     = {0},
  citedby   = {0},
}