Naman Agarwal, Syomantak Chaudhuri, Prateek Jain, Dheeraj Mysore Nagaraj, Praneeth Netrapalli. Online Target Q-learning with Reverse Experience Replay: Efficiently finding the Optimal Policy for Linear MDPs. In The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022. OpenReview.net, 2022. URL: https://openreview.net/forum?id=HMJdXzbWKH
@inproceedings{AgarwalC0NN22,
  author        = {Agarwal, Naman and Chaudhuri, Syomantak and Jain, Prateek and Nagaraj, Dheeraj Mysore and Netrapalli, Praneeth},
  title         = {Online Target {Q-learning} with Reverse Experience Replay: Efficiently finding the Optimal Policy for Linear {MDPs}},
  booktitle     = {The Tenth International Conference on Learning Representations, {ICLR} 2022, Virtual Event, April 25-29, 2022},
  publisher     = {OpenReview.net},
  year          = {2022},
  url           = {https://openreview.net/forum?id=HMJdXzbWKH},
  researchr     = {https://researchr.org/publication/AgarwalC0NN22},
  cites         = {0},
  citedby       = {0},
  internal-note = {The third author was exported as "Prateek Jain 0002". The numeric suffix (apparently a homonym-disambiguation tag from the source database; confirm) was removed from the author field because BibTeX would otherwise parse "0002" as the surname.},
}