Yuan Xie, Boyi Liu, Qiang Liu, Zhaoran Wang, Yuan Zhou, Jian Peng. Off-Policy Evaluation and Learning from Logged Bandit Feedback: Error Reduction via Surrogate Policy. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net, 2019. https://openreview.net/forum?id=HklKui0ct7
% Conference paper (ICLR 2019). Author names are stored in "Last, First" form.
% The exported record carried a numeric "0001" suffix on Qiang Liu and Jian Peng
% (apparently a homonym-disambiguation id from the exporting database). BibTeX
% would parse that suffix as the surname, so it is omitted from the author field.
% The researchr, cites and citedby fields are non-standard; standard styles ignore them.
@inproceedings{XieLLWZP19,
  author    = {Xie, Yuan and Liu, Boyi and Liu, Qiang and Wang, Zhaoran and Zhou, Yuan and Peng, Jian},
  title     = {Off-Policy Evaluation and Learning from Logged Bandit Feedback: Error Reduction via Surrogate Policy},
  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019, New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  year      = {2019},
  url       = {https://openreview.net/forum?id=HklKui0ct7},
  researchr = {https://researchr.org/publication/XieLLWZP19},
  cites     = {0},
  citedby   = {0},
}