Jun Xu, Zeng Wei, Long Xia, Yanyan Lan, Dawei Yin, Xueqi Cheng, and Ji-Rong Wen. Reinforcement Learning to Rank with Pairwise Policy Gradient. In Jimmy Huang, Yi Chang, Xueqi Cheng, Jaap Kamps, Vanessa Murdock, Ji-Rong Wen, and Yiqun Liu, editors, Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2020, Virtual Event, China, July 25-30, 2020, pages 509-518. ACM, 2020. doi:10.1145/3397271.3401148
% SIGIR 2020 conference paper. The researchr, cites and citedby fields are
% non-standard bookkeeping fields; standard bibliography styles ignore them.
@inproceedings{XuWXLYCW20,
  author    = {Xu, Jun and Wei, Zeng and Xia, Long and Lan, Yanyan and Yin, Dawei and Cheng, Xueqi and Wen, Ji-Rong},
  title     = {Reinforcement Learning to Rank with Pairwise Policy Gradient},
  booktitle = {Proceedings of the 43rd International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval, {SIGIR} 2020, Virtual Event, China, July 25--30, 2020},
  editor    = {Huang, Jimmy and Chang, Yi and Cheng, Xueqi and Kamps, Jaap and Murdock, Vanessa and Wen, Ji-Rong and Liu, Yiqun},
  pages     = {509--518},
  publisher = {ACM},
  year      = {2020},
  isbn      = {978-1-4503-8016-4},
  doi       = {10.1145/3397271.3401148},
  url       = {https://doi.org/10.1145/3397271.3401148},
  researchr = {https://researchr.org/publication/XuWXLYCW20},
  cites     = {0},
  citedby   = {0},
}