Jiaqing Cao, Quan Liu, Fei Zhu, Qiming Fu, Shan Zhong. Gradient temporal-difference learning for off-policy evaluation using emphatic weightings. Inf. Sci., 580:311-330, 2021. https://doi.org/10.1016/j.ins.2021.08.082
% Journal article: Information Sciences, volume 580 (2021), pages 311--330.
% The researchr, cites and citedby fields are not standard BibTeX fields;
% standard styles ignore unknown fields. They are presumably bookkeeping
% metadata from the researchr.org export (to be confirmed) and are kept as-is.
@article{CaoLZFZ21,
  author    = {Cao, Jiaqing and Liu, Quan and Zhu, Fei and Fu, Qiming and Zhong, Shan},
  title     = {Gradient temporal-difference learning for off-policy evaluation using emphatic weightings},
  journal   = {Information Sciences},
  volume    = {580},
  pages     = {311--330},
  year      = {2021},
  doi       = {10.1016/j.ins.2021.08.082},
  url       = {https://doi.org/10.1016/j.ins.2021.08.082},
  researchr = {https://researchr.org/publication/CaoLZFZ21},
  cites     = {0},
  citedby   = {0},
}