Josiah P. Hanna, Scott Niekum, Peter Stone. Importance sampling in reinforcement learning with an estimated behavior policy. Machine Learning, 110(6):1267-1317, 2021. doi:10.1007/s10994-020-05938-9
% Journal article in Machine Learning, vol. 110, no. 6 (2021).
% The researchr, cites and citedby fields are non-standard export metadata;
% standard bibliography styles ignore unknown fields.
@article{HannaNS21,
  author    = {Hanna, Josiah P. and Niekum, Scott and Stone, Peter},
  title     = {Importance sampling in reinforcement learning with an estimated behavior policy},
  journal   = {Machine Learning},
  volume    = {110},
  number    = {6},
  pages     = {1267--1317},
  year      = {2021},
  doi       = {10.1007/s10994-020-05938-9},
  url       = {https://doi.org/10.1007/s10994-020-05938-9},
  researchr = {https://researchr.org/publication/HannaNS21},
  cites     = {0},
  citedby   = {0},
}