Importance sampling in reinforcement learning with an estimated behavior policy

Josiah P. Hanna, Scott Niekum, Peter Stone. Importance sampling in reinforcement learning with an estimated behavior policy. Machine Learning, 110(6):1267-1317, 2021. doi:10.1007/s10994-020-05938-9

@comment{HannaNS21: journal article in Machine Learning 110(6), 2021.
  The researchr, cites and citedby fields are non-standard (presumably
  export metadata from researchr.org; confirm before relying on them) and
  are ignored by standard bibliography styles.}
@article{HannaNS21,
  author    = {Hanna, Josiah P. and Niekum, Scott and Stone, Peter},
  title     = {Importance sampling in reinforcement learning with an estimated behavior policy},
  journal   = {Machine Learning},
  volume    = {110},
  number    = {6},
  pages     = {1267--1317},
  year      = {2021},
  doi       = {10.1007/s10994-020-05938-9},
  url       = {https://doi.org/10.1007/s10994-020-05938-9},
  researchr = {https://researchr.org/publication/HannaNS21},
  cites     = {0},
  citedby   = {0},
}