Jonathan Baxter, Peter L. Bartlett. Infinite-Horizon Policy-Gradient Estimation. J. Artif. Intell. Res. (JAIR), 15:319-350, 2001.
% Journal article: Baxter and Bartlett, "Infinite-Horizon Policy-Gradient
% Estimation" (2001). The fields researchr, cites and citedby are not part of
% the standard BibTeX data model and are ignored by standard styles; they are
% kept verbatim (presumably researchr.org export metadata -- confirm before
% relying on them).
@article{BaxterB01,
  author    = {Baxter, Jonathan and Bartlett, Peter L.},
  title     = {Infinite-Horizon Policy-Gradient Estimation},
  journal   = {Journal of Artificial Intelligence Research},
  volume    = {15},
  pages     = {319--350},
  year      = {2001},
  url       = {http://www.cs.washington.edu/research/jair/abstracts/baxter01a.html},
  researchr = {https://researchr.org/publication/BaxterB01},
  cites     = {0},
  citedby   = {0},
}