Jonathan Baxter, Peter L. Bartlett, Lex Weaver. Experiments with Infinite-Horizon, Policy-Gradient Estimation. J. Artif. Intell. Res. (JAIR), 15:351-381, 2001.
@comment{
  Journal article entry. The fields researchr, cites and citedby are not
  standard BibTeX fields; standard styles ignore unknown fields, so they are
  kept only as export metadata (presumably from researchr.org; verify before
  relying on the counts).
}
@article{BaxterBW01,
  author    = {Baxter, Jonathan and Bartlett, Peter L. and Weaver, Lex},
  title     = {Experiments with Infinite-Horizon, Policy-Gradient Estimation},
  journal   = {Journal of Artificial Intelligence Research},
  volume    = {15},
  pages     = {351--381},
  year      = {2001},
  url       = {http://www.cs.washington.edu/research/jair/abstracts/baxter01b.html},
  researchr = {https://researchr.org/publication/BaxterBW01},
  cites     = {0},
  citedby   = {0},
}