Peter L. Bartlett, Jonathan Baxter. Estimation and Approximation Bounds for Gradient-Based Reinforcement Learning. J. Comput. Syst. Sci., 64(1):133-150, 2002. doi:10.1006/jcss.2001.1793
% Journal article record (researchr.org export). The fields tags, researchr,
% cites and citedby are non-standard and are ignored by standard styles.
@article{BartlettB02:0,
  author    = {Bartlett, Peter L. and Baxter, Jonathan},
  title     = {Estimation and Approximation Bounds for Gradient-Based Reinforcement Learning},
  journal   = {Journal of Computer and System Sciences},
  volume    = {64},
  number    = {1},
  pages     = {133--150},
  year      = {2002},
  doi       = {10.1006/jcss.2001.1793},
  url       = {http://dx.doi.org/10.1006/jcss.2001.1793},
  tags      = {rule-based},
  researchr = {https://researchr.org/publication/BartlettB02%3A0},
  cites     = {0},
  citedby   = {0},
}