Yan-Jie Li, Fang Cao, Xi-Ren Cao. On-Line Policy Gradient Estimation with Multi-Step Sampling. Discrete Event Dynamic Systems, 20(1):3-17, 2010. doi:10.1007/s10626-009-0078-3
@comment{LiCC10: journal article. The fields researchr, cites and citedby are
  non-standard export metadata; standard bibliography styles ignore them.}
@article{LiCC10,
  author    = {Li, Yan-Jie and Cao, Fang and Cao, Xi-Ren},
  title     = {On-Line Policy Gradient Estimation with Multi-Step Sampling},
  journal   = {Discrete Event Dynamic Systems},
  volume    = {20},
  number    = {1},
  pages     = {3--17},
  year      = {2010},
  doi       = {10.1007/s10626-009-0078-3},
  url       = {http://dx.doi.org/10.1007/s10626-009-0078-3},
  researchr = {https://researchr.org/publication/LiCC10},
  cites     = {0},
  citedby   = {0},
}