Lawrence K. Saul, Satinder P. Singh. Learning Curve Bounds for a Markov Decision Process with Undiscounted Rewards. In COLT, pages 147-156, 1996. doi:10.1145/238061.238084
% Saul and Singh, COLT 1996 conference paper.
% The citation key matches the researchr publication id (see the `researchr` field).
% Non-standard fields (tags, researchr, cites, citedby) are ignored by standard styles
% and are kept verbatim as exported metadata.
@inproceedings{SaulS96,
  author    = {Saul, Lawrence K. and Singh, Satinder P.},
  title     = {Learning Curve Bounds for a {Markov} Decision Process with Undiscounted Rewards},
  booktitle = {{COLT}},
  pages     = {147--156},
  year      = {1996},
  doi       = {10.1145/238061.238084},
  url       = {http://doi.acm.org/10.1145/238061.238084},
  tags      = {Markov},
  researchr = {https://researchr.org/publication/SaulS96},
  cites     = {0},
  citedby   = {0},
}