Raman Arora, Ofer Dekel, Ambuj Tewari. Deterministic MDPs with Adversarial Rewards and Bandit Feedback. In Nando de Freitas, Kevin P. Murphy, editors, Proceedings of the Twenty-Eighth Conference on Uncertainty in Artificial Intelligence, Catalina Island, CA, USA, August 14-18, 2012, pages 93-101. AUAI Press, 2012.
% Conference paper in the UAI 2012 proceedings.
% The researchr, cites and citedby fields are non-standard bookkeeping fields;
% standard bibliography styles ignore them.
@inproceedings{AroraDT12,
  author    = {Arora, Raman and Dekel, Ofer and Tewari, Ambuj},
  title     = {Deterministic {MDPs} with Adversarial Rewards and Bandit Feedback},
  booktitle = {Proceedings of the Twenty-Eighth Conference on Uncertainty in Artificial Intelligence, Catalina Island, CA, USA, August 14-18, 2012},
  editor    = {de Freitas, Nando and Murphy, Kevin P.},
  pages     = {93--101},
  publisher = {AUAI Press},
  year      = {2012},
  url       = {http://uai.sis.pitt.edu/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=2273&proceeding_id=28},
  researchr = {https://researchr.org/publication/AroraDT12},
  cites     = {0},
  citedby   = {0},
}