Odalric-Ambrym Maillard, Rémi Munos, Gilles Stoltz. A Finite-Time Analysis of Multi-armed Bandits Problems with Kullback-Leibler Divergences. Journal of Machine Learning Research, 19:497-514, 2011.
% Maillard, Munos and Stoltz (2011): finite-time analysis of multi-armed bandit
% problems with Kullback-Leibler divergences.
% NOTE(review): the url path (/proceedings/papers/v19/) suggests this appeared in
% the JMLR proceedings track rather than the main journal; confirm whether the
% journal field (or the entry type) should be adjusted.
% The researchr, cites and citedby fields are exporter metadata; standard styles
% ignore unknown fields.
@article{MaillardMS11,
  author    = {Maillard, Odalric-Ambrym and Munos, R{\'e}mi and Stoltz, Gilles},
  title     = {A Finite-Time Analysis of Multi-armed Bandits Problems with {Kullback-Leibler} Divergences},
  journal   = {Journal of Machine Learning Research},
  volume    = {19},
  pages     = {497--514},
  year      = {2011},
  url       = {http://www.jmlr.org/proceedings/papers/v19/maillard11a/maillard11a.pdf},
  researchr = {https://researchr.org/publication/MaillardMS11},
  cites     = {0},
  citedby   = {0},
}