Adith Swaminathan, Thorsten Joachims. Batch learning from logged bandit feedback through counterfactual risk minimization. Journal of Machine Learning Research, 16:1731-1755, 2015. [doi]
% Journal article (JMLR vol. 16, 2015). The fields researchr, cites and citedby
% are non-standard export metadata; standard bibliography styles ignore them.
@article{SwaminathanJ15-1,
  author    = {Swaminathan, Adith and Joachims, Thorsten},
  title     = {Batch learning from logged bandit feedback through counterfactual risk minimization},
  journal   = {Journal of Machine Learning Research},
  volume    = {16},
  pages     = {1731--1755},
  year      = {2015},
  url       = {http://dl.acm.org/citation.cfm?id=2886805},
  researchr = {https://researchr.org/publication/SwaminathanJ15-1},
  cites     = {0},
  citedby   = {0},
}