Ali Ezzeddine, Nafee Mourad, Babak Nadjar Araabi, Majid Nili Ahmadabadi. Combination of learning from non-optimal demonstrations and feedbacks using inverse reinforcement learning and Bayesian policy improvement. Expert Syst. Appl., 112:331-341, 2018. doi:10.1016/j.eswa.2018.06.035
@article{EzzeddineMAA18,
  title     = {Combination of learning from non-optimal demonstrations and feedbacks using inverse reinforcement learning and {Bayesian} policy improvement},
  author    = {Ali Ezzeddine and Nafee Mourad and Babak Nadjar Araabi and Majid Nili Ahmadabadi},
  journal   = {Expert Systems with Applications},
  volume    = {112},
  pages     = {331--341},
  year      = {2018},
  doi       = {10.1016/j.eswa.2018.06.035},
  url       = {https://doi.org/10.1016/j.eswa.2018.06.035},
  researchr = {https://researchr.org/publication/EzzeddineMAA18},
  cites     = {0},
  citedby   = {0},
}