Combination of learning from non-optimal demonstrations and feedbacks using inverse reinforcement learning and Bayesian policy improvement

Ali Ezzeddine, Nafee Mourad, Babak Nadjar Araabi, Majid Nili Ahmadabadi. Combination of learning from non-optimal demonstrations and feedbacks using inverse reinforcement learning and Bayesian policy improvement. Expert Syst. Appl., 112:331-341, 2018. [doi]

@comment{Journal article, Ezzeddine et al., 2018. The fields researchr, cites and citedby are non-standard export metadata that standard styles ignore.}
@article{EzzeddineMAA18,
  author    = {Ali Ezzeddine and Nafee Mourad and Babak Nadjar Araabi and Majid Nili Ahmadabadi},
  title     = {Combination of learning from non-optimal demonstrations and feedbacks using inverse reinforcement learning and {Bayesian} policy improvement},
  journal   = {Expert Systems with Applications},
  volume    = {112},
  pages     = {331--341},
  year      = {2018},
  doi       = {10.1016/j.eswa.2018.06.035},
  url       = {https://doi.org/10.1016/j.eswa.2018.06.035},
  researchr = {https://researchr.org/publication/EzzeddineMAA18},
  cites     = {0},
  citedby   = {0},
}