Hamidreza Chinaei, Brahim Chaib-draa. Dialogue POMDP components (Part II): learning the reward function. International Journal of Speech Technology, 17(4):325-340, 2014. doi:10.1007/s10772-014-9224-x
% Journal article. The researchr, cites and citedby fields carry researchr.org
% metadata; they are not standard BibTeX fields, so standard styles ignore them.
@article{ChinaeiC14a,
  author    = {Chinaei, Hamidreza and Chaib-draa, Brahim},
  title     = {Dialogue {POMDP} components (Part {II}): learning the reward function},
  journal   = {International Journal of Speech Technology},
  volume    = {17},
  number    = {4},
  pages     = {325--340},
  year      = {2014},
  doi       = {10.1007/s10772-014-9224-x},
  url       = {http://dx.doi.org/10.1007/s10772-014-9224-x},
  researchr = {https://researchr.org/publication/ChinaeiC14a},
  cites     = {0},
  citedby   = {0},
}