Stef Van Havermaet, Yara Khaluf, Pieter Simoens. No More Hand-Tuning Rewards: Masked Constrained Policy Optimization for Safe Reinforcement Learning. In Frank Dignum, Alessio Lomuscio, Ulle Endriss, Ann Nowé, editors, AAMAS '21: 20th International Conference on Autonomous Agents and Multiagent Systems, Virtual Event, United Kingdom, May 3-7, 2021, pages 1344-1352. ACM, 2021. https://dl.acm.org/doi/10.5555/3463952.3464107
@comment{
  HavermaetKS21: names are stored in "Last, First" form so that the compound
  surname "Van Havermaet" is not split into First="Stef Van", Last="Havermaet".
  The fields researchr, cites and citedby are non-standard and are ignored by
  bibliography styles; they are kept as imported metadata.
}
@inproceedings{HavermaetKS21,
  author    = {Van Havermaet, Stef and Khaluf, Yara and Simoens, Pieter},
  title     = {No More Hand-Tuning Rewards: Masked Constrained Policy Optimization for Safe Reinforcement Learning},
  booktitle = {{AAMAS} '21: 20th International Conference on Autonomous Agents and Multiagent Systems, Virtual Event, United Kingdom, May 3--7, 2021},
  editor    = {Dignum, Frank and Lomuscio, Alessio and Endriss, Ulle and Now{\'e}, Ann},
  pages     = {1344--1352},
  publisher = {ACM},
  year      = {2021},
  isbn      = {978-1-4503-8307-3},
  url       = {https://dl.acm.org/doi/10.5555/3463952.3464107},
  researchr = {https://researchr.org/publication/HavermaetKS21},
  cites     = {0},
  citedby   = {0},
}