Gen Li, Yuting Wei, Yuejie Chi, Yuxin Chen. Softmax policy gradient methods can take exponential time to converge. Math. Program., 201(1):707–802, 2023. doi:10.1007/s10107-022-01920-6
% Journal article record (researchr.org export), cleaned up for BibTeX/Biber.
% Authors are stored in "Last, First" form. The numeric suffixes that followed
% two of the names in the export (presumably database homonym IDs) are omitted,
% because BibTeX would otherwise parse them as the surnames.
% The citation key is kept as exported so existing citations keep resolving.
@article{0005WC023,
  author    = {Li, Gen and Wei, Yuting and Chi, Yuejie and Chen, Yuxin},
  title     = {Softmax policy gradient methods can take exponential time to converge},
  journal   = {Mathematical Programming},
  volume    = {201},
  number    = {1},
  pages     = {707--802},
  year      = {2023},
  doi       = {10.1007/s10107-022-01920-6},
  url       = {https://doi.org/10.1007/s10107-022-01920-6},
  researchr = {https://researchr.org/publication/0005WC023},
  cites     = {0},
  citedby   = {0},
}