Peter Chen, Xiaopeng Li, Ziniu Li, Xi Chen, Tianyi Lin. Stepwise Guided Policy Optimization: Coloring Your Incorrect Reasoning in GRPO. Trans. Mach. Learn. Res., 2026, 2026. [doi]
@comment{
  ChenLLCL26: journal article, full text at the OpenReview URL below.
  The fields researchr, cites and citedby are not standard BibTeX fields;
  bibliography styles ignore them. They are kept as bookkeeping metadata.
  NOTE(review): volume repeats the year; confirm that this is the intended
  volume numbering for this journal.
}
@article{ChenLLCL26,
  author    = {Chen, Peter and Li, Xiaopeng and Li, Ziniu and Chen, Xi and Lin, Tianyi},
  title     = {Stepwise Guided Policy Optimization: Coloring Your Incorrect Reasoning in {GRPO}},
  journal   = {Transactions on Machine Learning Research},
  volume    = {2026},
  year      = {2026},
  url       = {https://openreview.net/forum?id=ALnVAqtshR},
  researchr = {https://researchr.org/publication/ChenLLCL26},
  cites     = {0},
  citedby   = {0},
}