CRAB: Cross-environment Agent Benchmark for Multimodal Language Model Agents

Tianqi Xu, Linyao Chen, Dai-Jie Wu, Yanjun Chen, Zecheng Zhang, Xiang Yao, Zhiqiang Xie, Yongchao Chen, Shilong Liu, Bochen Qian, Anjie Yang, Zhaoxuan Jin, Jianbo Deng, Philip Torr, Bernard Ghanem, Guohao Li. CRAB: Cross-environment Agent Benchmark for Multimodal Language Model Agents. In Wanxiang Che, Joyce Nabende, Ekaterina Shutova, Mohammad Taher Pilehvar, editors, Findings of the Association for Computational Linguistics, ACL 2025, Vienna, Austria, July 27 - August 1, 2025. Pages 21607-21647, Association for Computational Linguistics, 2025. [doi]

% CRAB benchmark paper, Findings of ACL 2025 (record exported from researchr.org).
% DBLP homonym suffixes ("0001") are stripped from author names so BibTeX does
% not parse them as surnames; acronyms are brace-protected against style recasing.
@inproceedings{XuCWCZYXCLQYJD025,
  title = {{CRAB}: Cross-environment Agent Benchmark for Multimodal Language Model Agents},
  author = {Tianqi Xu and Linyao Chen and Dai-Jie Wu and Yanjun Chen and Zecheng Zhang and Xiang Yao and Zhiqiang Xie and Yongchao Chen and Shilong Liu and Bochen Qian and Anjie Yang and Zhaoxuan Jin and Jianbo Deng and Philip Torr and Bernard Ghanem and Guohao Li},
  year = {2025},
  url = {https://aclanthology.org/2025.findings-acl.1113/},
  researchr = {https://researchr.org/publication/XuCWCZYXCLQYJD025},
  cites = {0},
  citedby = {0},
  pages = {21607--21647},
  booktitle = {Findings of the Association for Computational Linguistics, {ACL} 2025, Vienna, Austria, July 27 - August 1, 2025},
  editor = {Wanxiang Che and Joyce Nabende and Ekaterina Shutova and Mohammad Taher Pilehvar},
  publisher = {Association for Computational Linguistics},
  isbn = {979-8-89176-256-5},
}