Holmes ⌕ A Benchmark to Assess the Linguistic Competence of Language Models

Andreas Waldis, Yotam Perlitz, Leshem Choshen, Yufang Hou, Iryna Gurevych. Holmes ⌕ A Benchmark to Assess the Linguistic Competence of Language Models. Transactions of the Association for Computational Linguistics (TACL), 12:1616–1647, 2024. doi:10.1162/tacl_a_00718

% Journal article entry (TACL, volume 12, 2024).
% - The title contains the non-ASCII symbol U+2315; it needs a UTF-8-aware
%   toolchain (biblatex + Biber). Under classic 8-bit BibTeX, substitute a
%   LaTeX macro for the symbol.
% - researchr, cites and citedby are non-standard bookkeeping fields from the
%   exporting site; standard styles ignore them.
@article{WaldisWPCCHG24a,
  author    = {Waldis, Andreas and Perlitz, Yotam and Choshen, Leshem and Hou, Yufang and Gurevych, Iryna},
  title     = {{Holmes} ⌕ {A} Benchmark to Assess the Linguistic Competence of Language Models},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {12},
  pages     = {1616--1647},
  year      = {2024},
  doi       = {10.1162/tacl_a_00718},
  url       = {https://doi.org/10.1162/tacl_a_00718},
  researchr = {https://researchr.org/publication/WaldisWPCCHG24a},
  cites     = {0},
  citedby   = {0},
}