José Antonio Hernández López, Boqi Chen, Mootez Saad, Tushar Sharma, Dániel Varró. On Inter-Dataset Code Duplication and Data Leakage in Large Language Models. IEEE Trans. Software Eng., 51(1):192–205, January 2025. https://doi.org/10.1109/TSE.2024.3504286
% Journal article, IEEE TSE vol. 51 no. 1 (2025). Key and non-standard fields
% (researchr, cites, citedby) are kept as exported; BibTeX styles ignore them.
% NOTE(review): the first author's surname may be the compound "Hernández López";
% if so, rewrite that name as "Hernández López, José Antonio" - confirm with the paper.
@article{LopezCSSV25,
  author    = {José Antonio Hernández López and Boqi Chen and Mootez Saad and Tushar Sharma and Dániel Varró},
  title     = {On Inter-Dataset Code Duplication and Data Leakage in Large Language Models},
  journal   = {IEEE Transactions on Software Engineering},
  volume    = {51},
  number    = {1},
  pages     = {192--205},
  year      = {2025},
  month     = jan,
  doi       = {10.1109/TSE.2024.3504286},
  url       = {https://doi.org/10.1109/TSE.2024.3504286},
  researchr = {https://researchr.org/publication/LopezCSSV25},
  cites     = {0},
  citedby   = {0},
}