Stanislav Fort. Scaling Laws for Adversarial Attacks on Language Model Activations and Tokens. In The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025. OpenReview.net, 2025.
@comment{Conference paper entry. The fields researchr, cites and citedby are
  not standard BibTeX fields and are ignored by standard styles; they appear
  to be researchr.org export metadata (TODO confirm) and are kept verbatim.}
@inproceedings{Fort25,
  author    = {Fort, Stanislav},
  title     = {Scaling Laws for Adversarial Attacks on Language Model Activations and Tokens},
  booktitle = {The Thirteenth International Conference on Learning Representations, {ICLR} 2025, Singapore, April 24--28, 2025},
  publisher = {OpenReview.net},
  year      = {2025},
  url       = {https://openreview.net/forum?id=YzxMu1asQi},
  researchr = {https://researchr.org/publication/Fort25},
  cites     = {0},
  citedby   = {0},
}