You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
title = {{PyRIT}: A Framework for Security Risk Identification and Red Teaming in Generative {AI} Systems},
646
-
author = {Gary D. Lopez Munoz and Amanda J. Minnich and Roman Lutz and Richard Lundeen and Raja Sekhar Rao Dheekonda and Nina Chikanov and Bolor-Erdene Jagdagdorj and Martin Pouliot and Shiven Chawla and Whitney Maxwell and Blake Bullwinkel and Katherine Pratt and Joris de Gruyter and Charlotte Siska and Pete Bryan and Tori Westerhoff and Chang Kawaguchi and Christian Seifert and Ram Shankar Siva Kumar and Yonatan Zunger},
651
+
author = {Gary D. {Lopez Munoz} and Amanda J. Minnich and Roman Lutz and Richard Lundeen and Raja Sekhar Rao Dheekonda and Nina Chikanov and Bolor-Erdene Jagdagdorj and Martin Pouliot and Shiven Chawla and Whitney Maxwell and Blake Bullwinkel and Katherine Pratt and Joris de Gruyter and Charlotte Siska and Pete Bryan and Tori Westerhoff and Chang Kawaguchi and Christian Seifert and Ram Shankar Siva Kumar and Yonatan Zunger},
647
652
journal = {arXiv preprint arXiv:2410.02828},
648
653
year = {2024},
649
654
url = {https://arxiv.org/abs/2410.02828},
@@ -667,12 +672,13 @@ @inproceedings{wang2025siuo
667
672
note = {Introduces the {SIUO} (Safe Inputs but Unsafe Output) benchmark},
668
673
}
669
674
670
-
@misc{darkbench2025,
671
-
title = {{DarkBench}: A Comprehensive Benchmark for Dark Design Patterns in Large Language Models},
672
-
author = {{Apart Research}},
675
+
@inproceedings{darkbench2025,
676
+
title = {{DarkBench}: Benchmarking Dark Patterns in Large Language Models},
677
+
author = {Esben Kran and Hieu Minh Nguyen and Akash Kundu and Sami Jawhar and Jinsuk Park and Mateusz Maria Jurewicz},
678
+
booktitle = {International Conference on Learning Representations (ICLR)},
0 commit comments