You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
title = {Online learning and online convex optimization},
4
+
journal = {Foundations and Trends® in Machine Learning},
5
+
volume = {4},
6
+
number = {2},
7
+
pages = {107-194},
8
+
year = {2011},
9
+
doi = {10.1561/2200000018},
10
+
}
11
+
12
+
% Atlas -- arXiv preprint 2505.23735 (cs.LG).
@misc{behrouz2025atlas,
  author        = {Behrouz, Ali and Li, Zeman and Kacham, Praneeth and Daliri, Majid and Deng, Yuan and Zhong, Peilin and Razaviyayn, Meisam and Mirrokni, Vahab},
  title         = {Atlas: Learning to optimally memorize the context at Test Time},
  year          = {2025},
  eprint        = {2505.23735},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2505.23735},
}
21
+
22
+
% "It's All connected" -- arXiv preprint 2504.13173 (cs.LG).
@misc{behrouz2025its,
  author        = {Behrouz, Ali and Razaviyayn, Meisam and Zhong, Peilin and Mirrokni, Vahab},
  title         = {It's All connected: A journey through test-time memorization, attentional bias, retention, and online optimization},
  year          = {2025},
  eprint        = {2504.13173},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2504.13173},
}
31
+
32
+
% Titans -- arXiv preprint 2501.00663 (cs.LG).
@misc{behrouz2024titans,
  author        = {Behrouz, Ali and Zhong, Peilin and Mirrokni, Vahab},
  title         = {Titans: Learning to memorize at Test Time},
  year          = {2024},
  eprint        = {2501.00663},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2501.00663},
}
41
+
42
+
% Linear-attention transformers -- arXiv preprint 2006.16236 (cs.LG).
% The acronym in the title is braced so sentence-casing styles keep its
% capitalisation; the cedilla is written as a LaTeX escape so the entry
% also works with 8-bit classic BibTeX.
@misc{katharopoulos2020transformers,
  author        = {Katharopoulos, Angelos and Vyas, Apoorv and Pappas, Nikolaos and Fleuret, Fran{\c{c}}ois},
  title         = {Transformers are {RNNs}: Fast autoregressive transformers with linear attention},
  year          = {2020},
  eprint        = {2006.16236},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2006.16236},
}
51
+
52
+
% Factual recall via associative memories -- arXiv preprint 2412.06538 (cs.LG).
% "Transformers" is braced so sentence-casing styles do not lowercase it.
@misc{nichani2024understanding,
  author        = {Nichani, Eshaan and Lee, Jason D. and Bietti, Alberto},
  title         = {Understanding factual recall in {Transformers} via associative memories},
  year          = {2024},
  eprint        = {2412.06538},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2412.06538},
}
61
+
62
+
% Retentive network -- arXiv preprint 2307.08621.
% "Transformer" is braced so sentence-casing styles do not lowercase it.
@misc{sun2023retentive,
  author        = {Sun, Yutao and Dong, Li and Huang, Shaohan and Ma, Shuming and Xia, Yuqing and Xue, Jilong and Wang, Jianyong and Wei, Furu},
  title         = {Retentive network: A successor to {Transformer} for large language models},
  year          = {2023},
  eprint        = {2307.08621},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2307.08621},
}
71
+
72
+
% Learning to (learn at test time) -- arXiv preprint 2407.04620 (cs.LG).
% The author list is truncated with the literal token "and others", which the
% bibliography style renders as "et al."; the acronym in the title is braced
% so sentence-casing styles keep its capitalisation.
@misc{sun2025learning,
  author        = {Sun, Yu and Li, Xinhao and Dalal, Karan and Xu, Jiarui and Vikram, Arjun and Zhang, Genghan and Dubois, Yann and Chen, Xinlei and Wang, Xiaolong and Koyejo, Sanmi and others},
  title         = {Learning to (Learn at test time): {RNNs} with Expressive Hidden States},
  year          = {2025},
  eprint        = {2407.04620},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2407.04620},
}
81
+
82
+
% Mesa-optimization in transformers -- arXiv preprint 2309.05858 (cs.LG).
% The compound surname "Aguera y Arcas" is wrapped in braces so BibTeX treats
% it as one indivisible last name (the lowercase "y" would otherwise be parsed
% as a von particle). The author list is truncated with the literal token
% "and others", which the bibliography style renders as "et al.".
@misc{voswald2024uncovering,
  author        = {von Oswald, Johannes and Schlegel, Maximilian and Meulemans, Alexander and Kobayashi, Seijin and Niklasson, Eyvind and Zucchet, Nicolas and Scherrer, Nino and Miller, Nolan and Sandler, Mark and {Ag{\"u}era y Arcas}, Blaise and others},
  title         = {Uncovering Mesa-optimization algorithms in transformers},
  year          = {2024},
  eprint        = {2309.05858},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2309.05858},
}
91
+
92
+
% Test-time regression -- arXiv preprint 2501.12352 (cs.LG).
@misc{wang2025testtime,
  author        = {Wang, Ke Alexander and Shi, Jiaxin and Fox, Emily B.},
  title         = {Test-time regression: A unifying framework for designing sequence models with associative memory},
  year          = {2025},
  eprint        = {2501.12352},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2501.12352},
}
101
+
102
+
% Gated Delta Networks -- arXiv preprint 2412.06464.
% The model names in the title are braced so sentence-casing styles keep
% their capitalisation.
@misc{yang2025gated,
  author        = {Yang, Songlin and Kautz, Jan and Hatamizadeh, Ali},
  title         = {{Gated Delta Networks}: Improving {Mamba2} with delta rule},
  year          = {2025},
  eprint        = {2412.06464},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2412.06464},
}
111
+
112
+
@misc{yang2025parallelizing,
113
+
title = {Parallelizing Linear Transformers with the delta rule over sequence length},
114
+
author = {Yang, Songlin and Wang, Bailin and Zhang, Yu and Shen, Yikang and Kim, Yoon},
0 commit comments