cite.bib (1319B)
% Bibliography database: three arXiv preprints and one textbook.
% Conventions used throughout this file:
%   - authors are written in "Last, First" form, separated by " and ";
%   - acronyms, proper nouns and numerals in titles are brace-protected so
%     that styles which sentence-case titles keep their capitalisation;
%   - arXiv preprints carry eprint / archivePrefix / primaryClass fields.

% arXiv preprint 2210.05337 (primary class cs.LG).
@misc{andriushchenko2023sgd,
  author        = {Andriushchenko, Maksym and Varre, Aditya and Pillaud-Vivien, Loucas and Flammarion, Nicolas},
  title         = {{SGD} with Large Step Sizes Learns Sparse Features},
  year          = {2023},
  eprint        = {2210.05337},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}

% Textbook. No series field is given: the exported value only repeated the
% title, which would make some styles print the title twice.
@book{shalev2014understanding,
  author    = {Shalev-Shwartz, Shai and Ben-David, Shai},
  title     = {Understanding Machine Learning: From Theory to Algorithms},
  publisher = {Cambridge University Press},
  year      = {2014},
  isbn      = {9781107057135},
  lccn      = {2014001779},
  url       = {https://books.google.pt/books?id=ttJkAwAAQBAJ},
}

% arXiv preprint 1811.01558 (primary class cs.LG).
% The part number "I" is braced so it is not lowercased by the style, and the
% single-letter surname "E" is given in comma form to keep it unambiguous.
@misc{li2018stochastic,
  author        = {Li, Qianxiao and Tai, Cheng and E, Weinan},
  title         = {Stochastic Modified Equations and Dynamics of Stochastic Gradient Algorithms {I}: Mathematical Foundations},
  year          = {2018},
  eprint        = {1811.01558},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}

% arXiv preprint 2206.09841 (primary class stat.ML).
@misc{pillaudvivien2022label,
  author        = {Pillaud-Vivien, Loucas and Reygner, Julien and Flammarion, Nicolas},
  title         = {Label noise (stochastic) gradient descent implicitly solves the {Lasso} for quadratic parametrisation},
  year          = {2022},
  eprint        = {2206.09841},
  archivePrefix = {arXiv},
  primaryClass  = {stat.ML},
}