cite.bib (1599B)
% Bibliography database. Text outside @entries is ignored by BibTeX, so these
% lines act as comments. Values are stored in canonical form; capitalisation
% that must survive sentence-casing styles is protected with braces.

% arXiv preprint 2210.05337 (cs.LG).
@misc{andriushchenko2023sgd,
  author        = {Andriushchenko, Maksym and Varre, Aditya and Pillaud-Vivien, Loucas and Flammarion, Nicolas},
  title         = {{SGD} with Large Step Sizes Learns Sparse Features},
  year          = {2023},
  eprint        = {2210.05337},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}

% No journal is recorded for this work, so it is typed as @misc rather than
% @article (which requires a journal field).
% NOTE(review): the DOI prefix looks like a Research Square preprint (v1);
% confirm, and switch back to @article with journal/volume/pages if a
% peer-reviewed version exists.
@misc{fast_armijo_2022,
  author = {Hafshejani, Sajad and Gaur, Daya and Hossain, Shahadat and Benkoczi, Robert},
  title  = {Fast {Armijo} line search for stochastic gradient descent},
  year   = {2022},
  month  = nov,
  doi    = {10.21203/rs.3.rs-2285238/v1},
}

% Textbook, Cambridge University Press.
@book{shalev2014understanding,
  author    = {Shalev-Shwartz, Shai and Ben-David, Shai},
  title     = {Understanding Machine Learning: From Theory to Algorithms},
  publisher = {Cambridge University Press},
  year      = {2014},
  isbn      = {9781107057135},
  lccn      = {2014001779},
  url       = {https://books.google.pt/books?id=ttJkAwAAQBAJ},
}

% arXiv preprint 2206.09841 (stat.ML).
@misc{pillaudvivien2022label,
  author        = {Pillaud-Vivien, Loucas and Reygner, Julien and Flammarion, Nicolas},
  title         = {Label noise (stochastic) gradient descent implicitly solves the {Lasso} for quadratic parametrisation},
  year          = {2022},
  eprint        = {2206.09841},
  archivePrefix = {arXiv},
  primaryClass  = {stat.ML},
}

% arXiv preprint 1811.01558 (cs.LG). The last author's surname is the single
% letter "E"; the comma form keeps the name split unambiguous.
@misc{li2018stochastic,
  author        = {Li, Qianxiao and Tai, Cheng and E, Weinan},
  title         = {Stochastic Modified Equations and Dynamics of Stochastic Gradient Algorithms {I}: Mathematical Foundations},
  year          = {2018},
  eprint        = {1811.01558},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}