% Flex Attention paper -- arXiv preprint 2412.05496 (primary class cs.LG), 2024.
@misc{dong2024flexattentionprogrammingmodel,
  author        = {Dong, Juechu and Feng, Boyuan and Guessous, Driss and Liang, Yanbo and He, Horace},
  title         = {{Flex Attention}: A Programming Model for Generating Optimized Attention Kernels},
  year          = {2024},
  eprint        = {2412.05496},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2412.05496},
}