Ahmad Abdelfattah, Stanimire Tomov, Jack J. Dongarra. Fast Batched Matrix Multiplication for Small Sizes Using Half-Precision Arithmetic on GPUs. In 2019 IEEE International Parallel and Distributed Processing Symposium, IPDPS 2019, Rio de Janeiro, Brazil, May 20-24, 2019, pages 111-122. IEEE, 2019. doi:10.1109/IPDPS.2019.00022
% Conference paper in the IPDPS 2019 proceedings. The citation key matches the
% identifier in the researchr URL below; keep the two in sync if either changes.
% researchr, cites and citedby are non-standard fields that standard styles ignore.
@inproceedings{AbdelfattahTD19,
  author    = {Abdelfattah, Ahmad and Tomov, Stanimire and Dongarra, Jack J.},
  title     = {Fast Batched Matrix Multiplication for Small Sizes Using Half-Precision Arithmetic on {GPUs}},
  booktitle = {2019 {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2019, Rio de Janeiro, Brazil, May 20--24, 2019},
  pages     = {111--122},
  publisher = {IEEE},
  year      = {2019},
  isbn      = {978-1-7281-1246-6},
  doi       = {10.1109/IPDPS.2019.00022},
  url       = {https://doi.org/10.1109/IPDPS.2019.00022},
  researchr = {https://researchr.org/publication/AbdelfattahTD19},
  cites     = {0},
  citedby   = {0},
}