Rethinking algorithm-based fault tolerance with a cooperative software-hardware approach

Dong Li, Zizhong Chen, Panruo Wu, and Jeffrey S. Vetter. Rethinking algorithm-based fault tolerance with a cooperative software-hardware approach. In William Gropp and Satoshi Matsuoka, editors, International Conference for High Performance Computing, Networking, Storage and Analysis, SC'13, Denver, CO, USA - November 17 - 21, 2013, page 44. ACM, 2013. doi:10.1145/2503210.2503226

% Conference paper in the SC'13 proceedings. NOTE(review): `pages` holds a single number (44), presumably an article number rather than a page range -- confirm against the publisher record.
@inproceedings{LiCWV13,
  author     = {Li, Dong and Chen, Zizhong and Wu, Panruo and Vetter, Jeffrey S.},
  title      = {Rethinking algorithm-based fault tolerance with a cooperative software-hardware approach},
  booktitle  = {International Conference for High Performance Computing, Networking, Storage and Analysis, SC'13, Denver, CO, USA - November 17 - 21, 2013},
  editor     = {Gropp, William and Matsuoka, Satoshi},
  pages      = {44},
  publisher  = {ACM},
  year       = {2013},
  isbn       = {978-1-4503-2378-9},
  doi        = {10.1145/2503210.2503226},
  url        = {http://doi.acm.org/10.1145/2503210.2503226},
  researchr  = {https://researchr.org/publication/LiCWV13},
  cites      = {0},
  citedby    = {0},
}