Yoshua Bengio, Patrice Simard, Paolo Frasconi. Learning long-term dependencies with gradient descent is difficult. IEEE Transactions on Neural Networks, 5(2):157-166, 1994. doi:10.1109/72.279181
@comment{BengioSF94: journal article. The researchr, cites and citedby fields are non-standard and are carried over unchanged from the original export.}
@article{BengioSF94,
  author    = {Bengio, Yoshua and Simard, Patrice and Frasconi, Paolo},
  title     = {Learning long-term dependencies with gradient descent is difficult},
  journal   = {IEEE Transactions on Neural Networks},
  volume    = {5},
  number    = {2},
  pages     = {157--166},
  year      = {1994},
  doi       = {10.1109/72.279181},
  url       = {https://doi.org/10.1109/72.279181},
  researchr = {https://researchr.org/publication/BengioSF94},
  cites     = {0},
  citedby   = {0},
}