Tsu-Jui Fu, Xin Eric Wang, Scott T. Grafton, Miguel P. Eckstein, William Yang Wang. 3L: Language-based Video Editing via Multi-Modal Multi-Level Transformers. In IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18-24, 2022, pages 10503-10512. IEEE, 2022. doi:10.1109/CVPR52688.2022.01026
@inproceedings{FuWGEW22,
  author    = {Fu, Tsu-Jui and Wang, Xin Eric and Grafton, Scott T. and Eckstein, Miguel P. and Wang, William Yang},
  title     = {{3L}: Language-based Video Editing via Multi-Modal Multi-Level Transformers},
  booktitle = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2022, New Orleans, {LA}, {USA}, June 18-24, 2022},
  pages     = {10503--10512},
  publisher = {IEEE},
  year      = {2022},
  isbn      = {978-1-6654-6946-3},
  doi       = {10.1109/CVPR52688.2022.01026},
  url       = {https://doi.org/10.1109/CVPR52688.2022.01026},
  researchr = {https://researchr.org/publication/FuWGEW22},
  cites     = {0},
  citedby   = {0},
}