Instruction-ViT: Multi-modal prompts for instruction learning in vision transformer

Zhenxiang Xiao, Yuzhong Chen, Junjie Yao, Lu Zhang, Zhengliang Liu, Zihao Wu, Xiaowei Yu, Yi Pan, Lin Zhao, Chong Ma, Xinyu Liu, Wei Liu, Xiang Li, Yixuan Yuan, Dinggang Shen, Dajiang Zhu, Dezhong Yao, Tianming Liu, Xi Jiang. Instruction-ViT: Multi-modal prompts for instruction learning in vision transformer. Information Fusion, 104:102204, April 2024. doi:10.1016/j.inffus.2023.102204

% Journal article record for the Instruction-ViT paper (Information Fusion, vol. 104).
% Author names are stored in "Last, First" form; the numeric homonym suffixes
% (e.g. 0050, 0001) from the exported record are omitted because BibTeX would
% otherwise parse them as surnames. "pages" holds the article number.
% researchr / cites / citedby are non-standard fields that standard styles ignore.
@article{XiaoCYZLWYPZMLLLYSZYLJ24,
  author    = {Xiao, Zhenxiang and Chen, Yuzhong and Yao, Junjie and Zhang, Lu and Liu, Zhengliang and Wu, Zihao and Yu, Xiaowei and Pan, Yi and Zhao, Lin and Ma, Chong and Liu, Xinyu and Liu, Wei and Li, Xiang and Yuan, Yixuan and Shen, Dinggang and Zhu, Dajiang and Yao, Dezhong and Liu, Tianming and Jiang, Xi},
  title     = {{Instruction-ViT}: Multi-modal prompts for instruction learning in vision transformer},
  journal   = {Information Fusion},
  volume    = {104},
  pages     = {102204},
  year      = {2024},
  month     = apr,
  doi       = {10.1016/j.inffus.2023.102204},
  url       = {https://doi.org/10.1016/j.inffus.2023.102204},
  researchr = {https://researchr.org/publication/XiaoCYZLWYPZMLLLYSZYLJ24},
  cites     = {0},
  citedby   = {0},
}