% arXiv preprint. The arXiv identifier is stored in the eprint fields rather
% than in a journal field, so styles do not print it as a periodical name.
% The year is taken from the YYMM prefix of the identifier (2604); confirm
% against the arXiv listing, and update this entry if the paper is later
% published at a venue.
@misc{hebeyond,
  author        = {He, Ruozhen and Shah, Nisarg A. and Dong, Qihua and Xiao, Zilin and Koo, Jaywon and Ordonez, Vicente},
  title         = {Beyond Referring Expressions: Scenario Comprehension Visual Grounding},
  year          = {2026},
  eprint        = {2604.02323},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2604.02323},
}