The table below reports the performance of 14 models on DSI-Bench under two evaluation protocols: Sample-wise, in which each augmented video is treated independently, and Group-wise, which reports the fraction of original video groups with ≥3 correct predictions among their variants.
@comment{arXiv preprint entry for the DSI-Bench paper. The acronym in the title is brace-protected so sentence-casing bibliography styles keep its capitalisation.}
@misc{zhang2025dsibenchbenchmarkdynamicspatial,
  author        = {Zhang, Ziang and Wang, Zehan and Zhang, Guanghao and Dai, Weilong and Xia, Yan and Yan, Ziang and Hong, Minjie and Zhao, Zhou},
  title         = {{DSI-Bench}: A Benchmark for Dynamic Spatial Intelligence},
  year          = {2025},
  eprint        = {2510.18873},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2510.18873},
}