Article Instance
API Endpoint for journals.
GET /api/articles/50464/?format=api
{ "pk": 50464, "title": "Probing Perceptual Constancy in Large Vision Language Models", "subtitle": null, "abstract": "Perceptual constancy is the ability to maintain stable perceptions of objects despite changes in sensory input, such as variations in distance, angle, or lighting. This ability is crucial for recognizing visual information in a dynamic world, making it essential for Vision-Language Models (VLMs). However, whether VLMs are currently and theoretically capable of mastering this ability remains underexplored. In this study, we evaluate 33 VLMs using 253 experiments across three domains: color, size, and shape constancy. The experiments include single-image and video adaptations of classic cognitive tasks, along with novel tasks in in-the-wild conditions, to evaluate the models' recognition of object properties under varying conditions. We find significant variability in VLM performance, with models excelling in shape constancy but struggling with color and size constancy. These results suggest that while VLMs are proficient in object recognition, they do not fully replicate the robustness of human perceptual constancy.", "language": "eng", "license": { "name": "", "short_name": "", "text": null, "url": "" }, "keywords": [ { "word": "Artificial Intelligence; Psychology; Machine learning; Perception; Vision" } ], "section": "Member Abstracts with Poster Presentation", "is_remote": true, "remote_url": "https://escholarship.org/uc/item/6sw7g30z", "frozenauthors": [ { "first_name": "Haoran", "middle_name": "", "last_name": "Sun", "name_suffix": "", "institution": "Johns Hopkins University", "department": "" }, { "first_name": "Suyang", "middle_name": "", "last_name": "Yu", "name_suffix": "", "institution": "University of Washington", "department": "" }, { "first_name": "Yijiang", "middle_name": "", "last_name": "Li", "name_suffix": "", "institution": "Electrical and computer engineering", "department": "" }, { "first_name": "Qingying", "middle_name": "", "last_name": "Gao", "name_suffix": "", "institution": "Computer Science", "department": "" }, { "first_name": "Haiyun", "middle_name": "", "last_name": "Lyu", "name_suffix": "", "institution": "University of North Carolina at Chapel Hill", "department": "" }, { "first_name": "Hokin", "middle_name": "", "last_name": "Deng", "name_suffix": "", "institution": "Harvard University", "department": "" }, { "first_name": "Dezhi", "middle_name": "", "last_name": "Luo", "name_suffix": "", "institution": "University of Michigan", "department": "" } ], "date_submitted": null, "date_accepted": null, "date_published": "2025-01-01T10:00:00-08:00", "render_galley": null, "galleys": [ { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/50464/galley/38426/download/" } ] }