Article Instance
API Endpoint for journals.
GET /api/articles/49728/?format=api
{ "pk": 49728, "title": "Do Large Vision-Language Models Distinguish between the Actual and Apparent Features of Illusions?", "subtitle": null, "abstract": "Research has begun exploring the performance of large vision language models (LVLMs) in recognizing illusions. However, studies often have not distinguished actual and apparent features, leading to ambiguous assessments of machine cognition. \nWe introduce a visual question answering (VQA) dataset, categorized into genuine and fake illusions. Genuine illusions present discrepancies between actual and apparent features, whereas fake illusions have the same actual and apparent features even though they look illusory. We evaluate the performance of LVLMs for genuine and fake illusion VQA tasks and investigate whether the models discern actual and apparent features. Our findings indicate that although LVLMs may appear to recognize illusions by correctly answering questions about both feature types, they predict the same answers for both Genuine Illusion and Fake Illusion VQA questions. \nThis suggests that their responses might be based on prior knowledge of illusions rather than genuine visual understanding.", "language": "eng", "license": { "name": "", "short_name": "", "text": null, "url": "" }, "keywords": [ { "word": "Computer Science; Natural Language Processing; Perception; Vision; Neural Networks" } ], "section": "Papers with Poster Presentation", "is_remote": true, "remote_url": "https://escholarship.org/uc/item/76m3w8tq", "frozenauthors": [ { "first_name": "Taiga", "middle_name": "", "last_name": "Shinozaki", "name_suffix": "", "institution": "Keio University", "department": "" }, { "first_name": "Tomoki", "middle_name": "", "last_name": "Doi", "name_suffix": "", "institution": "The University of Tokyo", "department": "" }, { "first_name": "Amane", "middle_name": "", "last_name": "Watahiki", "name_suffix": "", "institution": "The University of Tokyo", "department": "" }, { "first_name": "Satoshi", "middle_name": "", "last_name": "Nishida", "name_suffix": "", "institution": "National Institute of Information and Communications Technology", "department": "" }, { "first_name": "Hitomi", "middle_name": "", "last_name": "Yanaka", "name_suffix": "", "institution": "The University of Tokyo", "department": "" } ], "date_submitted": null, "date_accepted": null, "date_published": "2025-01-01T18:00:00Z", "render_galley": null, "galleys": [ { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/49728/galley/37690/download/" } ] }