API Endpoint for journals.

GET /api/articles/49723/?format=api
HTTP 200 OK
Allow: GET
Content-Type: application/json
Vary: Accept

{
    "pk": 49723,
    "title": "Bridging Perception and Language: A Systematic Benchmark for LVLMs' Understanding of Amodal Completion Reports",
    "subtitle": null,
    "abstract": "One of the main objectives in developing large vision-language models (LVLMs) is to engineer systems that can assist humans with multimodal tasks, including interpreting descriptions of perceptual experiences. A central phenomenon in this context is amodal completion, in which people perceive objects even when parts of those objects are hidden. Although numerous studies have assessed whether computer-vision algorithms can detect or reconstruct occluded regions, the inferential abilities of LVLMs on texts related to amodal completion remain unexplored. To address this gap, we constructed a benchmark grounded in Basic Formal Ontology to achieve a systematic classification of amodal completion. Our results indicate that while many LVLMs achieve human-comparable performance overall, their accuracy diverges for certain types of objects being completed. Notably, in certain categories, some LLaVA-NeXT variants and Claude 3.5 Sonnet exhibit lower accuracy on original images compared to blank stimuli lacking visual content. Intriguingly, this disparity emerges only under Japanese prompting, suggesting a deficiency in Japanese-specific linguistic competence among these models.",
    "language": "eng",
    "license": {
        "name": "",
        "short_name": "",
        "text": null,
        "url": ""
    },
    "keywords": [
        {
            "word": "Computer Science; Linguistics; Philosophy; Natural Language Processing; Perception; Semantics of language"
        }
    ],
    "section": "Papers with Poster Presentation",
    "is_remote": true,
    "remote_url": "https://escholarship.org/uc/item/2qd160dz",
    "frozenauthors": [
        {
            "first_name": "Amane",
            "middle_name": "",
            "last_name": "Watahiki",
            "name_suffix": "",
            "institution": "The University of Tokyo",
            "department": ""
        },
        {
            "first_name": "Tomoki",
            "middle_name": "",
            "last_name": "Doi",
            "name_suffix": "",
            "institution": "The University of Tokyo",
            "department": ""
        },
        {
            "first_name": "Taiga",
            "middle_name": "",
            "last_name": "Shinozaki",
            "name_suffix": "",
            "institution": "Keio University",
            "department": ""
        },
        {
            "first_name": "Satoshi",
            "middle_name": "",
            "last_name": "Nishida",
            "name_suffix": "",
            "institution": "National Institute of Information and Communications Technology",
            "department": ""
        },
        {
            "first_name": "Takuya",
            "middle_name": "",
            "last_name": "Niikawa",
            "name_suffix": "",
            "institution": "Kobe University",
            "department": ""
        },
        {
            "first_name": "Katsunori",
            "middle_name": "",
            "last_name": "Miyahara",
            "name_suffix": "",
            "institution": "Hokkaido University",
            "department": ""
        },
        {
            "first_name": "Hitomi",
            "middle_name": "",
            "last_name": "Yanaka",
            "name_suffix": "",
            "institution": "The University of Tokyo",
            "department": ""
        }
    ],
    "date_submitted": null,
    "date_accepted": null,
    "date_published": "2025-01-01T18:00:00Z",
    "render_galley": null,
    "galleys": [
        {
            "label": "PDF",
            "type": "pdf",
            "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/49723/galley/37685/download/"
        }
    ]
}