Article Instance
API Endpoint for journals.
GET /api/articles/50355/?format=api
{ "pk": 50355, "title": "Exploring the mechanisms that enable multimodal reasoning about data visualizations in vision-language models", "subtitle": null, "abstract": "Humans can readily integrate visual, linguistic, and numerical information to extract meaning from symbolic displays of information. For instance, answering even a simple question about a data visualization requires connecting tokens of language to visual features in the plot to support quantitative inferences. What are the core computational mechanisms that enable integration across modalities to support such reasoning? Open-source vision-language models (VLMs) might provide a useful testbed for investigating these mechanisms, but doing so requires a high degree of experimental control. To achieve this control, we procedurally generated a large dataset containing pairs of questions and data visualizations that varied along several independent and ecologically important dimensions, including the number of observations and how they were distributed. We identified several open VLMs whose performance was sensitive to this variation, establishing their viability for further exploration of the mechanisms underlying multimodal reasoning.", "language": "eng", "license": { "name": "", "short_name": "", "text": null, "url": "" }, "keywords": [ { "word": "Artificial Intelligence; Language understanding; Reasoning; Vision; Neural Networks" } ], "section": "Member Abstracts with Poster Presentation", "is_remote": true, "remote_url": "https://escholarship.org/uc/item/7bs5p3g5", "frozenauthors": [ { "first_name": "Alexa", "middle_name": "R.", "last_name": "Tartaglini", "name_suffix": "", "institution": "Stanford University", "department": "" }, { "first_name": "Christopher", "middle_name": "", "last_name": "Potts", "name_suffix": "", "institution": "Stanford University", "department": "" }, { "first_name": "Judith", "middle_name": "E.", "last_name": "Fan", "name_suffix": "", "institution": "Stanford University", "department": "" } ], "date_submitted": null, "date_accepted": null, "date_published": "2025-01-01T18:00:00Z", "render_galley": null, "galleys": [ { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/50355/galley/38317/download/" } ] }