Article Instance
API Endpoint for journals.
GET /api/articles/49148/?format=api
{ "pk": 49148, "title": "Relational Information Predicts Human Behavior and Neural Responses to Complex Social Scenes", "subtitle": null, "abstract": "Understanding social scenes depends on tracking relational visual information, which is prioritized behaviorally and represented in the superior temporal sulcus (STS), a region involved in processing social scenes. Despite its importance, relational information has been underutilized in computational models of social vision. In this study, we evaluate two neural network models—SocialGNN and RNN Edge—that explicitly incorporate relational cues, and compare their performance to state-of-the-art (SOTA) AI vision models. SocialGNN utilizes a graph neural network to organize input information about each video frame into a graph structure with nodes representing faces and key objects, and edges encoding relational information such as gaze direction and physical contact. RNN Edge is an even simpler model that processes only relational information without node features or graph-based structures. These models were tested on behavioral and neural data from 3-second natural videos of two people engaged in everyday activities, as well as on the PHASE dataset, a collection of 2D animations depicting agent-object interactions inspired by Heider and Simmel. Across both datasets, SocialGNN and RNN Edge achieved strong performance in predicting human behavioral ratings of social interactions and were comparable to SOTA AI models in behavioral encoding tasks, despite being trained on significantly less data and with simpler architectures. Notably, the success of RNN Edge suggests that additional visual features and the graph-based framework of SocialGNN do not significantly enhance performance, underscoring the primacy of gaze and physical contact as essential relational cues.\nThese findings emphasize the importance of integrating relational information into computational models to develop better models of social perception and human-aligned AI.", "language": "eng", "license": { "name": "", "short_name": "", "text": null, "url": "" }, "keywords": [], "section": "Papers with Oral Presentation", "is_remote": true, "remote_url": "https://escholarship.org/uc/item/4680v4ws", "frozenauthors": [ { "first_name": "Wenshuo", "middle_name": "", "last_name": "Qin", "name_suffix": "", "institution": "Johns Hopkins University", "department": "" }, { "first_name": "Manasi", "middle_name": "", "last_name": "Malik", "name_suffix": "", "institution": "Johns Hopkins University", "department": "" }, { "first_name": "Leyla", "middle_name": "", "last_name": "Isik", "name_suffix": "", "institution": "Johns Hopkins University", "department": "" } ], "date_submitted": null, "date_accepted": null, "date_published": "2025-01-01T10:00:00-08:00", "render_galley": null, "galleys": [ { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/49148/galley/37109/download/" }, { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/49148/galley/38654/download/" } ] }