Article Instance
API Endpoint for journals.
GET /api/articles/24474/?format=api
{ "pk": 24474, "title": "Multimodal Input Aids a Bayesian Model of Phonetic Learning", "subtitle": null, "abstract": "One of the many tasks facing the typically developing child language learner is learning to discriminate between distinctive sounds that make up words in their native language. We investigate whether multimodal information—specifically adult speech coupled with video frames of speakers' faces—benefits a computational model of phonetic learning. We introduce a method for creating high-quality synthetic videos of speakers' faces for an existing audio corpus. Our learning model, when trained and tested on audiovisual inputs, achieves 8.1% relative improvement on a phoneme discrimination battery compared to a model trained and tested on audio-only input. It outperforms the audio model by 3.9% when tested on audio-only data, suggesting that visual information facilitates the acquisition of acoustic distinctions. In noisy audio environments, our audiovisual model recovers 67% of the loss in performance of the audio model relative to non-noisy environments. 
These results demonstrate that visual information benefits an ideal learner and illustrate multiple ways that children might leverage visual cues when learning to discriminate speech sounds.", "language": "eng", "license": { "name": "", "short_name": "", "text": null, "url": "" }, "keywords": [ { "word": "Computer Science; Linguistics; Psychology; Face Processing; Language learning; Phonology; Computational Modeling" } ], "section": "Papers with Poster Presentation", "is_remote": true, "remote_url": "https://escholarship.org/uc/item/4n21p3k2", "frozenauthors": [ { "first_name": "Sophia", "middle_name": "", "last_name": "Zhi", "name_suffix": "", "institution": "Massachusetts Institute of Technology", "department": "" }, { "first_name": "Roger", "middle_name": "", "last_name": "Levy", "name_suffix": "", "institution": "Massachusetts Institute of Technology", "department": "" }, { "first_name": "Stephan", "middle_name": "C.", "last_name": "Meylan", "name_suffix": "", "institution": "MIT", "department": "" } ], "date_submitted": null, "date_accepted": null, "date_published": "2024-01-01T18:00:00Z", "render_galley": { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/24474/galley/21390/download/" }, "galleys": [ { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/24474/galley/14071/download/" }, { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/24474/galley/21390/download/" } ] }