Article Instance
API Endpoint for journals.
GET /api/articles/49792/?format=api
{ "pk": 49792, "title": "Large language model tokens are psychologically salient", "subtitle": null, "abstract": "Large language models segment words into chunks called tokens, using compression algorithms that ignore semantics. We investigated whether tokenization corrupts representations of word meanings in 17 languages. We found that GPT-4o and Llama 3 inflate the similarity of words that share tokens. However, tokens turned out to be good predictors of orthographic priming, such that people recognize a target word faster after reading a prime that ends with the same token. This boost in priming far exceeds what other overlapping strings of letters explain, which suggests that tokenization selectively identifies functional subword units. The pattern extends to the production of word associates in English: Tokens capture phonologically motivated associations, while other strings of letters do not. So, tokenization does influence semantic representations, but because tokens correspond to psychologically salient orthographic and/or phonological constituents, they may endow large language models with human-like language networks and facilitate alignment with human word processing.", "language": "eng", "license": { "name": "", "short_name": "", "text": null, "url": "" }, "keywords": [ { "word": "Artificial Intelligence; Linguistics; Psychology; Natural Language Processing; Semantics of language" } ], "section": "Papers with Poster Presentation", "is_remote": true, "remote_url": "https://escholarship.org/uc/item/13k625dx", "frozenauthors": [ { "first_name": "David", "middle_name": "A.", "last_name": "Haslett", "name_suffix": "", "institution": "Hong Kong University of Science and Technology", "department": "" }, { "first_name": "Antoni", "middle_name": "B.", "last_name": "Chan", "name_suffix": "", "institution": "City University of Hong Kong", "department": "" }, { "first_name": "Janet", "middle_name": "", "last_name": "Hsiao", "name_suffix": "", "institution": "Hong Kong University of Science & Technology", "department": "" } ], "date_submitted": null, "date_accepted": null, "date_published": "2025-01-01T15:00:00-03:00", "render_galley": null, "galleys": [ { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/49792/galley/37754/download/" } ] }