Article Instance
API Endpoint for journals.
GET /api/articles/27023/?format=api
{ "pk": 27023, "title": "Learning to reinforcement learn", "subtitle": null, "abstract": "In recent years deep reinforcement learning (RL) systems have attained superhuman performance in a number of challenging task domains, but are constrained by a demand for large training sets. A critical present objective is thus to develop deep RL methods that can adapt rapidly to new tasks. In the present work we introduce a novel approach to this challenge, which we refer to as deep meta-reinforcement learning. Previous work has shown that recurrent networks can support meta-learning in a fully supervised context. We extend this approach to the RL setting. What emerges is a system that is trained using one RL algorithm, but whose recurrent dynamics implement a second, quite separate RL procedure. This second, learned RL algorithm can differ from the original one in arbitrary ways and exploit structure in the training domain. We unpack these points in five proof-of-concept experiments to examine key aspects of deep meta-RL.", "language": "eng", "license": { "name": "", "short_name": "", "text": null, "url": "" }, "keywords": [], "section": "Talks: Papers", "is_remote": true, "remote_url": "https://escholarship.org/uc/item/1tn6q2t7", "frozenauthors": [ { "first_name": "Jane", "middle_name": "", "last_name": "Wang", "name_suffix": "", "institution": "DeepMind", "department": "" }, { "first_name": "Zeb", "middle_name": "", "last_name": "Kurth-Nelson", "name_suffix": "", "institution": "DeepMind", "department": "" }, { "first_name": "Hubert", "middle_name": "", "last_name": "Soyer", "name_suffix": "", "institution": "DeepMind", "department": "" }, { "first_name": "Joel", "middle_name": "", "last_name": "Leibo", "name_suffix": "", "institution": "DeepMind", "department": "" }, { "first_name": "Dhruva", "middle_name": "", "last_name": "Tirumala", "name_suffix": "", "institution": "DeepMind", "department": "" }, { "first_name": "Remi", "middle_name": "", "last_name": "Munos", 
"name_suffix": "", "institution": "DeepMind", "department": "" }, { "first_name": "Charles", "middle_name": "", "last_name": "Blundell", "name_suffix": "", "institution": "DeepMind", "department": "" }, { "first_name": "Dharshan", "middle_name": "", "last_name": "Kumaran", "name_suffix": "", "institution": "DeepMind", "department": "" }, { "first_name": "Matt", "middle_name": "", "last_name": "Botvinick", "name_suffix": "", "institution": "DeepMind", "department": "" } ], "date_submitted": null, "date_accepted": null, "date_published": "2017-01-01T18:00:00Z", "render_galley": null, "galleys": [ { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/27023/galley/16659/download/" } ] }