Article Instance
API Endpoint for journals.
GET /api/articles/49274/?format=api
{ "pk": 49274, "title": "FD-Bench: Fine-Grained Evaluating the Decision-Making Capability of LLM Agents in Dynamic Scenarios", "subtitle": null, "abstract": "Large language models (LLMs) exhibit growing potential as autonomous agents, yet their decision-making capabilities in real-world scenarios remain underexplored, particularly in dynamic scenarios where conditions are constantly changing. Most existing benchmarks mainly focus on static environments, which significantly differ from real-world scenarios. Additionally, existing evaluation frameworks lack fine-grained assessments, providing limited insights during evaluation. To address these, we propose FD-Bench, a benchmark for evaluating the decision-making in dynamic scenarios. FD-Bench employs a fire evacuation scenario as a representative dynamic setting and decomposes decision-making into perception, prediction, and action stages, enabling granular evaluation of 8 LLMs and different reasoning frameworks. Our results show that LLMs experience a performance drop of over 50% in dynamic versus static scenarios. Inspired by the \"chunking\" principle in Cognitive Load Theory (CLT), our hierarchical prompting strategy demonstrates improved performance in dynamic decision-making tasks. This work provides insights into LLMs' limitations and pathways toward robust real-world deployment.", "language": "eng", "license": { "name": "", "short_name": "", "text": null, "url": "" }, "keywords": [ { "word": "Artificial Intelligence; Computer Science; Decision making; Intelligent agents; Natural Language Processing" } ], "section": "Papers with Oral Presentation", "is_remote": true, "remote_url": "https://escholarship.org/uc/item/4cd1t4p9", "frozenauthors": [ { "first_name": "Zhihao", "middle_name": "", "last_name": "Zhu", "name_suffix": "", "institution": "Shanghai Jiao Tong University", "department": "" }, { "first_name": "Yifan", "middle_name": "", "last_name": "Zheng", "name_suffix": "", "institution": "Shanghai Jiao Tong University", "department": "" }, { "first_name": "Yaohui", "middle_name": "", "last_name": "Jin", "name_suffix": "", "institution": "Shanghai Jiao Tong University", "department": "" } ], "date_submitted": null, "date_accepted": null, "date_published": "2025-01-01T18:00:00Z", "render_galley": null, "galleys": [ { "label": "PDF", "type": "pdf", "path": "https://journalpub.escholarship.org/cognitivesciencesociety/article/49274/galley/37235/download/" } ] }