five

THUIR/MemoryBench-Full

收藏
Hugging Face · 2025-12-08 更新 · 2025-12-20 收录
下载链接:
https://hf-mirror.com/datasets/THUIR/MemoryBench-Full
下载链接
链接失效反馈
官方服务:
资源简介:
--- configs: - config_name: Locomo-0 data_files: - split: train path: "dataset/Locomo-0/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-0/test/data-00000-of-00001.arrow" - config_name: Locomo-1 data_files: - split: train path: "dataset/Locomo-1/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-1/test/data-00000-of-00001.arrow" - config_name: Locomo-2 data_files: - split: train path: "dataset/Locomo-2/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-2/test/data-00000-of-00001.arrow" - config_name: Locomo-3 data_files: - split: train path: "dataset/Locomo-3/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-3/test/data-00000-of-00001.arrow" - config_name: Locomo-4 data_files: - split: train path: "dataset/Locomo-4/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-4/test/data-00000-of-00001.arrow" - config_name: Locomo-5 data_files: - split: train path: "dataset/Locomo-5/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-5/test/data-00000-of-00001.arrow" - config_name: Locomo-6 data_files: - split: train path: "dataset/Locomo-6/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-6/test/data-00000-of-00001.arrow" - config_name: Locomo-7 data_files: - split: train path: "dataset/Locomo-7/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-7/test/data-00000-of-00001.arrow" - config_name: Locomo-8 data_files: - split: train path: "dataset/Locomo-8/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-8/test/data-00000-of-00001.arrow" - config_name: Locomo-9 data_files: - split: train path: "dataset/Locomo-9/train/data-00000-of-00001.arrow" - split: test path: "dataset/Locomo-9/test/data-00000-of-00001.arrow" - config_name: DialSim-friends data_files: - split: train path: "dataset/DialSim-friends/train/data-00000-of-00001.arrow" - split: test path: "dataset/DialSim-friends/test/data-00000-of-00001.arrow" - 
config_name: DialSim-bigbang data_files: - split: train path: "dataset/DialSim-bigbang/train/data-00000-of-00001.arrow" - split: test path: "dataset/DialSim-bigbang/test/data-00000-of-00001.arrow" - config_name: DialSim-theoffice data_files: - split: train path: "dataset/DialSim-theoffice/train/data-00000-of-00001.arrow" - split: test path: "dataset/DialSim-theoffice/test/data-00000-of-00001.arrow" - config_name: NFCats data_files: - split: train path: "dataset/NFCats/train/data-00000-of-00001.arrow" - split: test path: "dataset/NFCats/test/data-00000-of-00001.arrow" - config_name: HelloBench-Creative&Design data_files: - split: train path: "dataset/HelloBench-Creative&Design/train/data-00000-of-00001.arrow" - split: test path: "dataset/HelloBench-Creative&Design/test/data-00000-of-00001.arrow" - config_name: WritingPrompts data_files: - split: train path: "dataset/WritingPrompts/train/data-00000-of-00001.arrow" - split: test path: "dataset/WritingPrompts/test/data-00000-of-00001.arrow" - config_name: WritingBench-Creative&Design data_files: - split: train path: "dataset/WritingBench-Creative&Design/train/data-00000-of-00001.arrow" - split: test path: "dataset/WritingBench-Creative&Design/test/data-00000-of-00001.arrow" - config_name: JuDGE data_files: - split: train path: "dataset/JuDGE/train/data-00000-of-00001.arrow" - split: test path: "dataset/JuDGE/test/data-00000-of-00001.arrow" - config_name: LexEval-Summarization data_files: - split: train path: "dataset/LexEval-Summarization/train/data-00000-of-00001.arrow" - split: test path: "dataset/LexEval-Summarization/test/data-00000-of-00001.arrow" - config_name: LexEval-Judge data_files: - split: train path: "dataset/LexEval-Judge/train/data-00000-of-00001.arrow" - split: test path: "dataset/LexEval-Judge/test/data-00000-of-00001.arrow" - config_name: LexEval-QA data_files: - split: train path: "dataset/LexEval-QA/train/data-00000-of-00001.arrow" - split: test path: 
"dataset/LexEval-QA/test/data-00000-of-00001.arrow" - config_name: WritingBench-Politics&Law data_files: - split: train path: "dataset/WritingBench-Politics&Law/train/data-00000-of-00001.arrow" - split: test path: "dataset/WritingBench-Politics&Law/test/data-00000-of-00001.arrow" - config_name: HelloBench-Academic&Knowledge-QA data_files: - split: train path: "dataset/HelloBench-Academic&Knowledge-QA/train/data-00000-of-00001.arrow" - split: test path: "dataset/HelloBench-Academic&Knowledge-QA/test/data-00000-of-00001.arrow" - config_name: HelloBench-Academic&Knowledge-Writing data_files: - split: train path: "dataset/HelloBench-Academic&Knowledge-Writing/train/data-00000-of-00001.arrow" - split: test path: "dataset/HelloBench-Academic&Knowledge-Writing/test/data-00000-of-00001.arrow" - config_name: IdeaBench data_files: - split: train path: "dataset/IdeaBench/train/data-00000-of-00001.arrow" - split: test path: "dataset/IdeaBench/test/data-00000-of-00001.arrow" - config_name: JRE-L data_files: - split: train path: "dataset/JRE-L/train/data-00000-of-00001.arrow" - split: test path: "dataset/JRE-L/test/data-00000-of-00001.arrow" - config_name: LimitGen-Syn data_files: - split: train path: "dataset/LimitGen-Syn/train/data-00000-of-00001.arrow" - split: test path: "dataset/LimitGen-Syn/test/data-00000-of-00001.arrow" - config_name: WritingBench-Academic&Engineering data_files: - split: train path: "dataset/WritingBench-Academic&Engineering/train/data-00000-of-00001.arrow" - split: test path: "dataset/WritingBench-Academic&Engineering/test/data-00000-of-00001.arrow" license: mit language: - en - zh --- # MemoryBench MemoryBench aims to provide a standardized and extensible benchmark for evaluating memory and continual learning in LLM systems — encouraging future work toward more adaptive, feedback-driven, and efficient LLM systems. 
**Paper Link**: https://arxiv.org/abs/2510.17281

**GitHub**: https://github.com/LittleDinoC/MemoryBench/

This is an extended version of MemoryBench. The training and test sets of [THUIR/MemoryBench](https://huggingface.co/datasets/THUIR/MemoryBench) (the balanced version on which we conducted experiments in the paper) are, respectively, subsets of the training and test sets of this dataset and maintain the same proportion.

However, please note that the sizes of different datasets vary significantly, which may require special handling when calculating averages to prevent any single dataset from disproportionately influencing the results.

For guidance on how to use this dataset, please refer to https://huggingface.co/datasets/THUIR/MemoryBench.

## Citation

If you use MemoryBench in your research, please cite our paper:

```
@misc{ai2025memorybenchbenchmarkmemorycontinual,
  title={MemoryBench: A Benchmark for Memory and Continual Learning in LLM Systems},
  author={Qingyao Ai and Yichen Tang and Changyue Wang and Jianming Long and Weihang Su and Yiqun Liu},
  year={2025},
  eprint={2510.17281},
  archivePrefix={arXiv},
  primaryClass={cs.LG},
  url={https://arxiv.org/abs/2510.17281},
}
```
提供机构:
THUIR
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作