taisazero/socratic-debugging-benchmark
Socratic Debugging Benchmark 数据集概述
基本信息
- 许可证:MIT
- 任务类别:
- 文本到文本生成
- 文本生成
- 语言:英语
- 标签:代码
- 显示名称(pretty_name):Socratic Debugging Benchmark
数据集描述
该数据集伴随以下论文发布:
- "Socratic Questioning of Novice Debuggers: A Benchmark Dataset and Preliminary Evaluations",收录于 ACL 2023 的第 18 届 Workshop on Innovative Use of NLP for Building Educational Applications(BEA 2023)论文集。
- "Can Language Models Employ the Socratic Method? Experiments with Code Debugging",收录于 SIGCSE 2024(第 55 届 ACM 计算机科学教育技术研讨会)论文集。
引用信息
如果您使用此数据集,请引用以下论文:
% BEA 2023 workshop paper that introduces the Socratic debugging dataset.
@inproceedings{al-hossami-etal-2023-socratic,
  author    = {Al-Hossami, Erfan and Bunescu, Razvan and Teehan, Ryan and Powell, Laurel and Mahajan, Khyati and Dorodchi, Mohsen},
  title     = {{Socratic} Questioning of Novice Debuggers: A Benchmark Dataset and Preliminary Evaluations},
  booktitle = {Proceedings of the 18th Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {709--726},
  url       = {https://aclanthology.org/2023.bea-1.57},
  abstract  = {Socratic questioning is a teaching strategy where the student is guided towards solving a problem on their own, instead of being given the solution directly. In this paper, we introduce a dataset of Socratic conversations where an instructor helps a novice programmer fix buggy solutions to simple computational problems. The dataset is then used for benchmarking the Socratic debugging abilities of GPT-based language models. While GPT-4 is observed to perform much better than GPT-3.5, its precision, and recall still fall short of human expert abilities, motivating further work in this area.},
}
% SIGCSE 2024 paper that benchmarks language models on the Socratic debugging dataset.
@inproceedings{al-hossami-etal-2024-can,
  author    = {Al-Hossami, Erfan and Bunescu, Razvan and Smith, Justin and Teehan, Ryan},
  title     = {Can Language Models Employ the {Socratic} Method? {Experiments} with Code Debugging},
  booktitle = {Proceedings of the 55th {ACM} Technical Symposium on Computer Science Education V. 1},
  series    = {SIGCSE 2024},
  year      = {2024},
  pages     = {53--59},
  numpages  = {7},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Portland, OR, USA},
  isbn      = {9798400704239},
  doi       = {10.1145/3626252.3630799},
  url       = {https://doi.org/10.1145/3626252.3630799},
  keywords  = {benchmark dataset, debugging, language models, socratic dialogue},
  abstract  = {When employing the Socratic method of teaching, instructors guide students toward solving a problem on their own rather than providing the solution directly. While this strategy can substantially improve learning outcomes, it is usually time-consuming and cognitively demanding. Automated Socratic conversational agents can augment human instruction and provide the necessary scale, however their development is hampered by the lack of suitable data for training and evaluation. In this paper, we introduce a manually created dataset of multi-turn Socratic advice that is aimed at helping a novice programmer fix buggy solutions to simple computational problems. The dataset is then used for benchmarking the Socratic debugging abilities of a number of language models, ranging from fine-tuning the instruction-based text-to-text transformer Flan-T5 to zero-shot and chain of thought prompting of the much larger GPT-4. The code and datasets are made freely available for research at the link below.},
}



