richardr1126/spider-natsql-context-validation
数据集概述
数据集名称
Spider NatSQL Context Validation
数据集摘要
Spider是一个大规模、复杂且跨领域的语义解析与文本到SQL(text-to-SQL)数据集,由11名耶鲁大学学生标注。本数据集(Spider NatSQL Context Validation)基于Spider开发集构建,用于验证大型语言模型在给定数据库上下文的情况下使用NatSQL的能力。
数据集特征
- db_id: 数据类型为字符串
- prompt: 数据类型为字符串
- ground_truth: 数据类型为字符串
语言
数据集中的文本语言为英语
许可证
数据集遵循CC BY-SA 4.0许可证
引用信息
@article{yu2018spider,
  author        = {Yu, Tao and Zhang, Rui and Yang, Kai and Yasunaga, Michihiro and Wang, Dongxu and Li, Zifan and Ma, James and Li, Irene and Yao, Qingning and Roman, Shanelle and others},
  title         = {{Spider}: A Large-Scale Human-Labeled Dataset for Complex and Cross-Domain Semantic Parsing and Text-to-{SQL} Task},
  journal       = {arXiv preprint arXiv:1809.08887},
  year          = {2018},
  eprint        = {1809.08887},
  archiveprefix = {arXiv},
}
@inproceedings{gan-etal-2021-natural-sql,
  author    = {Gan, Yujian and Chen, Xinyun and Xie, Jinxia and Purver, Matthew and Woodward, John R. and Drake, John and Zhang, Qiaofu},
  title     = {Natural {SQL}: Making {SQL} Easier to Infer from Natural Language Specifications},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  month     = nov,
  year      = {2021},
  address   = {Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  pages     = {2030--2042},
  doi       = {10.18653/v1/2021.findings-emnlp.174},
  url       = {https://aclanthology.org/2021.findings-emnlp.174},
}



