coref-data/winogrande_coref
收藏Wingrande Recast as Coreference Resolution
数据集概述
WinoGrande训练集和开发集被重新构造成指代消解任务,如Investigating Failures to Generalize for Coreference Resolution Models所述。使用Stanza解析Conllu列。
数据字段
python { "id": str, # 示例ID "text": str, # 未分词的示例文本 "sentences": [ { "id": int, # 句子索引 "text": str, # 未分词的句子文本 "speaker": None, # 说话者 "tokens": [ { # 键是conllu列:id, text, lemma, upos, xpos, feats, head, deprel, deps, misc }, ... ] }, ... ], "coref_chains": List[List[List[int]]], # 集群列表,每个集群是提及列表,每个提及是一个跨度,表示为[sent, start, end],包括端点 "genre": "crowdsourced", "meta_data": { "comment": "syntax_annotations=stanza|tokenizer=stanza|detokenizer=nltk", }, }
引用信息
@misc{porada2023investigating, title={Investigating Failures to Generalize for Coreference Resolution Models}, author={Ian Porada and Alexandra Olteanu and Kaheer Suleman and Adam Trischler and Jackie Chi Kit Cheung}, year={2023}, eprint={2303.09092}, archivePrefix={arXiv}, primaryClass={cs.CL} }
@InProceedings{ai2:winogrande, title = {WinoGrande: An Adversarial Winograd Schema Challenge at Scale}, authors={Keisuke, Sakaguchi and Ronan, Le Bras and Chandra, Bhagavatula and Yejin, Choi }, year={2019} }




