koukandre/xtreme
收藏 | Hugging Face | 2023-11-14 更新 | 2024-03-04 收录
下载链接:
https://hf-mirror.com/datasets/koukandre/xtreme
下载链接
链接失效反馈官方服务:
资源简介:
---
# License identifier declared for this dataset card.
license: apache-2.0
# List of named configurations; each entry below pairs a config_name with
# the parquet files that make up its splits.
configs:
# mnli: train split sharded over four parquet files, 3-way ClassLabel.
# NOTE(review): nesting restored from key order (indentation was lost) - confirm upstream.
- config_name: mnli
  data_files:
  - split: train
    path:
    - "mnli/train-0000.parquet"
    - "mnli/train-0001.parquet"
    - "mnli/train-0002.parquet"
    - "mnli/train-0003.parquet"
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# tydiqa: train and validation splits, each combining nine per-language files.
# NOTE(review): nesting restored from key order (indentation was lost) - confirm upstream.
- config_name: tydiqa
  data_files:
  - split: train
    path:
    - "tydiqa/ko/train.parquet"
    - "tydiqa/sw/train.parquet"
    - "tydiqa/ru/train.parquet"
    - "tydiqa/te/train.parquet"
    - "tydiqa/ar/train.parquet"
    - "tydiqa/fi/train.parquet"
    - "tydiqa/bn/train.parquet"
    - "tydiqa/en/train.parquet"
    - "tydiqa/id/train.parquet"
  - split: validation
    path:
    - "tydiqa/ko/validation.parquet"
    - "tydiqa/sw/validation.parquet"
    - "tydiqa/ru/validation.parquet"
    - "tydiqa/te/validation.parquet"
    - "tydiqa/ar/validation.parquet"
    - "tydiqa/fi/validation.parquet"
    - "tydiqa/bn/validation.parquet"
    - "tydiqa/en/validation.parquet"
    - "tydiqa/id/validation.parquet"
# tydiqa.ko: train/validation files of the `ko` subset; nesting restored (confirm upstream).
- config_name: tydiqa.ko
  data_files:
  - split: train
    path: "tydiqa/ko/train.parquet"
  - split: validation
    path: "tydiqa/ko/validation.parquet"
# tydiqa.sw: train/validation files of the `sw` subset; nesting restored (confirm upstream).
- config_name: tydiqa.sw
  data_files:
  - split: train
    path: "tydiqa/sw/train.parquet"
  - split: validation
    path: "tydiqa/sw/validation.parquet"
# tydiqa.ru: train/validation files of the `ru` subset; nesting restored (confirm upstream).
- config_name: tydiqa.ru
  data_files:
  - split: train
    path: "tydiqa/ru/train.parquet"
  - split: validation
    path: "tydiqa/ru/validation.parquet"
# tydiqa.te: train/validation files of the `te` subset; nesting restored (confirm upstream).
- config_name: tydiqa.te
  data_files:
  - split: train
    path: "tydiqa/te/train.parquet"
  - split: validation
    path: "tydiqa/te/validation.parquet"
# tydiqa.ar: train/validation files of the `ar` subset; nesting restored (confirm upstream).
- config_name: tydiqa.ar
  data_files:
  - split: train
    path: "tydiqa/ar/train.parquet"
  - split: validation
    path: "tydiqa/ar/validation.parquet"
# tydiqa.fi: train/validation files of the `fi` subset; nesting restored (confirm upstream).
- config_name: tydiqa.fi
  data_files:
  - split: train
    path: "tydiqa/fi/train.parquet"
  - split: validation
    path: "tydiqa/fi/validation.parquet"
# tydiqa.bn: train/validation files of the `bn` subset; nesting restored (confirm upstream).
- config_name: tydiqa.bn
  data_files:
  - split: train
    path: "tydiqa/bn/train.parquet"
  - split: validation
    path: "tydiqa/bn/validation.parquet"
# tydiqa.en: train/validation files of the `en` subset; nesting restored (confirm upstream).
- config_name: tydiqa.en
  data_files:
  - split: train
    path: "tydiqa/en/train.parquet"
  - split: validation
    path: "tydiqa/en/validation.parquet"
# tydiqa.id: train/validation files of the `id` subset; nesting restored (confirm upstream).
- config_name: tydiqa.id
  data_files:
  - split: train
    path: "tydiqa/id/train.parquet"
  - split: validation
    path: "tydiqa/id/validation.parquet"
# xnli: validation and test splits, each combining fifteen per-language files;
# 3-way ClassLabel.
# NOTE(review): nesting restored from key order (indentation was lost) - confirm upstream.
- config_name: xnli
  data_files:
  - split: validation
    path:
    - xnli/hi/validation.parquet
    - xnli/zh/validation.parquet
    - xnli/sw/validation.parquet
    - xnli/tr/validation.parquet
    - xnli/en/validation.parquet
    - xnli/th/validation.parquet
    - xnli/ru/validation.parquet
    - xnli/ar/validation.parquet
    - xnli/vi/validation.parquet
    - xnli/bg/validation.parquet
    - xnli/es/validation.parquet
    - xnli/el/validation.parquet
    - xnli/fr/validation.parquet
    - xnli/ur/validation.parquet
    - xnli/de/validation.parquet
  - split: test
    path:
    - xnli/hi/test.parquet
    - xnli/zh/test.parquet
    - xnli/sw/test.parquet
    - xnli/tr/test.parquet
    - xnli/en/test.parquet
    - xnli/th/test.parquet
    - xnli/ru/test.parquet
    - xnli/ar/test.parquet
    - xnli/vi/test.parquet
    - xnli/bg/test.parquet
    - xnli/es/test.parquet
    - xnli/el/test.parquet
    - xnli/fr/test.parquet
    - xnli/ur/test.parquet
    - xnli/de/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.hi: validation/test files of the `hi` subset; nesting restored (confirm upstream).
- config_name: xnli.hi
  data_files:
  - split: validation
    path: xnli/hi/validation.parquet
  - split: test
    path: xnli/hi/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.zh: validation/test files of the `zh` subset; nesting restored (confirm upstream).
- config_name: xnli.zh
  data_files:
  - split: validation
    path: xnli/zh/validation.parquet
  - split: test
    path: xnli/zh/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.sw: validation/test files of the `sw` subset; nesting restored (confirm upstream).
- config_name: xnli.sw
  data_files:
  - split: validation
    path: xnli/sw/validation.parquet
  - split: test
    path: xnli/sw/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.tr: validation/test files of the `tr` subset; nesting restored (confirm upstream).
- config_name: xnli.tr
  data_files:
  - split: validation
    path: xnli/tr/validation.parquet
  - split: test
    path: xnli/tr/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.en: validation/test files of the `en` subset; nesting restored (confirm upstream).
- config_name: xnli.en
  data_files:
  - split: validation
    path: xnli/en/validation.parquet
  - split: test
    path: xnli/en/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.th: validation/test files of the `th` subset; nesting restored (confirm upstream).
- config_name: xnli.th
  data_files:
  - split: validation
    path: xnli/th/validation.parquet
  - split: test
    path: xnli/th/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.ru: validation/test files of the `ru` subset; nesting restored (confirm upstream).
- config_name: xnli.ru
  data_files:
  - split: validation
    path: xnli/ru/validation.parquet
  - split: test
    path: xnli/ru/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.ar: validation/test files of the `ar` subset; nesting restored (confirm upstream).
- config_name: xnli.ar
  data_files:
  - split: validation
    path: xnli/ar/validation.parquet
  - split: test
    path: xnli/ar/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.vi: validation/test files of the `vi` subset; nesting restored (confirm upstream).
- config_name: xnli.vi
  data_files:
  - split: validation
    path: xnli/vi/validation.parquet
  - split: test
    path: xnli/vi/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.bg: validation/test files of the `bg` subset; nesting restored (confirm upstream).
- config_name: xnli.bg
  data_files:
  - split: validation
    path: xnli/bg/validation.parquet
  - split: test
    path: xnli/bg/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.es: validation/test files of the `es` subset; nesting restored (confirm upstream).
- config_name: xnli.es
  data_files:
  - split: validation
    path: xnli/es/validation.parquet
  - split: test
    path: xnli/es/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.el: validation/test files of the `el` subset; nesting restored (confirm upstream).
- config_name: xnli.el
  data_files:
  - split: validation
    path: xnli/el/validation.parquet
  - split: test
    path: xnli/el/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.fr: validation/test files of the `fr` subset; nesting restored (confirm upstream).
- config_name: xnli.fr
  data_files:
  - split: validation
    path: xnli/fr/validation.parquet
  - split: test
    path: xnli/fr/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.ur: validation/test files of the `ur` subset; nesting restored (confirm upstream).
- config_name: xnli.ur
  data_files:
  - split: validation
    path: xnli/ur/validation.parquet
  - split: test
    path: xnli/ur/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# xnli.de: validation/test files of the `de` subset; nesting restored (confirm upstream).
- config_name: xnli.de
  data_files:
  - split: validation
    path: xnli/de/validation.parquet
  - split: test
    path: xnli/de/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - entailment
      - neutral
      - contradiction
      _type: ClassLabel
    idx:
      dtype: int32
      _type: Value
# paws-x.de: train/validation/test files of the `de` subset, binary ClassLabel;
# nesting restored from key order (confirm upstream).
- config_name: paws-x.de
  data_files:
  - split: train
    path: paws-x/de/train.parquet
  - split: validation
    path: paws-x/de/validation.parquet
  - split: test
    path: paws-x/de/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - not_paraphrase
      - paraphrase
      _type: ClassLabel
# paws-x.en: train/validation/test files of the `en` subset, binary ClassLabel;
# nesting restored from key order (confirm upstream).
- config_name: paws-x.en
  data_files:
  - split: train
    path: paws-x/en/train.parquet
  - split: validation
    path: paws-x/en/validation.parquet
  - split: test
    path: paws-x/en/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - not_paraphrase
      - paraphrase
      _type: ClassLabel
# paws-x.es: train/validation/test files of the `es` subset, binary ClassLabel;
# nesting restored from key order (confirm upstream).
- config_name: paws-x.es
  data_files:
  - split: train
    path: paws-x/es/train.parquet
  - split: validation
    path: paws-x/es/validation.parquet
  - split: test
    path: paws-x/es/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - not_paraphrase
      - paraphrase
      _type: ClassLabel
# paws-x.fr: train/validation/test files of the `fr` subset, binary ClassLabel;
# nesting restored from key order (confirm upstream).
- config_name: paws-x.fr
  data_files:
  - split: train
    path: paws-x/fr/train.parquet
  - split: validation
    path: paws-x/fr/validation.parquet
  - split: test
    path: paws-x/fr/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - not_paraphrase
      - paraphrase
      _type: ClassLabel
# paws-x.ja: train/validation/test files of the `ja` subset, binary ClassLabel;
# nesting restored from key order (confirm upstream).
- config_name: paws-x.ja
  data_files:
  - split: train
    path: paws-x/ja/train.parquet
  - split: validation
    path: paws-x/ja/validation.parquet
  - split: test
    path: paws-x/ja/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - not_paraphrase
      - paraphrase
      _type: ClassLabel
# paws-x.ko: train/validation/test files of the `ko` subset, binary ClassLabel;
# nesting restored from key order (confirm upstream).
- config_name: paws-x.ko
  data_files:
  - split: train
    path: paws-x/ko/train.parquet
  - split: validation
    path: paws-x/ko/validation.parquet
  - split: test
    path: paws-x/ko/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - not_paraphrase
      - paraphrase
      _type: ClassLabel
# paws-x.zh: train/validation/test files of the `zh` subset, binary ClassLabel;
# nesting restored from key order (confirm upstream).
- config_name: paws-x.zh
  data_files:
  - split: train
    path: paws-x/zh/train.parquet
  - split: validation
    path: paws-x/zh/validation.parquet
  - split: test
    path: paws-x/zh/test.parquet
  features:
    sentence1:
      dtype: string
      _type: Value
    sentence2:
      dtype: string
      _type: Value
    label:
      names:
      - not_paraphrase
      - paraphrase
      _type: ClassLabel
---
提供机构:
koukandre
原始信息汇总
数据集概述
数据集配置
MNLI
- 数据文件:
- 训练集:
- "mnli/train-0000.parquet"
- "mnli/train-0001.parquet"
- "mnli/train-0002.parquet"
- "mnli/train-0003.parquet"
- 训练集:
- 特征:
  - sentence1: 字符串类型
  - sentence2: 字符串类型
  - label: 分类标签,包括 entailment、neutral、contradiction
  - idx: 整数类型
TyDiQA
- 数据文件:
- 训练集:
- "tydiqa/ko/train.parquet"
- "tydiqa/sw/train.parquet"
- "tydiqa/ru/train.parquet"
- "tydiqa/te/train.parquet"
- "tydiqa/ar/train.parquet"
- "tydiqa/fi/train.parquet"
- "tydiqa/bn/train.parquet"
- "tydiqa/en/train.parquet"
- "tydiqa/id/train.parquet"
- 验证集:
- "tydiqa/ko/validation.parquet"
- "tydiqa/sw/validation.parquet"
- "tydiqa/ru/validation.parquet"
- "tydiqa/te/validation.parquet"
- "tydiqa/ar/validation.parquet"
- "tydiqa/fi/validation.parquet"
- "tydiqa/bn/validation.parquet"
- "tydiqa/en/validation.parquet"
- "tydiqa/id/validation.parquet"
- 训练集:
TyDiQA 语言特定配置
- 韩语 (ko):
- 训练集: "tydiqa/ko/train.parquet"
- 验证集: "tydiqa/ko/validation.parquet"
- 斯瓦希里语 (sw):
- 训练集: "tydiqa/sw/train.parquet"
- 验证集: "tydiqa/sw/validation.parquet"
- 俄语 (ru):
- 训练集: "tydiqa/ru/train.parquet"
- 验证集: "tydiqa/ru/validation.parquet"
- 泰卢固语 (te):
- 训练集: "tydiqa/te/train.parquet"
- 验证集: "tydiqa/te/validation.parquet"
- 阿拉伯语 (ar):
- 训练集: "tydiqa/ar/train.parquet"
- 验证集: "tydiqa/ar/validation.parquet"
- 芬兰语 (fi):
- 训练集: "tydiqa/fi/train.parquet"
- 验证集: "tydiqa/fi/validation.parquet"
- 孟加拉语 (bn):
- 训练集: "tydiqa/bn/train.parquet"
- 验证集: "tydiqa/bn/validation.parquet"
- 英语 (en):
- 训练集: "tydiqa/en/train.parquet"
- 验证集: "tydiqa/en/validation.parquet"
- 印度尼西亚语 (id):
- 训练集: "tydiqa/id/train.parquet"
- 验证集: "tydiqa/id/validation.parquet"
XNLI
- 数据文件:
- 验证集:
- "xnli/hi/validation.parquet"
- "xnli/zh/validation.parquet"
- "xnli/sw/validation.parquet"
- "xnli/tr/validation.parquet"
- "xnli/en/validation.parquet"
- "xnli/th/validation.parquet"
- "xnli/ru/validation.parquet"
- "xnli/ar/validation.parquet"
- "xnli/vi/validation.parquet"
- "xnli/bg/validation.parquet"
- "xnli/es/validation.parquet"
- "xnli/el/validation.parquet"
- "xnli/fr/validation.parquet"
- "xnli/ur/validation.parquet"
- "xnli/de/validation.parquet"
- 测试集:
- "xnli/hi/test.parquet"
- "xnli/zh/test.parquet"
- "xnli/sw/test.parquet"
- "xnli/tr/test.parquet"
- "xnli/en/test.parquet"
- "xnli/th/test.parquet"
- "xnli/ru/test.parquet"
- "xnli/ar/test.parquet"
- "xnli/vi/test.parquet"
- "xnli/bg/test.parquet"
- "xnli/es/test.parquet"
- "xnli/el/test.parquet"
- "xnli/fr/test.parquet"
- "xnli/ur/test.parquet"
- "xnli/de/test.parquet"
- 验证集:
- 特征:
  - sentence1: 字符串类型
  - sentence2: 字符串类型
  - label: 分类标签,包括 entailment、neutral、contradiction
  - idx: 整数类型
XNLI 语言特定配置
- 印地语 (hi):
- 验证集: "xnli/hi/validation.parquet"
- 测试集: "xnli/hi/test.parquet"
- 中文 (zh):
- 验证集: "xnli/zh/validation.parquet"
- 测试集: "xnli/zh/test.parquet"
- 斯瓦希里语 (sw):
- 验证集: "xnli/sw/validation.parquet"
- 测试集: "xnli/sw/test.parquet"
- 土耳其语 (tr):
- 验证集: "xnli/tr/validation.parquet"
- 测试集: "xnli/tr/test.parquet"
- 英语 (en):
- 验证集: "xnli/en/validation.parquet"
- 测试集: "xnli/en/test.parquet"
- 泰语 (th):
- 验证集: "xnli/th/validation.parquet"
- 测试集: "xnli/th/test.parquet"
- 俄语 (ru):
- 验证集: "xnli/ru/validation.parquet"
- 测试集: "xnli/ru/test.parquet"
- 阿拉伯语 (ar):
- 验证集: "xnli/ar/validation.parquet"
- 测试集: "xnli/ar/test.parquet"
- 越南语 (vi):
- 验证集: "xnli/vi/validation.parquet"
- 测试集: "xnli/vi/test.parquet"
- 保加利亚语 (bg):
- 验证集: "xnli/bg/validation.parquet"
- 测试集: "xnli/bg/test.parquet"
- 西班牙语 (es):
- 验证集: "xnli/es/validation.parquet"
- 测试集: "xnli/es/test.parquet"
- 希腊语 (el):
- 验证集: "xnli/el/validation.parquet"
- 测试集: "xnli/el/test.parquet"
- 法语 (fr):
- 验证集: "xnli/fr/validation.parquet"
- 测试集: "xnli/fr/test.parquet"
- 乌尔都语 (ur):
- 验证集: "xnli/ur/validation.parquet"
- 测试集: "xnli/ur/test.parquet"
- 德语 (de):
- 验证集: "xnli/de/validation.parquet"
- 测试集: "xnli/de/test.parquet"
PAWS-X
- 德语 (de):
- 训练集: "paws-x/de/train.parquet"
- 验证集: "paws-x/de/validation.parquet"
- 测试集: "paws-x/de/test.parquet"
- 英语 (en):
- 训练集: "paws-x/en/train.parquet"
- 验证集: "paws-x/en/validation.parquet"
- 测试集: "paws-x/en/test.parquet"
- 西班牙语 (es):
- 训练集: "paws-x/es/train.parquet"
- 验证集: "paws-x/es/validation.parquet"
- 测试集: "paws-x/es/test.parquet"
- 法语 (fr):
- 训练集: "paws-x/fr/train.parquet"
- 验证集: "paws-x/fr/validation.parquet"
- 测试集: "paws-x/fr/test.parquet"
- 日语 (ja):
- 训练集: "paws-x/ja/train.parquet"
- 验证集: "paws-x/ja/validation.parquet"
- 测试集: "paws-x/ja/test.parquet"
- 韩语 (ko):
- 训练集: "paws-x/ko/train.parquet"
- 验证集: "paws-x/ko/validation.parquet"
- 测试集: "paws-x/ko/test.parquet"
- 中文 (zh):
- 训练集: "paws-x/zh/train.parquet"
- 验证集: "paws-x/zh/validation.parquet"
- 测试集: "paws-x/zh/test.parquet"
- 特征:
  - sentence1: 字符串类型
  - sentence2: 字符串类型
  - label: 分类标签,包括 not_paraphrase、paraphrase



