five

koukandre/xtreme

收藏
Hugging Face · 2023-11-14 更新 · 2024-03-04 收录
下载链接:
https://hf-mirror.com/datasets/koukandre/xtreme
下载链接
链接失效反馈
官方服务:
资源简介:
--- license: apache-2.0 configs: - config_name: mnli data_files: - split: train path: - "mnli/train-0000.parquet" - "mnli/train-0001.parquet" - "mnli/train-0002.parquet" - "mnli/train-0003.parquet" features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: tydiqa data_files: - split: train path: - "tydiqa/ko/train.parquet" - "tydiqa/sw/train.parquet" - "tydiqa/ru/train.parquet" - "tydiqa/te/train.parquet" - "tydiqa/ar/train.parquet" - "tydiqa/fi/train.parquet" - "tydiqa/bn/train.parquet" - "tydiqa/en/train.parquet" - "tydiqa/id/train.parquet" - split: validation path: - "tydiqa/ko/validation.parquet" - "tydiqa/sw/validation.parquet" - "tydiqa/ru/validation.parquet" - "tydiqa/te/validation.parquet" - "tydiqa/ar/validation.parquet" - "tydiqa/fi/validation.parquet" - "tydiqa/bn/validation.parquet" - "tydiqa/en/validation.parquet" - "tydiqa/id/validation.parquet" - config_name: tydiqa.ko data_files: - split: train path: "tydiqa/ko/train.parquet" - split: validation path: "tydiqa/ko/validation.parquet" - config_name: tydiqa.sw data_files: - split: train path: "tydiqa/sw/train.parquet" - split: validation path: "tydiqa/sw/validation.parquet" - config_name: tydiqa.ru data_files: - split: train path: "tydiqa/ru/train.parquet" - split: validation path: "tydiqa/ru/validation.parquet" - config_name: tydiqa.te data_files: - split: train path: "tydiqa/te/train.parquet" - split: validation path: "tydiqa/te/validation.parquet" - config_name: tydiqa.ar data_files: - split: train path: "tydiqa/ar/train.parquet" - split: validation path: "tydiqa/ar/validation.parquet" - config_name: tydiqa.fi data_files: - split: train path: "tydiqa/fi/train.parquet" - split: validation path: "tydiqa/fi/validation.parquet" - config_name: tydiqa.bn data_files: - split: train path: "tydiqa/bn/train.parquet" - split: validation path: 
"tydiqa/bn/validation.parquet" - config_name: tydiqa.en data_files: - split: train path: "tydiqa/en/train.parquet" - split: validation path: "tydiqa/en/validation.parquet" - config_name: tydiqa.id data_files: - split: train path: "tydiqa/id/train.parquet" - split: validation path: "tydiqa/id/validation.parquet" - config_name: xnli data_files: - split: validation path: - xnli/hi/validation.parquet - xnli/zh/validation.parquet - xnli/sw/validation.parquet - xnli/tr/validation.parquet - xnli/en/validation.parquet - xnli/th/validation.parquet - xnli/ru/validation.parquet - xnli/ar/validation.parquet - xnli/vi/validation.parquet - xnli/bg/validation.parquet - xnli/es/validation.parquet - xnli/el/validation.parquet - xnli/fr/validation.parquet - xnli/ur/validation.parquet - xnli/de/validation.parquet - split: test path: - xnli/hi/test.parquet - xnli/zh/test.parquet - xnli/sw/test.parquet - xnli/tr/test.parquet - xnli/en/test.parquet - xnli/th/test.parquet - xnli/ru/test.parquet - xnli/ar/test.parquet - xnli/vi/test.parquet - xnli/bg/test.parquet - xnli/es/test.parquet - xnli/el/test.parquet - xnli/fr/test.parquet - xnli/ur/test.parquet - xnli/de/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.hi data_files: - split: validation path: xnli/hi/validation.parquet - split: test path: xnli/hi/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.zh data_files: - split: validation path: xnli/zh/validation.parquet - split: test path: xnli/zh/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - 
config_name: xnli.sw data_files: - split: validation path: xnli/sw/validation.parquet - split: test path: xnli/sw/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.tr data_files: - split: validation path: xnli/tr/validation.parquet - split: test path: xnli/tr/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.en data_files: - split: validation path: xnli/en/validation.parquet - split: test path: xnli/en/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.th data_files: - split: validation path: xnli/th/validation.parquet - split: test path: xnli/th/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.ru data_files: - split: validation path: xnli/ru/validation.parquet - split: test path: xnli/ru/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.ar data_files: - split: validation path: xnli/ar/validation.parquet - split: test path: xnli/ar/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.vi data_files: - split: validation path: xnli/vi/validation.parquet - split: 
test path: xnli/vi/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.bg data_files: - split: validation path: xnli/bg/validation.parquet - split: test path: xnli/bg/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.es data_files: - split: validation path: xnli/es/validation.parquet - split: test path: xnli/es/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.el data_files: - split: validation path: xnli/el/validation.parquet - split: test path: xnli/el/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.fr data_files: - split: validation path: xnli/fr/validation.parquet - split: test path: xnli/fr/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.ur data_files: - split: validation path: xnli/ur/validation.parquet - split: test path: xnli/ur/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: xnli.de data_files: - split: validation path: xnli/de/validation.parquet - split: test path: xnli/de/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: 
string _type: Value label: names: - entailment - neutral - contradiction _type: ClassLabel idx: dtype: int32 _type: Value - config_name: paws-x.de data_files: - split: train path: paws-x/de/train.parquet - split: validation path: paws-x/de/validation.parquet - split: test path: paws-x/de/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - not_paraphrase - paraphrase _type: ClassLabel - config_name: paws-x.en data_files: - split: train path: paws-x/en/train.parquet - split: validation path: paws-x/en/validation.parquet - split: test path: paws-x/en/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - not_paraphrase - paraphrase _type: ClassLabel - config_name: paws-x.es data_files: - split: train path: paws-x/es/train.parquet - split: validation path: paws-x/es/validation.parquet - split: test path: paws-x/es/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - not_paraphrase - paraphrase _type: ClassLabel - config_name: paws-x.fr data_files: - split: train path: paws-x/fr/train.parquet - split: validation path: paws-x/fr/validation.parquet - split: test path: paws-x/fr/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - not_paraphrase - paraphrase _type: ClassLabel - config_name: paws-x.ja data_files: - split: train path: paws-x/ja/train.parquet - split: validation path: paws-x/ja/validation.parquet - split: test path: paws-x/ja/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - not_paraphrase - paraphrase _type: ClassLabel - config_name: paws-x.ko data_files: - split: train path: paws-x/ko/train.parquet - split: validation path: paws-x/ko/validation.parquet - split: test path: paws-x/ko/test.parquet features: sentence1: dtype: string _type: Value 
sentence2: dtype: string _type: Value label: names: - not_paraphrase - paraphrase _type: ClassLabel - config_name: paws-x.zh data_files: - split: train path: paws-x/zh/train.parquet - split: validation path: paws-x/zh/validation.parquet - split: test path: paws-x/zh/test.parquet features: sentence1: dtype: string _type: Value sentence2: dtype: string _type: Value label: names: - not_paraphrase - paraphrase _type: ClassLabel ---
提供机构:
koukandre
原始信息汇总

数据集概述

数据集配置

MNLI

  • 数据文件:
    • 训练集:
      • "mnli/train-0000.parquet"
      • "mnli/train-0001.parquet"
      • "mnli/train-0002.parquet"
      • "mnli/train-0003.parquet"
  • 特征:
    • sentence1: 字符串类型
    • sentence2: 字符串类型
    • label: 分类标签,包括 entailment, neutral, contradiction
    • idx: 32 位整数类型 (int32)

TyDiQA

  • 数据文件:
    • 训练集:
      • "tydiqa/ko/train.parquet"
      • "tydiqa/sw/train.parquet"
      • "tydiqa/ru/train.parquet"
      • "tydiqa/te/train.parquet"
      • "tydiqa/ar/train.parquet"
      • "tydiqa/fi/train.parquet"
      • "tydiqa/bn/train.parquet"
      • "tydiqa/en/train.parquet"
      • "tydiqa/id/train.parquet"
    • 验证集:
      • "tydiqa/ko/validation.parquet"
      • "tydiqa/sw/validation.parquet"
      • "tydiqa/ru/validation.parquet"
      • "tydiqa/te/validation.parquet"
      • "tydiqa/ar/validation.parquet"
      • "tydiqa/fi/validation.parquet"
      • "tydiqa/bn/validation.parquet"
      • "tydiqa/en/validation.parquet"
      • "tydiqa/id/validation.parquet"

TyDiQA 语言特定配置

  • 韩语 (ko):
    • 训练集: "tydiqa/ko/train.parquet"
    • 验证集: "tydiqa/ko/validation.parquet"
  • 斯瓦希里语 (sw):
    • 训练集: "tydiqa/sw/train.parquet"
    • 验证集: "tydiqa/sw/validation.parquet"
  • 俄语 (ru):
    • 训练集: "tydiqa/ru/train.parquet"
    • 验证集: "tydiqa/ru/validation.parquet"
  • 泰卢固语 (te):
    • 训练集: "tydiqa/te/train.parquet"
    • 验证集: "tydiqa/te/validation.parquet"
  • 阿拉伯语 (ar):
    • 训练集: "tydiqa/ar/train.parquet"
    • 验证集: "tydiqa/ar/validation.parquet"
  • 芬兰语 (fi):
    • 训练集: "tydiqa/fi/train.parquet"
    • 验证集: "tydiqa/fi/validation.parquet"
  • 孟加拉语 (bn):
    • 训练集: "tydiqa/bn/train.parquet"
    • 验证集: "tydiqa/bn/validation.parquet"
  • 英语 (en):
    • 训练集: "tydiqa/en/train.parquet"
    • 验证集: "tydiqa/en/validation.parquet"
  • 印度尼西亚语 (id):
    • 训练集: "tydiqa/id/train.parquet"
    • 验证集: "tydiqa/id/validation.parquet"

XNLI

  • 数据文件:
    • 验证集:
      • "xnli/hi/validation.parquet"
      • "xnli/zh/validation.parquet"
      • "xnli/sw/validation.parquet"
      • "xnli/tr/validation.parquet"
      • "xnli/en/validation.parquet"
      • "xnli/th/validation.parquet"
      • "xnli/ru/validation.parquet"
      • "xnli/ar/validation.parquet"
      • "xnli/vi/validation.parquet"
      • "xnli/bg/validation.parquet"
      • "xnli/es/validation.parquet"
      • "xnli/el/validation.parquet"
      • "xnli/fr/validation.parquet"
      • "xnli/ur/validation.parquet"
      • "xnli/de/validation.parquet"
    • 测试集:
      • "xnli/hi/test.parquet"
      • "xnli/zh/test.parquet"
      • "xnli/sw/test.parquet"
      • "xnli/tr/test.parquet"
      • "xnli/en/test.parquet"
      • "xnli/th/test.parquet"
      • "xnli/ru/test.parquet"
      • "xnli/ar/test.parquet"
      • "xnli/vi/test.parquet"
      • "xnli/bg/test.parquet"
      • "xnli/es/test.parquet"
      • "xnli/el/test.parquet"
      • "xnli/fr/test.parquet"
      • "xnli/ur/test.parquet"
      • "xnli/de/test.parquet"
  • 特征:
    • sentence1: 字符串类型
    • sentence2: 字符串类型
    • label: 分类标签,包括 entailment, neutral, contradiction
    • idx: 32 位整数类型 (int32)

XNLI 语言特定配置

  • 印地语 (hi):
    • 验证集: "xnli/hi/validation.parquet"
    • 测试集: "xnli/hi/test.parquet"
  • 中文 (zh):
    • 验证集: "xnli/zh/validation.parquet"
    • 测试集: "xnli/zh/test.parquet"
  • 斯瓦希里语 (sw):
    • 验证集: "xnli/sw/validation.parquet"
    • 测试集: "xnli/sw/test.parquet"
  • 土耳其语 (tr):
    • 验证集: "xnli/tr/validation.parquet"
    • 测试集: "xnli/tr/test.parquet"
  • 英语 (en):
    • 验证集: "xnli/en/validation.parquet"
    • 测试集: "xnli/en/test.parquet"
  • 泰语 (th):
    • 验证集: "xnli/th/validation.parquet"
    • 测试集: "xnli/th/test.parquet"
  • 俄语 (ru):
    • 验证集: "xnli/ru/validation.parquet"
    • 测试集: "xnli/ru/test.parquet"
  • 阿拉伯语 (ar):
    • 验证集: "xnli/ar/validation.parquet"
    • 测试集: "xnli/ar/test.parquet"
  • 越南语 (vi):
    • 验证集: "xnli/vi/validation.parquet"
    • 测试集: "xnli/vi/test.parquet"
  • 保加利亚语 (bg):
    • 验证集: "xnli/bg/validation.parquet"
    • 测试集: "xnli/bg/test.parquet"
  • 西班牙语 (es):
    • 验证集: "xnli/es/validation.parquet"
    • 测试集: "xnli/es/test.parquet"
  • 希腊语 (el):
    • 验证集: "xnli/el/validation.parquet"
    • 测试集: "xnli/el/test.parquet"
  • 法语 (fr):
    • 验证集: "xnli/fr/validation.parquet"
    • 测试集: "xnli/fr/test.parquet"
  • 乌尔都语 (ur):
    • 验证集: "xnli/ur/validation.parquet"
    • 测试集: "xnli/ur/test.parquet"
  • 德语 (de):
    • 验证集: "xnli/de/validation.parquet"
    • 测试集: "xnli/de/test.parquet"

PAWS-X

  • 德语 (de):
    • 训练集: "paws-x/de/train.parquet"
    • 验证集: "paws-x/de/validation.parquet"
    • 测试集: "paws-x/de/test.parquet"
  • 英语 (en):
    • 训练集: "paws-x/en/train.parquet"
    • 验证集: "paws-x/en/validation.parquet"
    • 测试集: "paws-x/en/test.parquet"
  • 西班牙语 (es):
    • 训练集: "paws-x/es/train.parquet"
    • 验证集: "paws-x/es/validation.parquet"
    • 测试集: "paws-x/es/test.parquet"
  • 法语 (fr):
    • 训练集: "paws-x/fr/train.parquet"
    • 验证集: "paws-x/fr/validation.parquet"
    • 测试集: "paws-x/fr/test.parquet"
  • 日语 (ja):
    • 训练集: "paws-x/ja/train.parquet"
    • 验证集: "paws-x/ja/validation.parquet"
    • 测试集: "paws-x/ja/test.parquet"
  • 韩语 (ko):
    • 训练集: "paws-x/ko/train.parquet"
    • 验证集: "paws-x/ko/validation.parquet"
    • 测试集: "paws-x/ko/test.parquet"
  • 中文 (zh):
    • 训练集: "paws-x/zh/train.parquet"
    • 验证集: "paws-x/zh/validation.parquet"
    • 测试集: "paws-x/zh/test.parquet"
  • 特征 (以上各语言配置通用):
    • sentence1: 字符串类型
    • sentence2: 字符串类型
    • label: 分类标签,包括 not_paraphrase, paraphrase
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作