omai-research/parallel_mmarco_passage
收藏 | Hugging Face | 2025-08-09 更新 | 2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/omai-research/parallel_mmarco_passage
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
dataset_info:
# en_ar: columns id (int64), english and non_english (string); one train split.
- config_name: en_ar
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 7710413622
    num_examples: 8841823
  download_size: 3742142957
  dataset_size: 7710413622
# en_de: columns id (int64), english and non_english (string); one train split.
- config_name: en_de
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 6534293992
    num_examples: 8841823
  download_size: 3526338698
  dataset_size: 6534293992
# en_es: columns id (int64), english and non_english (string); one train split.
- config_name: en_es
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 6617665984
    num_examples: 8841823
  download_size: 3501762295
  dataset_size: 6617665984
# en_fr: columns id (int64), english and non_english (string); one train split.
- config_name: en_fr
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 6702325410
    num_examples: 8841823
  download_size: 3549702986
  dataset_size: 6702325410
# en_hi: columns id (int64), english and non_english (string); one train split.
- config_name: en_hi
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 10695426957
    num_examples: 8841823
  download_size: 4293480551
  dataset_size: 10695426957
# en_id: columns id (int64), english and non_english (string); one train split.
- config_name: en_id
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 6343889797
    num_examples: 8841823
  download_size: 3317462874
  dataset_size: 6343889797
# en_it: columns id (int64), english and non_english (string); one train split.
- config_name: en_it
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 6490199279
    num_examples: 8841823
  download_size: 3491361875
  dataset_size: 6490199279
# en_ja: columns id (int64), english and non_english (string); one train split.
- config_name: en_ja
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 6971088848
    num_examples: 8841823
  download_size: 3558700498
  dataset_size: 6971088848
# en_nl: columns id (int64), english and non_english (string); one train split.
- config_name: en_nl
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 6408738954
    num_examples: 8841823
  download_size: 3456595150
  dataset_size: 6408738954
# en_pt: columns id (int64), english and non_english (string); one train split.
- config_name: en_pt
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 6477118211
    num_examples: 8841823
  download_size: 3480021021
  dataset_size: 6477118211
# en_ru: columns id (int64), english and non_english (string); one train split.
- config_name: en_ru
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 8815621423
    num_examples: 8841823
  download_size: 4200560855
  dataset_size: 8815621423
# en_vi: columns id (int64), english and non_english (string); one train split.
- config_name: en_vi
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 7186160959
    num_examples: 8841823
  download_size: 3561675849
  dataset_size: 7186160959
# en_zh: columns id (int64), english and non_english (string); one train split.
- config_name: en_zh
  features:
  - name: id
    dtype: int64
  - name: english
    dtype: string
  - name: non_english
    dtype: string
  splits:
  - name: train
    num_bytes: 5766361470
    num_examples: 8841823
  download_size: 3272205520
  dataset_size: 5766361470
# Maps each config name to the file glob that holds its train split.
configs:
- config_name: en_ar
  data_files:
  - split: train
    path: en_ar/train-*
- config_name: en_de
  data_files:
  - split: train
    path: en_de/train-*
- config_name: en_es
  data_files:
  - split: train
    path: en_es/train-*
- config_name: en_fr
  data_files:
  - split: train
    path: en_fr/train-*
- config_name: en_hi
  data_files:
  - split: train
    path: en_hi/train-*
- config_name: en_id
  data_files:
  - split: train
    path: en_id/train-*
- config_name: en_it
  data_files:
  - split: train
    path: en_it/train-*
- config_name: en_ja
  data_files:
  - split: train
    path: en_ja/train-*
- config_name: en_nl
  data_files:
  - split: train
    path: en_nl/train-*
- config_name: en_pt
  data_files:
  - split: train
    path: en_pt/train-*
- config_name: en_ru
  data_files:
  - split: train
    path: en_ru/train-*
- config_name: en_vi
  data_files:
  - split: train
    path: en_vi/train-*
- config_name: en_zh
  data_files:
  - split: train
    path: en_zh/train-*
---
提供机构:
omai-research



