kotoba-speech/wiki40b_lines_es
收藏 | Hugging Face | 2025-12-10 更新 | 2025-12-20 收录
下载链接:
https://hf-mirror.com/datasets/kotoba-speech/wiki40b_lines_es
下载链接
链接失效反馈官方服务:
资源简介:
---
# Per-config metadata: one entry per shard plus the combined subset.
# Each entry lists the feature schema and the size of its train split.
dataset_info:
- config_name: shard_01
  # Anchored so that subset_400K below can reuse the same schema by alias.
  features: &id001
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 209531710
    num_examples: 200000
  download_size: 126351148
  dataset_size: 209531710
- config_name: shard_02
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 209826286
    num_examples: 200000
  download_size: 126537651
  dataset_size: 209826286
- config_name: shard_03
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 209563107
    num_examples: 200000
  download_size: 126312660
  dataset_size: 209563107
- config_name: shard_04
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 211162205
    num_examples: 200000
  download_size: 127430222
  dataset_size: 211162205
- config_name: shard_05
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 208966742
    num_examples: 200000
  download_size: 126020584
  dataset_size: 208966742
- config_name: shard_06
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 207746608
    num_examples: 200000
  download_size: 125223159
  dataset_size: 207746608
- config_name: shard_07
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 208949036
    num_examples: 200000
  download_size: 126039832
  dataset_size: 208949036
- config_name: shard_08
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 209273862
    num_examples: 200000
  download_size: 126199258
  dataset_size: 209273862
- config_name: shard_09
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 208456201
    num_examples: 200000
  download_size: 125762437
  dataset_size: 208456201
- config_name: shard_10
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 208295117
    num_examples: 200000
  download_size: 125624132
  dataset_size: 208295117
- config_name: shard_11
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 209640003
    num_examples: 200000
  download_size: 126442856
  dataset_size: 209640003
# shard_12 holds fewer examples (127005) than the other shards (200000).
- config_name: shard_12
  features:
  - name: text
    dtype: string
  - name: key
    dtype: string
  splits:
  - name: train
    num_bytes: 133430843
    num_examples: 127005
  download_size: 80461130
  dataset_size: 133430843
# Combined subset: reuses the shard_01 schema through the alias and records
# only an example count (no byte, download, or dataset sizes are listed).
- config_name: subset_400K
  features: *id001
  splits:
  - name: train
    num_examples: 400000
# Maps each config name to the file pattern(s) backing its train split.
configs:
- config_name: shard_01
  data_files:
  - split: train
    path: shard_01/train-*
- config_name: shard_02
  data_files:
  - split: train
    path: shard_02/train-*
- config_name: shard_03
  data_files:
  - split: train
    path: shard_03/train-*
- config_name: shard_04
  data_files:
  - split: train
    path: shard_04/train-*
- config_name: shard_05
  data_files:
  - split: train
    path: shard_05/train-*
- config_name: shard_06
  data_files:
  - split: train
    path: shard_06/train-*
- config_name: shard_07
  data_files:
  - split: train
    path: shard_07/train-*
- config_name: shard_08
  data_files:
  - split: train
    path: shard_08/train-*
- config_name: shard_09
  data_files:
  - split: train
    path: shard_09/train-*
- config_name: shard_10
  data_files:
  - split: train
    path: shard_10/train-*
- config_name: shard_11
  data_files:
  - split: train
    path: shard_11/train-*
- config_name: shard_12
  data_files:
  - split: train
    path: shard_12/train-*
# subset_400K has no directory of its own: its train split is assembled from
# the shard_01 and shard_02 file patterns.
- config_name: subset_400K
  data_files:
  - split: train
    path:
    - shard_01/train-*
    - shard_02/train-*
---
提供机构:
kotoba-speech



