parkpoongpa/data4whipser
收藏 · Hugging Face · 2025-11-17 更新 · 2025-12-20 收录
下载链接:
https://hf-mirror.com/datasets/parkpoongpa/data4whipser
下载链接
链接失效反馈 · 官方服务:
资源简介:
---
# Per-config column schema and size statistics; one list entry per config_name.
dataset_info:
# Config `de`: six columns and three splits (train / validation / test).
- config_name: de
  features:
  - name: id
    dtype: string
  - name: audio_path
    dtype: string
  - name: text
    dtype: string
  - name: duration
    dtype: float64
  - name: source
    dtype: string
  - name: audio
    dtype: audio
  splits:
  - name: train
    num_bytes: 17008328787.528
    num_examples: 494116
  - name: validation
    num_bytes: 2185682597.576
    num_examples: 63466
  - name: test
    num_bytes: 2089053994.12
    num_examples: 60824
  # Totals for the whole config; dataset_size equals the sum of the split
  # num_bytes values above.
  download_size: 21066365697
  dataset_size: 21283065379.224
# Config `en`: six columns and three splits (train / validation / test).
- config_name: en
  features:
  - name: id
    dtype: string
  - name: audio_path
    dtype: string
  - name: text
    dtype: string
  - name: duration
    dtype: float64
  - name: source
    dtype: string
  - name: audio
    dtype: audio
  splits:
  - name: train
    num_bytes: 17913897045.6
    num_examples: 509890
  - name: validation
    num_bytes: 2170096789.88
    num_examples: 62980
  - name: test
    num_bytes: 2289223729.464
    num_examples: 65448
  # Totals for the whole config; dataset_size equals the sum of the split
  # num_bytes values above.
  download_size: 22383441014
  dataset_size: 22373217564.944
# Config `fr`: six columns and three splits (train / validation / test).
- config_name: fr
  features:
  - name: id
    dtype: string
  - name: audio_path
    dtype: string
  - name: text
    dtype: string
  - name: duration
    dtype: float64
  - name: source
    dtype: string
  - name: audio
    dtype: audio
  splits:
  - name: train
    num_bytes: 16029865366.52
    num_examples: 475436
  - name: validation
    num_bytes: 2045214114.52
    num_examples: 61410
  - name: test
    num_bytes: 1993043516.548
    num_examples: 58411
  # Totals for the whole config; dataset_size equals the sum of the split
  # num_bytes values above.
  download_size: 20006640244
  dataset_size: 20068122997.588
# Config `ja`: six columns and three splits (train / validation / test).
- config_name: ja
  features:
  - name: id
    dtype: string
  - name: audio_path
    dtype: string
  - name: text
    dtype: string
  - name: duration
    dtype: float64
  - name: source
    dtype: string
  - name: audio
    dtype: audio
  splits:
  - name: train
    num_bytes: 16103053280.6
    num_examples: 549414
  - name: validation
    num_bytes: 2132202749.296
    num_examples: 71819
  - name: test
    num_bytes: 1976157590.636
    num_examples: 67659
  # Totals for the whole config; dataset_size equals the sum of the split
  # num_bytes values above.
  download_size: 20149658014
  dataset_size: 20211413620.532
# Config `ko`: six columns and three splits (train / validation / test).
- config_name: ko
  features:
  - name: id
    dtype: string
  - name: audio_path
    dtype: string
  - name: text
    dtype: string
  - name: duration
    dtype: float64
  - name: source
    dtype: string
  - name: audio
    dtype: audio
  splits:
  - name: train
    num_bytes: 16128054290.988
    num_examples: 477953
  - name: validation
    num_bytes: 1921727883.504
    num_examples: 56964
  - name: test
    num_bytes: 2036410202.708
    num_examples: 59351
  # Totals for the whole config; dataset_size equals the sum of the split
  # num_bytes values above.
  download_size: 20157727115
  dataset_size: 20086192377.2
# Config `vi`: seven columns and three splits (train / validation / test).
- config_name: vi
  features:
  - name: id
    dtype: string
  - name: audio_path
    dtype: string
  - name: text
    dtype: string
  - name: duration
    dtype: float64
  - name: source
    dtype: string
  # Extra column: the other configs in this card do not declare `speaker`.
  - name: speaker
    dtype: string
  - name: audio
    dtype: audio
  splits:
  - name: train
    num_bytes: 76164908008.722
    num_examples: 898551
  - name: validation
    num_bytes: 9564567679.084
    num_examples: 112318
  - name: test
    num_bytes: 9330162808.24
    num_examples: 112320
  # Totals for the whole config; dataset_size equals the sum of the split
  # num_bytes values above.
  download_size: 114153709445
  dataset_size: 95059638496.046
# Config `zh`: six columns and three splits (train / validation / test).
- config_name: zh
  features:
  - name: id
    dtype: string
  - name: audio_path
    dtype: string
  - name: text
    dtype: string
  - name: duration
    dtype: float64
  - name: source
    dtype: string
  - name: audio
    dtype: audio
  splits:
  - name: train
    num_bytes: 17161014240.44
    num_examples: 502535
  - name: validation
    num_bytes: 2297519149.36
    num_examples: 65032
  - name: test
    num_bytes: 2231782550.884
    num_examples: 64983
  # Totals for the whole config; dataset_size equals the sum of the split
  # num_bytes values above.
  download_size: 21892911064
  dataset_size: 21690315940.684
# Data-file layout: for each config, the file pattern that backs each split.
configs:
# Config `de`: every split reads files matching a pattern under de/.
- config_name: de
  data_files:
  - split: train
    path: de/train-*
  - split: validation
    path: de/validation-*
  - split: test
    path: de/test-*
# Config `en`: every split reads files matching a pattern under en/.
- config_name: en
  data_files:
  - split: train
    path: en/train-*
  - split: validation
    path: en/validation-*
  - split: test
    path: en/test-*
# Config `fr`: every split reads files matching a pattern under fr/.
- config_name: fr
  data_files:
  - split: train
    path: fr/train-*
  - split: validation
    path: fr/validation-*
  - split: test
    path: fr/test-*
# Config `ja`: every split reads files matching a pattern under ja/.
- config_name: ja
  data_files:
  - split: train
    path: ja/train-*
  - split: validation
    path: ja/validation-*
  - split: test
    path: ja/test-*
# Config `ko`: every split reads files matching a pattern under ko/.
- config_name: ko
  data_files:
  - split: train
    path: ko/train-*
  - split: validation
    path: ko/validation-*
  - split: test
    path: ko/test-*
# Config `vi`: every split reads files matching a pattern under vi/.
- config_name: vi
  data_files:
  - split: train
    path: vi/train-*
  - split: validation
    path: vi/validation-*
  - split: test
    path: vi/test-*
# Config `zh`: every split reads files matching a pattern under zh/.
- config_name: zh
  data_files:
  - split: train
    path: zh/train-*
  - split: validation
    path: zh/validation-*
  - split: test
    path: zh/test-*
---
提供机构:
parkpoongpa



