kotoba-speech/whisper-large-v3-vectorized
收藏 | Hugging Face | 2024-08-23 更新 | 2025-04-08 收录
下载链接:
https://hf-mirror.com/datasets/kotoba-speech/whisper-large-v3-vectorized
下载链接
链接失效反馈官方服务:
资源简介:
---
# Dataset card metadata.
# `dataset_info` lists, per config, the feature schema, the per-split
# byte/example counts, and the download/dataset sizes.
# `configs` maps each config name to the file globs backing its splits.
dataset_info:
- config_name: subset_0
  features:
  - name: input_features
    # Nested sequence: a sequence of float32 sequences.
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 1376819402768
    num_examples: 895960
  - name: eval
    num_bytes: 768350496
    num_examples: 500
  download_size: 266546754703
  # Equals the sum of the split num_bytes values above.
  dataset_size: 1377587753264
- config_name: subset_1
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 153669752
    num_examples: 100
  - name: eval
    num_bytes: 153669752
    num_examples: 100
  download_size: 60387108
  dataset_size: 307339504
# From subset_2 onward the single `labels` column is replaced by separate
# `en_labels` and `ja_labels` columns.
- config_name: subset_2
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 153688776
    num_examples: 100
  - name: eval
    num_bytes: 153690608
    num_examples: 100
  download_size: 54617428
  dataset_size: 307379384
# subset_2_0 .. subset_2_9 and subset_3_0 declare a train split only.
- config_name: subset_2_0
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916280864
    num_examples: 122918
  download_size: 36479353381
  dataset_size: 188916280864
- config_name: subset_2_1
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916421616
    num_examples: 122918
  download_size: 36561234018
  dataset_size: 188916421616
- config_name: subset_2_2
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916509408
    num_examples: 122918
  download_size: 36647983300
  dataset_size: 188916509408
- config_name: subset_2_3
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916391392
    num_examples: 122918
  download_size: 36559851013
  dataset_size: 188916391392
- config_name: subset_2_4
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916459512
    num_examples: 122918
  download_size: 36598370220
  dataset_size: 188916459512
- config_name: subset_2_5
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916412768
    num_examples: 122918
  download_size: 36555510291
  dataset_size: 188916412768
- config_name: subset_2_6
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916405840
    num_examples: 122918
  download_size: 36562581499
  dataset_size: 188916405840
- config_name: subset_2_7
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916345080
    num_examples: 122918
  download_size: 36530275304
  dataset_size: 188916345080
- config_name: subset_2_8
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916326472
    num_examples: 122918
  download_size: 36499977708
  dataset_size: 188916326472
- config_name: subset_2_9
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188927210092
    num_examples: 122925
  download_size: 36587569180
  dataset_size: 188927210092
- config_name: subset_3_0
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188854906520
    num_examples: 122878
  download_size: 36534806072
  dataset_size: 188854906520
# One entry per config above; each split points at a `<config>/<split>-*` glob.
configs:
- config_name: subset_0
  data_files:
  - split: train
    path: subset_0/train-*
  - split: eval
    path: subset_0/eval-*
- config_name: subset_1
  data_files:
  - split: train
    path: subset_1/train-*
  - split: eval
    path: subset_1/eval-*
- config_name: subset_2
  data_files:
  - split: train
    path: subset_2/train-*
  - split: eval
    path: subset_2/eval-*
- config_name: subset_2_0
  data_files:
  - split: train
    path: subset_2_0/train-*
- config_name: subset_2_1
  data_files:
  - split: train
    path: subset_2_1/train-*
- config_name: subset_2_2
  data_files:
  - split: train
    path: subset_2_2/train-*
- config_name: subset_2_3
  data_files:
  - split: train
    path: subset_2_3/train-*
- config_name: subset_2_4
  data_files:
  - split: train
    path: subset_2_4/train-*
- config_name: subset_2_5
  data_files:
  - split: train
    path: subset_2_5/train-*
- config_name: subset_2_6
  data_files:
  - split: train
    path: subset_2_6/train-*
- config_name: subset_2_7
  data_files:
  - split: train
    path: subset_2_7/train-*
- config_name: subset_2_8
  data_files:
  - split: train
    path: subset_2_8/train-*
- config_name: subset_2_9
  data_files:
  - split: train
    path: subset_2_9/train-*
- config_name: subset_3_0
  data_files:
  - split: train
    path: subset_3_0/train-*
---
提供机构:
kotoba-speech



