five

kotoba-speech/whisper-large-v3-vectorized

收藏
Hugging Face | 2024-08-23 更新 | 2025-04-08 收录
下载链接:
https://hf-mirror.com/datasets/kotoba-speech/whisper-large-v3-vectorized
下载链接
链接失效反馈
官方服务:
资源简介:
---
dataset_info:
- config_name: subset_0
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 1376819402768
    num_examples: 895960
  - name: eval
    num_bytes: 768350496
    num_examples: 500
  download_size: 266546754703
  dataset_size: 1377587753264
- config_name: subset_1
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 153669752
    num_examples: 100
  - name: eval
    num_bytes: 153669752
    num_examples: 100
  download_size: 60387108
  dataset_size: 307339504
- config_name: subset_2
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 153688776
    num_examples: 100
  - name: eval
    num_bytes: 153690608
    num_examples: 100
  download_size: 54617428
  dataset_size: 307379384
- config_name: subset_2_0
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916280864
    num_examples: 122918
  download_size: 36479353381
  dataset_size: 188916280864
- config_name: subset_2_1
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916421616
    num_examples: 122918
  download_size: 36561234018
  dataset_size: 188916421616
- config_name: subset_2_2
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916509408
    num_examples: 122918
  download_size: 36647983300
  dataset_size: 188916509408
- config_name: subset_2_3
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916391392
    num_examples: 122918
  download_size: 36559851013
  dataset_size: 188916391392
- config_name: subset_2_4
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916459512
    num_examples: 122918
  download_size: 36598370220
  dataset_size: 188916459512
- config_name: subset_2_5
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916412768
    num_examples: 122918
  download_size: 36555510291
  dataset_size: 188916412768
- config_name: subset_2_6
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916405840
    num_examples: 122918
  download_size: 36562581499
  dataset_size: 188916405840
- config_name: subset_2_7
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916345080
    num_examples: 122918
  download_size: 36530275304
  dataset_size: 188916345080
- config_name: subset_2_8
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188916326472
    num_examples: 122918
  download_size: 36499977708
  dataset_size: 188916326472
- config_name: subset_2_9
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188927210092
    num_examples: 122925
  download_size: 36587569180
  dataset_size: 188927210092
- config_name: subset_3_0
  features:
  - name: input_features
    sequence:
      sequence: float32
  - name: input_length
    dtype: int64
  - name: en_labels
    sequence: int64
  - name: ja_labels
    sequence: int64
  splits:
  - name: train
    num_bytes: 188854906520
    num_examples: 122878
  download_size: 36534806072
  dataset_size: 188854906520
configs:
- config_name: subset_0
  data_files:
  - split: train
    path: subset_0/train-*
  - split: eval
    path: subset_0/eval-*
- config_name: subset_1
  data_files:
  - split: train
    path: subset_1/train-*
  - split: eval
    path: subset_1/eval-*
- config_name: subset_2
  data_files:
  - split: train
    path: subset_2/train-*
  - split: eval
    path: subset_2/eval-*
- config_name: subset_2_0
  data_files:
  - split: train
    path: subset_2_0/train-*
- config_name: subset_2_1
  data_files:
  - split: train
    path: subset_2_1/train-*
- config_name: subset_2_2
  data_files:
  - split: train
    path: subset_2_2/train-*
- config_name: subset_2_3
  data_files:
  - split: train
    path: subset_2_3/train-*
- config_name: subset_2_4
  data_files:
  - split: train
    path: subset_2_4/train-*
- config_name: subset_2_5
  data_files:
  - split: train
    path: subset_2_5/train-*
- config_name: subset_2_6
  data_files:
  - split: train
    path: subset_2_6/train-*
- config_name: subset_2_7
  data_files:
  - split: train
    path: subset_2_7/train-*
- config_name: subset_2_8
  data_files:
  - split: train
    path: subset_2_8/train-*
- config_name: subset_2_9
  data_files:
  - split: train
    path: subset_2_9/train-*
- config_name: subset_3_0
  data_files:
  - split: train
    path: subset_3_0/train-*
---
提供机构:
kotoba-speech
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作