mteb/common_voice_21_0_mini
收藏Hugging Face2025-12-30 更新2026-01-03 收录
下载链接:
https://hf-mirror.com/datasets/mteb/common_voice_21_0_mini
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: ab
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 73586017.0
num_examples: 200
- name: validation
num_bytes: 69626724.0
num_examples: 200
- name: test
num_bytes: 70271521.0
num_examples: 200
download_size: 178447151
dataset_size: 213484262.0
- config_name: af
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 40676731.0
num_examples: 99
- name: validation
num_bytes: 51091224.0
num_examples: 123
- name: test
num_bytes: 47320768.0
num_examples: 117
download_size: 126038806
dataset_size: 139088723.0
- config_name: am
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 18768440.0
num_examples: 50
- name: validation
num_bytes: 18132785.0
num_examples: 50
- name: test
num_bytes: 81211014.0
num_examples: 200
download_size: 106473081
dataset_size: 118112239.0
- config_name: ar
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 57741640.0
num_examples: 200
- name: validation
num_bytes: 64331931.0
num_examples: 200
- name: test
num_bytes: 70384645.0
num_examples: 200
download_size: 160197741
dataset_size: 192458216.0
- config_name: as
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 45075427.0
num_examples: 100
- name: validation
num_bytes: 37488796.0
num_examples: 100
- name: test
num_bytes: 82478184.0
num_examples: 200
download_size: 134434421
dataset_size: 165042407.0
- config_name: ast
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 29322744.0
num_examples: 100
- name: validation
num_bytes: 13362754.0
num_examples: 50
- name: test
num_bytes: 45654522.0
num_examples: 157
download_size: 73399597
dataset_size: 88340020.0
- config_name: az
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 45707762.0
num_examples: 136
- name: validation
num_bytes: 21069055.0
num_examples: 71
- name: test
num_bytes: 36218026.0
num_examples: 92
download_size: 94434630
dataset_size: 102994843.0
- config_name: ba
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 62497218.0
num_examples: 200
- name: validation
num_bytes: 63314317.0
num_examples: 200
- name: test
num_bytes: 63285854.0
num_examples: 200
download_size: 149905700
dataset_size: 189097389.0
- config_name: bas
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 25031243.0
num_examples: 100
- name: validation
num_bytes: 38486789.0
num_examples: 150
- name: test
num_bytes: 54325257.0
num_examples: 200
download_size: 92889675
dataset_size: 117843289.0
- config_name: be
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 78255589.0
num_examples: 200
- name: validation
num_bytes: 82903876.0
num_examples: 200
- name: test
num_bytes: 86197028.0
num_examples: 200
download_size: 212561645
dataset_size: 247356493.0
- config_name: bg
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 50168579.0
num_examples: 150
- name: validation
num_bytes: 72448928.0
num_examples: 200
- name: test
num_bytes: 72917517.0
num_examples: 200
download_size: 175320912
dataset_size: 195535024.0
- config_name: bn
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 78411932.0
num_examples: 200
- name: validation
num_bytes: 80235270.0
num_examples: 200
- name: test
num_bytes: 84165140.0
num_examples: 200
download_size: 209448221
dataset_size: 242812342.0
- config_name: br
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 52949678.0
num_examples: 200
- name: validation
num_bytes: 57803862.0
num_examples: 200
- name: test
num_bytes: 58459517.0
num_examples: 200
download_size: 134128074
dataset_size: 169213057.0
- config_name: ca
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 101474061.0
num_examples: 200
- name: validation
num_bytes: 94975203.0
num_examples: 200
- name: test
num_bytes: 90072108.0
num_examples: 200
download_size: 237405375
dataset_size: 286521372.0
- config_name: ckb
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 52708643.0
num_examples: 200
- name: validation
num_bytes: 55295189.0
num_examples: 200
- name: test
num_bytes: 60178623.0
num_examples: 200
download_size: 134848569
dataset_size: 168182455.0
- config_name: cnh
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 57681533.0
num_examples: 200
- name: validation
num_bytes: 69276947.0
num_examples: 200
- name: test
num_bytes: 77591897.0
num_examples: 200
download_size: 191550710
dataset_size: 204550377.0
- config_name: cs
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 90431583.0
num_examples: 200
- name: validation
num_bytes: 79626497.0
num_examples: 200
- name: test
num_bytes: 82859808.0
num_examples: 200
download_size: 215769132
dataset_size: 252917888.0
- config_name: cv
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 82521366.0
num_examples: 200
- name: validation
num_bytes: 88205549.0
num_examples: 200
- name: test
num_bytes: 91481892.0
num_examples: 200
download_size: 225391903
dataset_size: 262208807.0
- config_name: cy
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 93203449.0
num_examples: 200
- name: validation
num_bytes: 91359797.0
num_examples: 200
- name: test
num_bytes: 87400228.0
num_examples: 200
download_size: 227716578
dataset_size: 271963474.0
- config_name: da
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 51359414.0
num_examples: 200
- name: validation
num_bytes: 60023688.0
num_examples: 200
- name: test
num_bytes: 60657309.0
num_examples: 200
download_size: 143258503
dataset_size: 172040411.0
- config_name: dav
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 38669630.0
num_examples: 150
- name: validation
num_bytes: 24695442.0
num_examples: 100
- name: test
num_bytes: 61039795.0
num_examples: 200
download_size: 98725168
dataset_size: 124404867.0
- config_name: de
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 105378536.0
num_examples: 200
- name: validation
num_bytes: 107489325.0
num_examples: 200
- name: test
num_bytes: 109389070.0
num_examples: 200
download_size: 282376779
dataset_size: 322256931.0
- config_name: dv
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 87686144.0
num_examples: 200
- name: validation
num_bytes: 81065293.0
num_examples: 200
- name: test
num_bytes: 94720573.0
num_examples: 200
download_size: 232138791
dataset_size: 263472010.0
- config_name: dyu
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 32893513.0
num_examples: 86
- name: validation
num_bytes: 20139647.0
num_examples: 50
- name: test
num_bytes: 25473101.0
num_examples: 63
download_size: 70468623
dataset_size: 78506261.0
- config_name: el
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 67599586.0
num_examples: 200
- name: validation
num_bytes: 62001372.0
num_examples: 200
- name: test
num_bytes: 65447353.0
num_examples: 200
download_size: 169161803
dataset_size: 195048311.0
- config_name: en
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 103143409.0
num_examples: 200
- name: validation
num_bytes: 111996612.0
num_examples: 200
- name: test
num_bytes: 104435681.0
num_examples: 200
download_size: 281201866
dataset_size: 319575702.0
- config_name: eo
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 91585787.0
num_examples: 200
- name: validation
num_bytes: 96840001.0
num_examples: 200
- name: test
num_bytes: 114725080.0
num_examples: 200
download_size: 272735414
dataset_size: 303150868.0
- config_name: es
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 103578057.0
num_examples: 200
- name: validation
num_bytes: 111635877.0
num_examples: 200
- name: test
num_bytes: 114215294.0
num_examples: 200
download_size: 292968894
dataset_size: 329429228.0
- config_name: et
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 107395917.0
num_examples: 200
- name: validation
num_bytes: 99239340.0
num_examples: 200
- name: test
num_bytes: 114906661.0
num_examples: 200
download_size: 291448297
dataset_size: 321541918.0
- config_name: eu
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 101773310.0
num_examples: 200
- name: validation
num_bytes: 86074717.0
num_examples: 200
- name: test
num_bytes: 84349456.0
num_examples: 200
download_size: 226312465
dataset_size: 272197483.0
- config_name: fa
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 70375030.0
num_examples: 200
- name: validation
num_bytes: 81249815.0
num_examples: 200
- name: test
num_bytes: 99628910.0
num_examples: 200
download_size: 206223686
dataset_size: 251253755.0
- config_name: fi
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 58557317.0
num_examples: 200
- name: validation
num_bytes: 63150533.0
num_examples: 200
- name: test
num_bytes: 68665268.0
num_examples: 200
download_size: 164752232
dataset_size: 190373118.0
- config_name: fr
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 86852847.0
num_examples: 200
- name: validation
num_bytes: 110783804.0
num_examples: 200
- name: test
num_bytes: 107626621.0
num_examples: 200
download_size: 266694403
dataset_size: 305263272.0
- config_name: fy-NL
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 80795332.0
num_examples: 200
- name: validation
num_bytes: 80923167.0
num_examples: 200
- name: test
num_bytes: 76225129.0
num_examples: 200
download_size: 189348906
dataset_size: 237943628.0
- config_name: ga-IE
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 74195091.0
num_examples: 200
- name: validation
num_bytes: 71130481.0
num_examples: 200
- name: test
num_bytes: 68177899.0
num_examples: 200
download_size: 172762247
dataset_size: 213503471.0
- config_name: gl
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 57530204.0
num_examples: 200
- name: validation
num_bytes: 66910052.0
num_examples: 200
- name: test
num_bytes: 67253010.0
num_examples: 200
download_size: 162648256
dataset_size: 191693266.0
- config_name: gn
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 24956805.0
num_examples: 100
- name: validation
num_bytes: 42307269.0
num_examples: 150
- name: test
num_bytes: 59721586.0
num_examples: 200
download_size: 103994421
dataset_size: 126985660.0
- config_name: ha
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 13522700.0
num_examples: 50
- name: validation
num_bytes: 13506434.0
num_examples: 50
- name: test
num_bytes: 60619588.0
num_examples: 200
download_size: 73977088
dataset_size: 87648722.0
- config_name: he
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 28532167.0
num_examples: 100
- name: validation
num_bytes: 14492650.0
num_examples: 50
- name: test
num_bytes: 67690380.0
num_examples: 200
download_size: 96392774
dataset_size: 110715197.0
- config_name: hi
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 43700912.0
num_examples: 150
- name: validation
num_bytes: 61612415.0
num_examples: 200
- name: test
num_bytes: 70505203.0
num_examples: 200
download_size: 159141336
dataset_size: 175818530.0
- config_name: hsb
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 31304266.0
num_examples: 50
- name: validation
num_bytes: 29709490.0
num_examples: 50
- name: test
num_bytes: 129156879.0
num_examples: 200
download_size: 173689431
dataset_size: 190170635.0
- config_name: hu
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 81566922.0
num_examples: 200
- name: validation
num_bytes: 65974323.0
num_examples: 200
- name: test
num_bytes: 72765811.0
num_examples: 200
download_size: 190125854
dataset_size: 220307056.0
- config_name: hy-AM
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 69163570.0
num_examples: 200
- name: validation
num_bytes: 68241116.0
num_examples: 200
- name: test
num_bytes: 72763421.0
num_examples: 200
download_size: 185233476
dataset_size: 210168107.0
- config_name: ia
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 29310687.0
num_examples: 100
- name: validation
num_bytes: 23569973.0
num_examples: 50
- name: test
num_bytes: 69789190.0
num_examples: 200
download_size: 107818904
dataset_size: 122669850.0
- config_name: id
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 70191154.0
num_examples: 200
- name: validation
num_bytes: 67232407.0
num_examples: 200
- name: test
num_bytes: 65730414.0
num_examples: 200
download_size: 182332696
dataset_size: 203153975.0
- config_name: it
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 90743624.0
num_examples: 200
- name: validation
num_bytes: 112606407.0
num_examples: 200
- name: test
num_bytes: 118010628.0
num_examples: 200
download_size: 289513724
dataset_size: 321360659.0
- config_name: ja
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 74747078.0
num_examples: 200
- name: validation
num_bytes: 64432462.0
num_examples: 200
- name: test
num_bytes: 66897745.0
num_examples: 200
download_size: 185080718
dataset_size: 206077285.0
- config_name: ka
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 85844479.0
num_examples: 200
- name: validation
num_bytes: 80155152.0
num_examples: 200
- name: test
num_bytes: 81416441.0
num_examples: 200
download_size: 219841670
dataset_size: 247416072.0
- config_name: kab
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 64521762.0
num_examples: 200
- name: validation
num_bytes: 71339274.0
num_examples: 200
- name: test
num_bytes: 77615017.0
num_examples: 200
download_size: 174744200
dataset_size: 213476053.0
- config_name: kk
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 64338007.0
num_examples: 200
- name: validation
num_bytes: 60675789.0
num_examples: 200
- name: test
num_bytes: 65062777.0
num_examples: 200
download_size: 155410925
dataset_size: 190076573.0
- config_name: kln
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 59917569.0
num_examples: 200
- name: validation
num_bytes: 60958521.0
num_examples: 200
- name: test
num_bytes: 67436355.0
num_examples: 200
download_size: 143634817
dataset_size: 188312445.0
- config_name: kmr
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 49945843.0
num_examples: 200
- name: validation
num_bytes: 54542997.0
num_examples: 200
- name: test
num_bytes: 58533224.0
num_examples: 200
download_size: 122170183
dataset_size: 163022064.0
- config_name: ko
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 60788973.0
num_examples: 150
- name: validation
num_bytes: 67864672.0
num_examples: 200
- name: test
num_bytes: 68157696.0
num_examples: 200
download_size: 174570151
dataset_size: 196811341.0
- config_name: ky
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 66553199.0
num_examples: 200
- name: validation
num_bytes: 74986704.0
num_examples: 200
- name: test
num_bytes: 81271022.0
num_examples: 200
download_size: 190620466
dataset_size: 222810925.0
- config_name: lg
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 69256617.0
num_examples: 200
- name: validation
num_bytes: 77969151.0
num_examples: 200
- name: test
num_bytes: 79850919.0
num_examples: 200
download_size: 200483157
dataset_size: 227076687.0
- config_name: lij
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 38655915.0
num_examples: 150
- name: validation
num_bytes: 37304151.0
num_examples: 150
- name: test
num_bytes: 54269499.0
num_examples: 200
download_size: 106478140
dataset_size: 130229565.0
- config_name: lt
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 162521027.0
num_examples: 450
- name: validation
num_bytes: 177752548.0
num_examples: 500
- name: test
num_bytes: 179261014.0
num_examples: 500
download_size: 454083300
dataset_size: 519534589.0
- config_name: ltg
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 143931054.0
num_examples: 500
- name: validation
num_bytes: 165382992.0
num_examples: 500
- name: test
num_bytes: 171016857.0
num_examples: 500
download_size: 398937527
dataset_size: 480330903.0
- config_name: luo
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 46861481.0
num_examples: 150
- name: validation
num_bytes: 31954217.0
num_examples: 100
- name: test
num_bytes: 58049426.0
num_examples: 200
download_size: 110756897
dataset_size: 136865124.0
- config_name: lv
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 161391732.0
num_examples: 500
- name: validation
num_bytes: 174746620.0
num_examples: 500
- name: test
num_bytes: 173455884.0
num_examples: 500
download_size: 411345789
dataset_size: 509594236.0
- config_name: mdf
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 35442385.0
num_examples: 100
- name: validation
num_bytes: 16545974.0
num_examples: 50
- name: test
num_bytes: 34903424.0
num_examples: 107
download_size: 74339924
dataset_size: 86891783.0
- config_name: mhr
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 164342658.0
num_examples: 500
- name: validation
num_bytes: 154739764.0
num_examples: 500
- name: test
num_bytes: 154547474.0
num_examples: 500
download_size: 400356070
dataset_size: 473629896.0
- config_name: mk
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 55763576.0
num_examples: 200
- name: validation
num_bytes: 157201504.0
num_examples: 500
- name: test
num_bytes: 168569226.0
num_examples: 500
download_size: 332017074
dataset_size: 381534306.0
- config_name: ml
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 25369410.0
num_examples: 100
- name: validation
num_bytes: 39965647.0
num_examples: 150
- name: test
num_bytes: 133311232.0
num_examples: 500
download_size: 169272919
dataset_size: 198646289.0
- config_name: mn
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 210486000.0
num_examples: 500
- name: validation
num_bytes: 242463207.0
num_examples: 500
- name: test
num_bytes: 259728010.0
num_examples: 500
download_size: 632329130
dataset_size: 712677217.0
- config_name: mr
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 173832470.0
num_examples: 450
- name: validation
num_bytes: 205522801.0
num_examples: 500
- name: test
num_bytes: 201297917.0
num_examples: 500
download_size: 529849692
dataset_size: 580653188.0
- config_name: mrj
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 67147960.0
num_examples: 250
- name: validation
num_bytes: 98791386.0
num_examples: 350
- name: test
num_bytes: 152789966.0
num_examples: 500
download_size: 262960429
dataset_size: 318729312.0
- config_name: mt
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 103410589.0
num_examples: 250
- name: validation
num_bytes: 222179468.0
num_examples: 500
- name: test
num_bytes: 245209080.0
num_examples: 500
download_size: 483700272
dataset_size: 570799137.0
- config_name: myv
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 40586025.0
num_examples: 100
- name: validation
num_bytes: 18495047.0
num_examples: 50
- name: test
num_bytes: 139762460.0
num_examples: 375
download_size: 168454041
dataset_size: 198843532.0
- config_name: nan-tw
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 31207040.0
num_examples: 200
- name: validation
num_bytes: 85924368.0
num_examples: 500
- name: test
num_bytes: 96551491.0
num_examples: 500
download_size: 163645338
dataset_size: 213682899.0
- config_name: ne-NP
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 33872096.0
num_examples: 150
- name: validation
num_bytes: 64730606.0
num_examples: 240
- name: test
num_bytes: 76516998.0
num_examples: 272
download_size: 156716720
dataset_size: 175119700.0
- config_name: nl
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 235496204.0
num_examples: 500
- name: validation
num_bytes: 197262054.0
num_examples: 500
- name: test
num_bytes: 209630464.0
num_examples: 500
download_size: 555779107
dataset_size: 642388722.0
- config_name: nn-NO
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 26236395.0
num_examples: 100
- name: validation
num_bytes: 69262211.0
num_examples: 241
- name: test
num_bytes: 120445166.0
num_examples: 412
download_size: 181925525
dataset_size: 215943772.0
- config_name: oc
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 96287707.0
num_examples: 305
- name: validation
num_bytes: 80388328.0
num_examples: 266
- name: test
num_bytes: 88659631.0
num_examples: 274
download_size: 214830192
dataset_size: 265335666.0
- config_name: or
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 21869647.0
num_examples: 50
- name: validation
num_bytes: 20571899.0
num_examples: 50
- name: test
num_bytes: 189338081.0
num_examples: 420
download_size: 192744993
dataset_size: 231779627.0
- config_name: os
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 34320748.0
num_examples: 100
- name: validation
num_bytes: 13804961.0
num_examples: 50
- name: test
num_bytes: 46903229.0
num_examples: 130
download_size: 79562514
dataset_size: 95028938.0
- config_name: pa-IN
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 25061517.0
num_examples: 50
- name: validation
num_bytes: 42250015.0
num_examples: 100
- name: test
num_bytes: 183221302.0
num_examples: 500
download_size: 221664707
dataset_size: 250532834.0
- config_name: pl
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 214392940.0
num_examples: 500
- name: validation
num_bytes: 215399032.0
num_examples: 500
- name: test
num_bytes: 235473486.0
num_examples: 500
download_size: 564833646
dataset_size: 665265458.0
- config_name: ps
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 158810870.0
num_examples: 500
- name: validation
num_bytes: 165127732.0
num_examples: 500
- name: test
num_bytes: 176279458.0
num_examples: 500
download_size: 413765421
dataset_size: 500218060.0
- config_name: pt
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 224918004.0
num_examples: 500
- name: validation
num_bytes: 189820714.0
num_examples: 500
- name: test
num_bytes: 196127066.0
num_examples: 500
download_size: 532538805
dataset_size: 610865784.0
- config_name: rm-sursilv
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 190222053.0
num_examples: 350
- name: validation
num_bytes: 252612721.0
num_examples: 500
- name: test
num_bytes: 253370652.0
num_examples: 500
download_size: 621829869
dataset_size: 696205426.0
- config_name: rm-vallader
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 60738928.0
num_examples: 100
- name: validation
num_bytes: 143385831.0
num_examples: 250
- name: test
num_bytes: 253188016.0
num_examples: 433
download_size: 394323472
dataset_size: 457312775.0
- config_name: ro
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 155185918.0
num_examples: 500
- name: validation
num_bytes: 139928885.0
num_examples: 500
- name: test
num_bytes: 161475160.0
num_examples: 500
download_size: 400656706
dataset_size: 456589963.0
- config_name: ru
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 256431818.0
num_examples: 500
- name: validation
num_bytes: 262018812.0
num_examples: 500
- name: test
num_bytes: 260842211.0
num_examples: 500
download_size: 701852434
dataset_size: 779292841.0
- config_name: rw
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 68574508.0
num_examples: 200
- name: validation
num_bytes: 112230724.0
num_examples: 200
- name: test
num_bytes: 99543390.0
num_examples: 200
download_size: 265871913
dataset_size: 280348622.0
- config_name: sah
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 80209685.0
num_examples: 150
- name: validation
num_bytes: 99953271.0
num_examples: 200
- name: test
num_bytes: 101717612.0
num_examples: 200
download_size: 248789367
dataset_size: 281880568.0
- config_name: sat
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 15399118.0
num_examples: 50
- name: validation
num_bytes: 10473099.0
num_examples: 50
- name: test
num_bytes: 38485156.0
num_examples: 113
download_size: 50273201
dataset_size: 64357373.0
- config_name: sc
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 41882778.0
num_examples: 150
- name: validation
num_bytes: 27469071.0
num_examples: 100
- name: test
num_bytes: 65659725.0
num_examples: 200
download_size: 115498382
dataset_size: 135011574.0
- config_name: sk
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 56403823.0
num_examples: 200
- name: validation
num_bytes: 51509524.0
num_examples: 200
- name: test
num_bytes: 53803441.0
num_examples: 200
download_size: 133257730
dataset_size: 161716788.0
- config_name: skr
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 10836917.0
num_examples: 50
- name: validation
num_bytes: 28099006.0
num_examples: 100
- name: test
num_bytes: 58224413.0
num_examples: 200
download_size: 81604741
dataset_size: 97160336.0
- config_name: sl
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 53083273.0
num_examples: 200
- name: validation
num_bytes: 61274932.0
num_examples: 200
- name: test
num_bytes: 60888225.0
num_examples: 200
download_size: 148746950
dataset_size: 175246430.0
- config_name: sq
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 34634597.0
num_examples: 100
- name: validation
num_bytes: 63091022.0
num_examples: 200
- name: test
num_bytes: 65931026.0
num_examples: 200
download_size: 141477594
dataset_size: 163656645.0
- config_name: sr
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 36731185.0
num_examples: 200
- name: validation
num_bytes: 37713152.0
num_examples: 200
- name: test
num_bytes: 42632540.0
num_examples: 200
download_size: 91524376
dataset_size: 117076877.0
- config_name: sv-SE
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 56718313.0
num_examples: 200
- name: validation
num_bytes: 58151839.0
num_examples: 200
- name: test
num_bytes: 64849464.0
num_examples: 200
download_size: 150103922
dataset_size: 179719616.0
- config_name: sw
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 70886562.0
num_examples: 200
- name: validation
num_bytes: 70647399.0
num_examples: 200
- name: test
num_bytes: 76104834.0
num_examples: 200
download_size: 199720910
dataset_size: 217638795.0
- config_name: ta
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 81439538.0
num_examples: 200
- name: validation
num_bytes: 78024238.0
num_examples: 200
- name: test
num_bytes: 85289902.0
num_examples: 200
download_size: 216283173
dataset_size: 244753678.0
- config_name: te
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 20748358.0
num_examples: 73
- name: validation
num_bytes: 15984148.0
num_examples: 58
- name: test
num_bytes: 17496003.0
num_examples: 61
download_size: 46253763
dataset_size: 54228509.0
- config_name: th
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 57995595.0
num_examples: 200
- name: validation
num_bytes: 63238580.0
num_examples: 200
- name: test
num_bytes: 63663126.0
num_examples: 200
download_size: 160248993
dataset_size: 184897301.0
- config_name: tig
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 67626754.0
num_examples: 200
- name: validation
num_bytes: 80172175.0
num_examples: 200
- name: test
num_bytes: 81786548.0
num_examples: 200
download_size: 167126419
dataset_size: 229585477.0
- config_name: tk
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 34416950.0
num_examples: 100
- name: validation
num_bytes: 65211039.0
num_examples: 200
- name: test
num_bytes: 72032695.0
num_examples: 200
download_size: 144367282
dataset_size: 171660684.0
- config_name: tn
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 13642139.0
num_examples: 50
- name: validation
num_bytes: 15599977.0
num_examples: 50
- name: test
num_bytes: 51782328.0
num_examples: 200
download_size: 59714026
dataset_size: 81024444.0
- config_name: tok
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 44365100.0
num_examples: 200
- name: validation
num_bytes: 54439983.0
num_examples: 200
- name: test
num_bytes: 51025817.0
num_examples: 200
download_size: 132287715
dataset_size: 149830900.0
- config_name: tr
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 45602998.0
num_examples: 200
- name: validation
num_bytes: 49071724.0
num_examples: 200
- name: test
num_bytes: 74900470.0
num_examples: 200
download_size: 139146737
dataset_size: 169575192.0
- config_name: tt
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 35519976.0
num_examples: 100
- name: validation
num_bytes: 68507694.0
num_examples: 200
- name: test
num_bytes: 72393821.0
num_examples: 200
download_size: 141642693
dataset_size: 176421491.0
- config_name: ug
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 64199455.0
num_examples: 200
- name: validation
num_bytes: 75831227.0
num_examples: 200
- name: test
num_bytes: 77465250.0
num_examples: 200
download_size: 186230777
dataset_size: 217495932.0
- config_name: uk
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 76625999.0
num_examples: 200
- name: validation
num_bytes: 84986214.0
num_examples: 200
- name: test
num_bytes: 94081081.0
num_examples: 200
download_size: 216132987
dataset_size: 255693294.0
- config_name: ur
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 53090755.0
num_examples: 200
- name: validation
num_bytes: 55027104.0
num_examples: 200
- name: test
num_bytes: 58523014.0
num_examples: 200
download_size: 153117367
dataset_size: 166640873.0
- config_name: uz
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 55212620.0
num_examples: 200
- name: validation
num_bytes: 62484966.0
num_examples: 200
- name: test
num_bytes: 69414599.0
num_examples: 200
download_size: 153999121
dataset_size: 187112185.0
- config_name: vi
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 15358712.0
num_examples: 50
- name: validation
num_bytes: 34797137.0
num_examples: 150
- name: test
num_bytes: 59411319.0
num_examples: 200
download_size: 95053609
dataset_size: 109567168.0
- config_name: yi
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 31940158.0
num_examples: 123
- name: validation
num_bytes: 36649035.0
num_examples: 144
- name: test
num_bytes: 39208106.0
num_examples: 143
download_size: 92209489
dataset_size: 107797299.0
- config_name: yo
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 77893021.0
num_examples: 200
- name: validation
num_bytes: 68603094.0
num_examples: 200
- name: test
num_bytes: 78675133.0
num_examples: 200
download_size: 194500571
dataset_size: 225171248.0
- config_name: yue
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 50498195.0
num_examples: 200
- name: validation
num_bytes: 53596265.0
num_examples: 200
- name: test
num_bytes: 58492657.0
num_examples: 200
download_size: 135186820
dataset_size: 162587117.0
- config_name: zgh
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 10143627.0
num_examples: 50
- name: validation
num_bytes: 9585443.0
num_examples: 50
- name: test
num_bytes: 51964289.0
num_examples: 200
download_size: 53711158
dataset_size: 71693359.0
- config_name: zh-CN
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 119070549.0
num_examples: 200
- name: validation
num_bytes: 108339346.0
num_examples: 200
- name: test
num_bytes: 116545855.0
num_examples: 200
download_size: 310125536
dataset_size: 343955750.0
- config_name: zh-HK
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 79243947.0
num_examples: 200
- name: validation
num_bytes: 91435548.0
num_examples: 200
- name: test
num_bytes: 103798367.0
num_examples: 200
download_size: 226972866
dataset_size: 274477862.0
- config_name: zh-TW
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 53660047.0
num_examples: 200
- name: validation
num_bytes: 56217622.0
num_examples: 200
- name: test
num_bytes: 64240587.0
num_examples: 200
download_size: 150901721
dataset_size: 174118256.0
- config_name: zza
features:
- name: client_id
dtype: string
- name: path
dtype: string
- name: audio
dtype: audio
- name: sentence
dtype: string
- name: up_votes
dtype: int64
- name: down_votes
dtype: int64
- name: age
dtype: string
- name: gender
dtype: string
- name: accent
dtype: string
- name: locale
dtype: string
- name: segment
dtype: string
- name: variant
dtype: string
splits:
- name: train
num_bytes: 12938326.0
num_examples: 50
- name: validation
num_bytes: 28293959.0
num_examples: 100
- name: test
num_bytes: 52806737.0
num_examples: 200
download_size: 70889893
dataset_size: 94039022.0
configs:
- config_name: ab
data_files:
- split: train
path: ab/train-*
- split: validation
path: ab/validation-*
- split: test
path: ab/test-*
- config_name: af
data_files:
- split: train
path: af/train-*
- split: validation
path: af/validation-*
- split: test
path: af/test-*
- config_name: am
data_files:
- split: train
path: am/train-*
- split: validation
path: am/validation-*
- split: test
path: am/test-*
- config_name: ar
data_files:
- split: train
path: ar/train-*
- split: validation
path: ar/validation-*
- split: test
path: ar/test-*
- config_name: as
data_files:
- split: train
path: as/train-*
- split: validation
path: as/validation-*
- split: test
path: as/test-*
- config_name: ast
data_files:
- split: train
path: ast/train-*
- split: validation
path: ast/validation-*
- split: test
path: ast/test-*
- config_name: az
data_files:
- split: train
path: az/train-*
- split: validation
path: az/validation-*
- split: test
path: az/test-*
- config_name: ba
data_files:
- split: train
path: ba/train-*
- split: validation
path: ba/validation-*
- split: test
path: ba/test-*
- config_name: bas
data_files:
- split: train
path: bas/train-*
- split: validation
path: bas/validation-*
- split: test
path: bas/test-*
- config_name: be
data_files:
- split: train
path: be/train-*
- split: validation
path: be/validation-*
- split: test
path: be/test-*
- config_name: bg
data_files:
- split: train
path: bg/train-*
- split: validation
path: bg/validation-*
- split: test
path: bg/test-*
- config_name: bn
data_files:
- split: train
path: bn/train-*
- split: validation
path: bn/validation-*
- split: test
path: bn/test-*
- config_name: br
data_files:
- split: train
path: br/train-*
- split: validation
path: br/validation-*
- split: test
path: br/test-*
- config_name: ca
data_files:
- split: train
path: ca/train-*
- split: validation
path: ca/validation-*
- split: test
path: ca/test-*
- config_name: ckb
data_files:
- split: train
path: ckb/train-*
- split: validation
path: ckb/validation-*
- split: test
path: ckb/test-*
- config_name: cnh
data_files:
- split: train
path: cnh/train-*
- split: validation
path: cnh/validation-*
- split: test
path: cnh/test-*
- config_name: cs
data_files:
- split: train
path: cs/train-*
- split: validation
path: cs/validation-*
- split: test
path: cs/test-*
- config_name: cv
data_files:
- split: train
path: cv/train-*
- split: validation
path: cv/validation-*
- split: test
path: cv/test-*
- config_name: cy
data_files:
- split: train
path: cy/train-*
- split: validation
path: cy/validation-*
- split: test
path: cy/test-*
- config_name: da
data_files:
- split: train
path: da/train-*
- split: validation
path: da/validation-*
- split: test
path: da/test-*
- config_name: dav
data_files:
- split: train
path: dav/train-*
- split: validation
path: dav/validation-*
- split: test
path: dav/test-*
- config_name: de
data_files:
- split: train
path: de/train-*
- split: validation
path: de/validation-*
- split: test
path: de/test-*
- config_name: dv
data_files:
- split: train
path: dv/train-*
- split: validation
path: dv/validation-*
- split: test
path: dv/test-*
- config_name: dyu
data_files:
- split: train
path: dyu/train-*
- split: validation
path: dyu/validation-*
- split: test
path: dyu/test-*
- config_name: el
data_files:
- split: train
path: el/train-*
- split: validation
path: el/validation-*
- split: test
path: el/test-*
- config_name: en
data_files:
- split: train
path: en/train-*
- split: validation
path: en/validation-*
- split: test
path: en/test-*
- config_name: eo
data_files:
- split: train
path: eo/train-*
- split: validation
path: eo/validation-*
- split: test
path: eo/test-*
- config_name: es
data_files:
- split: train
path: es/train-*
- split: validation
path: es/validation-*
- split: test
path: es/test-*
- config_name: et
data_files:
- split: train
path: et/train-*
- split: validation
path: et/validation-*
- split: test
path: et/test-*
- config_name: eu
data_files:
- split: train
path: eu/train-*
- split: validation
path: eu/validation-*
- split: test
path: eu/test-*
- config_name: fa
data_files:
- split: train
path: fa/train-*
- split: validation
path: fa/validation-*
- split: test
path: fa/test-*
- config_name: fi
data_files:
- split: train
path: fi/train-*
- split: validation
path: fi/validation-*
- split: test
path: fi/test-*
- config_name: fr
data_files:
- split: train
path: fr/train-*
- split: validation
path: fr/validation-*
- split: test
path: fr/test-*
- config_name: fy-NL
data_files:
- split: train
path: fy-NL/train-*
- split: validation
path: fy-NL/validation-*
- split: test
path: fy-NL/test-*
- config_name: ga-IE
data_files:
- split: train
path: ga-IE/train-*
- split: validation
path: ga-IE/validation-*
- split: test
path: ga-IE/test-*
- config_name: gl
data_files:
- split: train
path: gl/train-*
- split: validation
path: gl/validation-*
- split: test
path: gl/test-*
- config_name: gn
data_files:
- split: train
path: gn/train-*
- split: validation
path: gn/validation-*
- split: test
path: gn/test-*
- config_name: ha
data_files:
- split: train
path: ha/train-*
- split: validation
path: ha/validation-*
- split: test
path: ha/test-*
- config_name: he
data_files:
- split: train
path: he/train-*
- split: validation
path: he/validation-*
- split: test
path: he/test-*
- config_name: hi
data_files:
- split: train
path: hi/train-*
- split: validation
path: hi/validation-*
- split: test
path: hi/test-*
- config_name: hsb
data_files:
- split: train
path: hsb/train-*
- split: validation
path: hsb/validation-*
- split: test
path: hsb/test-*
- config_name: hu
data_files:
- split: train
path: hu/train-*
- split: validation
path: hu/validation-*
- split: test
path: hu/test-*
- config_name: hy-AM
data_files:
- split: train
path: hy-AM/train-*
- split: validation
path: hy-AM/validation-*
- split: test
path: hy-AM/test-*
- config_name: ia
data_files:
- split: train
path: ia/train-*
- split: validation
path: ia/validation-*
- split: test
path: ia/test-*
- config_name: id
data_files:
- split: train
path: id/train-*
- split: validation
path: id/validation-*
- split: test
path: id/test-*
- config_name: it
data_files:
- split: train
path: it/train-*
- split: validation
path: it/validation-*
- split: test
path: it/test-*
- config_name: ja
data_files:
- split: train
path: ja/train-*
- split: validation
path: ja/validation-*
- split: test
path: ja/test-*
- config_name: ka
data_files:
- split: train
path: ka/train-*
- split: validation
path: ka/validation-*
- split: test
path: ka/test-*
- config_name: kab
data_files:
- split: train
path: kab/train-*
- split: validation
path: kab/validation-*
- split: test
path: kab/test-*
- config_name: kk
data_files:
- split: train
path: kk/train-*
- split: validation
path: kk/validation-*
- split: test
path: kk/test-*
- config_name: kln
data_files:
- split: train
path: kln/train-*
- split: validation
path: kln/validation-*
- split: test
path: kln/test-*
- config_name: kmr
data_files:
- split: train
path: kmr/train-*
- split: validation
path: kmr/validation-*
- split: test
path: kmr/test-*
- config_name: ko
data_files:
- split: train
path: ko/train-*
- split: validation
path: ko/validation-*
- split: test
path: ko/test-*
- config_name: ky
data_files:
- split: train
path: ky/train-*
- split: validation
path: ky/validation-*
- split: test
path: ky/test-*
- config_name: lg
data_files:
- split: train
path: lg/train-*
- split: validation
path: lg/validation-*
- split: test
path: lg/test-*
- config_name: lij
data_files:
- split: train
path: lij/train-*
- split: validation
path: lij/validation-*
- split: test
path: lij/test-*
- config_name: lt
data_files:
- split: train
path: lt/train-*
- split: validation
path: lt/validation-*
- split: test
path: lt/test-*
- config_name: ltg
data_files:
- split: train
path: ltg/train-*
- split: validation
path: ltg/validation-*
- split: test
path: ltg/test-*
- config_name: luo
data_files:
- split: train
path: luo/train-*
- split: validation
path: luo/validation-*
- split: test
path: luo/test-*
- config_name: lv
data_files:
- split: train
path: lv/train-*
- split: validation
path: lv/validation-*
- split: test
path: lv/test-*
- config_name: mdf
data_files:
- split: train
path: mdf/train-*
- split: validation
path: mdf/validation-*
- split: test
path: mdf/test-*
- config_name: mhr
data_files:
- split: train
path: mhr/train-*
- split: validation
path: mhr/validation-*
- split: test
path: mhr/test-*
- config_name: mk
data_files:
- split: train
path: mk/train-*
- split: validation
path: mk/validation-*
- split: test
path: mk/test-*
- config_name: ml
data_files:
- split: train
path: ml/train-*
- split: validation
path: ml/validation-*
- split: test
path: ml/test-*
- config_name: mn
data_files:
- split: train
path: mn/train-*
- split: validation
path: mn/validation-*
- split: test
path: mn/test-*
- config_name: mr
data_files:
- split: train
path: mr/train-*
- split: validation
path: mr/validation-*
- split: test
path: mr/test-*
- config_name: mrj
data_files:
- split: train
path: mrj/train-*
- split: validation
path: mrj/validation-*
- split: test
path: mrj/test-*
- config_name: mt
data_files:
- split: train
path: mt/train-*
- split: validation
path: mt/validation-*
- split: test
path: mt/test-*
- config_name: myv
data_files:
- split: train
path: myv/train-*
- split: validation
path: myv/validation-*
- split: test
path: myv/test-*
- config_name: nan-tw
data_files:
- split: train
path: nan-tw/train-*
- split: validation
path: nan-tw/validation-*
- split: test
path: nan-tw/test-*
- config_name: ne-NP
data_files:
- split: train
path: ne-NP/train-*
- split: validation
path: ne-NP/validation-*
- split: test
path: ne-NP/test-*
- config_name: nl
data_files:
- split: train
path: nl/train-*
- split: validation
path: nl/validation-*
- split: test
path: nl/test-*
- config_name: nn-NO
data_files:
- split: train
path: nn-NO/train-*
- split: validation
path: nn-NO/validation-*
- split: test
path: nn-NO/test-*
- config_name: oc
data_files:
- split: train
path: oc/train-*
- split: validation
path: oc/validation-*
- split: test
path: oc/test-*
- config_name: or
data_files:
- split: train
path: or/train-*
- split: validation
path: or/validation-*
- split: test
path: or/test-*
- config_name: os
data_files:
- split: train
path: os/train-*
- split: validation
path: os/validation-*
- split: test
path: os/test-*
- config_name: pa-IN
data_files:
- split: train
path: pa-IN/train-*
- split: validation
path: pa-IN/validation-*
- split: test
path: pa-IN/test-*
- config_name: pl
data_files:
- split: train
path: pl/train-*
- split: validation
path: pl/validation-*
- split: test
path: pl/test-*
- config_name: ps
data_files:
- split: train
path: ps/train-*
- split: validation
path: ps/validation-*
- split: test
path: ps/test-*
- config_name: pt
data_files:
- split: train
path: pt/train-*
- split: validation
path: pt/validation-*
- split: test
path: pt/test-*
- config_name: rm-sursilv
data_files:
- split: train
path: rm-sursilv/train-*
- split: validation
path: rm-sursilv/validation-*
- split: test
path: rm-sursilv/test-*
- config_name: rm-vallader
data_files:
- split: train
path: rm-vallader/train-*
- split: validation
path: rm-vallader/validation-*
- split: test
path: rm-vallader/test-*
- config_name: ro
data_files:
- split: train
path: ro/train-*
- split: validation
path: ro/validation-*
- split: test
path: ro/test-*
- config_name: ru
data_files:
- split: train
path: ru/train-*
- split: validation
path: ru/validation-*
- split: test
path: ru/test-*
- config_name: rw
data_files:
- split: train
path: rw/train-*
- split: validation
path: rw/validation-*
- split: test
path: rw/test-*
- config_name: sah
data_files:
- split: train
path: sah/train-*
- split: validation
path: sah/validation-*
- split: test
path: sah/test-*
- config_name: sat
data_files:
- split: train
path: sat/train-*
- split: validation
path: sat/validation-*
- split: test
path: sat/test-*
- config_name: sc
data_files:
- split: train
path: sc/train-*
- split: validation
path: sc/validation-*
- split: test
path: sc/test-*
- config_name: sk
data_files:
- split: train
path: sk/train-*
- split: validation
path: sk/validation-*
- split: test
path: sk/test-*
- config_name: skr
data_files:
- split: train
path: skr/train-*
- split: validation
path: skr/validation-*
- split: test
path: skr/test-*
- config_name: sl
data_files:
- split: train
path: sl/train-*
- split: validation
path: sl/validation-*
- split: test
path: sl/test-*
- config_name: sq
data_files:
- split: train
path: sq/train-*
- split: validation
path: sq/validation-*
- split: test
path: sq/test-*
- config_name: sr
data_files:
- split: train
path: sr/train-*
- split: validation
path: sr/validation-*
- split: test
path: sr/test-*
- config_name: sv-SE
data_files:
- split: train
path: sv-SE/train-*
- split: validation
path: sv-SE/validation-*
- split: test
path: sv-SE/test-*
- config_name: sw
data_files:
- split: train
path: sw/train-*
- split: validation
path: sw/validation-*
- split: test
path: sw/test-*
- config_name: ta
data_files:
- split: train
path: ta/train-*
- split: validation
path: ta/validation-*
- split: test
path: ta/test-*
- config_name: te
data_files:
- split: train
path: te/train-*
- split: validation
path: te/validation-*
- split: test
path: te/test-*
- config_name: th
data_files:
- split: train
path: th/train-*
- split: validation
path: th/validation-*
- split: test
path: th/test-*
- config_name: tig
data_files:
- split: train
path: tig/train-*
- split: validation
path: tig/validation-*
- split: test
path: tig/test-*
- config_name: tk
data_files:
- split: train
path: tk/train-*
- split: validation
path: tk/validation-*
- split: test
path: tk/test-*
- config_name: tn
data_files:
- split: train
path: tn/train-*
- split: validation
path: tn/validation-*
- split: test
path: tn/test-*
- config_name: tok
data_files:
- split: train
path: tok/train-*
- split: validation
path: tok/validation-*
- split: test
path: tok/test-*
- config_name: tr
data_files:
- split: train
path: tr/train-*
- split: validation
path: tr/validation-*
- split: test
path: tr/test-*
- config_name: tt
data_files:
- split: train
path: tt/train-*
- split: validation
path: tt/validation-*
- split: test
path: tt/test-*
- config_name: ug
data_files:
- split: train
path: ug/train-*
- split: validation
path: ug/validation-*
- split: test
path: ug/test-*
- config_name: uk
data_files:
- split: train
path: uk/train-*
- split: validation
path: uk/validation-*
- split: test
path: uk/test-*
- config_name: ur
data_files:
- split: train
path: ur/train-*
- split: validation
path: ur/validation-*
- split: test
path: ur/test-*
- config_name: uz
data_files:
- split: train
path: uz/train-*
- split: validation
path: uz/validation-*
- split: test
path: uz/test-*
- config_name: vi
data_files:
- split: train
path: vi/train-*
- split: validation
path: vi/validation-*
- split: test
path: vi/test-*
- config_name: yi
data_files:
- split: train
path: yi/train-*
- split: validation
path: yi/validation-*
- split: test
path: yi/test-*
- config_name: yo
data_files:
- split: train
path: yo/train-*
- split: validation
path: yo/validation-*
- split: test
path: yo/test-*
- config_name: yue
data_files:
- split: train
path: yue/train-*
- split: validation
path: yue/validation-*
- split: test
path: yue/test-*
- config_name: zgh
data_files:
- split: train
path: zgh/train-*
- split: validation
path: zgh/validation-*
- split: test
path: zgh/test-*
- config_name: zh-CN
data_files:
- split: train
path: zh-CN/train-*
- split: validation
path: zh-CN/validation-*
- split: test
path: zh-CN/test-*
- config_name: zh-HK
data_files:
- split: train
path: zh-HK/train-*
- split: validation
path: zh-HK/validation-*
- split: test
path: zh-HK/test-*
- config_name: zh-TW
data_files:
- split: train
path: zh-TW/train-*
- split: validation
path: zh-TW/validation-*
- split: test
path: zh-TW/test-*
- config_name: zza
data_files:
- split: train
path: zza/train-*
- split: validation
path: zza/validation-*
- split: test
path: zza/test-*
---
提供机构:
mteb



