five

deepdml/fleurs-neucodec

收藏
Hugging Face2026-04-08 更新2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/deepdml/fleurs-neucodec
下载链接
链接失效反馈
官方服务:
资源简介:
--- dataset_info: - config_name: af_za features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 2674598 num_examples: 1032 - name: validation num_bytes: 469653 num_examples: 198 - name: test num_bytes: 660480 num_examples: 264 download_size: 2481292 dataset_size: 3804731 - config_name: am_et features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8152555 num_examples: 3163 - name: validation num_bytes: 480967 num_examples: 223 - name: test num_bytes: 1186649 num_examples: 516 download_size: 6039583 dataset_size: 9820171 - config_name: ar_eg features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 4461675 num_examples: 2104 - name: validation num_bytes: 643845 num_examples: 295 - name: test num_bytes: 958209 num_examples: 428 download_size: 7992262 dataset_size: 6063729 - config_name: as_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7838734 num_examples: 2812 - name: validation num_bytes: 1034094 num_examples: 418 - name: test num_bytes: 2546934 num_examples: 984 download_size: 6835033 dataset_size: 11419762 - config_name: ast_es features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5555866 num_examples: 2511 - name: validation num_bytes: 730349 num_examples: 398 - name: test num_bytes: 1801941 num_examples: 946 download_size: 5165514 dataset_size: 8088156 - config_name: az_az features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6840300 num_examples: 2665 - name: validation num_bytes: 991003 num_examples: 400 - name: test num_bytes: 2376591 num_examples: 923 download_size: 6176359 dataset_size: 10207894 - config_name: be_by features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6973946 num_examples: 2433 - name: validation num_bytes: 1207162 num_examples: 408 - name: test num_bytes: 2951263 num_examples: 967 download_size: 6586056 dataset_size: 11132371 - config_name: bg_bg features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6992579 num_examples: 2973 - name: validation num_bytes: 779567 num_examples: 395 - name: test num_bytes: 1369891 num_examples: 658 download_size: 5697042 dataset_size: 9142037 - config_name: bn_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7874521 num_examples: 3006 - name: validation num_bytes: 1062282 num_examples: 402 - name: test num_bytes: 2517463 num_examples: 920 download_size: 6855687 dataset_size: 11454266 - config_name: bs_ba features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7349746 num_examples: 3091 - name: validation num_bytes: 982530 num_examples: 400 - name: test num_bytes: 2323555 num_examples: 925 download_size: 6481850 dataset_size: 10655831 - config_name: ca_es features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5461798 num_examples: 2300 - name: validation num_bytes: 952851 num_examples: 404 - name: test num_bytes: 2303993 num_examples: 940 download_size: 10906478 dataset_size: 8718642 - config_name: ceb_ph features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8932815 num_examples: 3261 - name: validation num_bytes: 644106 num_examples: 225 - name: test num_bytes: 1622539 num_examples: 541 download_size: 6783332 dataset_size: 11199460 - config_name: ckb_iq features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7684223 num_examples: 3040 - name: validation num_bytes: 906946 num_examples: 386 - name: test num_bytes: 2202021 num_examples: 922 download_size: 6490917 dataset_size: 10793190 - config_name: cmn_hans_cn features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7185724 num_examples: 3246 - name: validation num_bytes: 937433 num_examples: 409 - name: test num_bytes: 2265405 num_examples: 945 download_size: 6456784 dataset_size: 10388562 - config_name: cs_cz features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6220359 num_examples: 2811 - name: validation num_bytes: 728541 num_examples: 305 - name: test num_bytes: 1799684 num_examples: 723 download_size: 5399635 dataset_size: 8748584 - config_name: cy_gb features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8935350 num_examples: 3427 - name: validation num_bytes: 1320018 num_examples: 447 - name: test num_bytes: 3134709 num_examples: 1021 download_size: 8054066 dataset_size: 13390077 - config_name: da_dk features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5527571 num_examples: 2465 - name: validation num_bytes: 862498 num_examples: 395 - name: test num_bytes: 2161795 num_examples: 930 download_size: 5247344 dataset_size: 8551864 - config_name: de_de features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6631036 num_examples: 2987 - name: validation num_bytes: 927034 num_examples: 363 - name: test num_bytes: 2312448 num_examples: 862 download_size: 5994007 dataset_size: 9870518 - config_name: el_gr features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7392728 num_examples: 3215 - name: validation num_bytes: 565178 num_examples: 271 - name: test num_bytes: 1404500 num_examples: 650 download_size: 5925312 dataset_size: 9362406 - config_name: en_us features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5524510 num_examples: 2602 - name: validation num_bytes: 772092 num_examples: 394 - name: test num_bytes: 1309507 num_examples: 647 download_size: 4860355 dataset_size: 7606109 - config_name: es_419 features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6483654 num_examples: 2796 - name: validation num_bytes: 994474 num_examples: 408 - name: test num_bytes: 2271000 num_examples: 908 download_size: 5947646 dataset_size: 9749128 - config_name: et_ee features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5380471 num_examples: 2501 - name: validation num_bytes: 912961 num_examples: 387 - name: test num_bytes: 2209176 num_examples: 893 download_size: 5231292 dataset_size: 8502608 - config_name: fa_ir features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8846677 num_examples: 3101 - name: validation num_bytes: 1117914 num_examples: 369 - name: test num_bytes: 2707410 num_examples: 871 download_size: 7606459 dataset_size: 12672001 - config_name: ff_sn features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 10161946 num_examples: 3235 - name: validation num_bytes: 757360 num_examples: 273 - name: test num_bytes: 1924061 num_examples: 660 download_size: 7517093 dataset_size: 12843367 - config_name: fi_fi features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6480514 num_examples: 2704 - name: validation num_bytes: 1037258 num_examples: 415 - name: test num_bytes: 2424247 num_examples: 918 download_size: 6092276 dataset_size: 9942019 - config_name: fil_ph features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5647352 num_examples: 1884 - name: validation num_bytes: 1443329 num_examples: 418 - name: test num_bytes: 3497208 num_examples: 964 download_size: 6178889 dataset_size: 10587889 - config_name: fr_fr features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7593430 num_examples: 3193 - name: validation num_bytes: 586548 num_examples: 289 - name: test num_bytes: 1438209 num_examples: 676 download_size: 6024280 dataset_size: 9618187 - config_name: ga_ie features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8869591 num_examples: 2845 - name: validation num_bytes: 1087938 num_examples: 369 - name: test num_bytes: 2534016 num_examples: 842 download_size: 7363897 dataset_size: 12491545 - config_name: gl_es features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 4922061 num_examples: 2175 - name: validation num_bytes: 773155 num_examples: 395 - name: test num_bytes: 1920946 num_examples: 927 download_size: 9673610 dataset_size: 7616162 - config_name: gu_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6615605 num_examples: 3145 - name: validation num_bytes: 880495 num_examples: 432 - name: test num_bytes: 2148116 num_examples: 1000 download_size: 6009761 dataset_size: 9644216 - config_name: ha_ng features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 9969323 num_examples: 3259 - name: validation num_bytes: 1115634 num_examples: 296 - name: test num_bytes: 2435472 num_examples: 621 download_size: 7989604 dataset_size: 13520429 - config_name: he_il features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6985386 num_examples: 3242 - name: validation num_bytes: 606115 num_examples: 328 - name: test num_bytes: 1515631 num_examples: 792 download_size: 5790999 dataset_size: 9107132 - config_name: hi_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 4898980 num_examples: 2120 - name: validation num_bytes: 524598 num_examples: 239 - name: test num_bytes: 987108 num_examples: 418 download_size: 4155278 dataset_size: 6410686 - config_name: hr_hr features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8653788 num_examples: 3461 - name: validation num_bytes: 755704 num_examples: 377 - name: test num_bytes: 1892642 num_examples: 914 download_size: 6884181 dataset_size: 11302134 - config_name: hu_hu features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6842174 num_examples: 3095 - name: validation num_bytes: 981755 num_examples: 407 - name: test num_bytes: 2254837 num_examples: 905 download_size: 6218333 dataset_size: 10078766 - config_name: hy_am features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7619250 num_examples: 3053 - name: validation num_bytes: 888859 num_examples: 395 - name: test num_bytes: 2210714 num_examples: 932 download_size: 6434386 dataset_size: 10718823 - config_name: id_id features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6677360 num_examples: 2579 - name: validation num_bytes: 851560 num_examples: 350 - name: test num_bytes: 1737061 num_examples: 687 download_size: 5714158 dataset_size: 9265981 - config_name: ig_ng features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 10074523 num_examples: 2839 - name: validation num_bytes: 1386397 num_examples: 413 - name: test num_bytes: 3512213 num_examples: 969 download_size: 8570497 dataset_size: 14973133 - config_name: is_is features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 2096749 num_examples: 926 - name: validation num_bytes: 88270 num_examples: 36 - name: test num_bytes: 131252 num_examples: 46 download_size: 1517809 dataset_size: 2316271 - config_name: it_it features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6631336 num_examples: 3030 - name: validation num_bytes: 1132559 num_examples: 391 - name: test num_bytes: 2577973 num_examples: 865 download_size: 6349035 dataset_size: 10341868 - config_name: ja_jp features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5457068 num_examples: 2292 - name: validation num_bytes: 690777 num_examples: 266 - name: test num_bytes: 1734346 num_examples: 650 download_size: 4915501 dataset_size: 7882191 - config_name: jv_id features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8211757 num_examples: 3051 - name: validation num_bytes: 815491 num_examples: 295 - name: test num_bytes: 2061097 num_examples: 728 download_size: 6703062 dataset_size: 11088345 - config_name: ka_ge features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 3723501 num_examples: 1491 - name: validation num_bytes: 898213 num_examples: 409 - name: test num_bytes: 2259061 num_examples: 979 download_size: 4253080 dataset_size: 6880775 - config_name: kam_ke features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 10790396 num_examples: 3340 - name: validation num_bytes: 1057282 num_examples: 338 - name: test num_bytes: 2727307 num_examples: 827 download_size: 8622958 dataset_size: 14574985 - config_name: kea_cv features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7705834 num_examples: 2715 - name: validation num_bytes: 970985 num_examples: 366 - name: test num_bytes: 2395683 num_examples: 864 download_size: 6633006 dataset_size: 11072502 - config_name: kk_kz features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8654476 num_examples: 3200 - name: validation num_bytes: 1116808 num_examples: 369 - name: test num_bytes: 2803105 num_examples: 856 download_size: 13283394 dataset_size: 12574389 - config_name: km_kh features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5197343 num_examples: 1675 - name: validation num_bytes: 944319 num_examples: 326 - name: test num_bytes: 2306769 num_examples: 771 download_size: 5109206 dataset_size: 8448431 - config_name: kn_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6076680 num_examples: 2283 - name: validation num_bytes: 953942 num_examples: 368 - name: test num_bytes: 2329006 num_examples: 838 download_size: 5764998 dataset_size: 9359628 - config_name: ko_kr features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5828089 num_examples: 2307 - name: validation num_bytes: 566914 num_examples: 226 - name: test num_bytes: 979867 num_examples: 382 download_size: 4634073 dataset_size: 7374870 - config_name: ky_kg features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6856651 num_examples: 2818 - name: validation num_bytes: 981710 num_examples: 422 - name: test num_bytes: 2385553 num_examples: 977 download_size: 6045261 dataset_size: 10223914 - config_name: lb_lu features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6216735 num_examples: 2502 - name: validation num_bytes: 823189 num_examples: 408 - name: test num_bytes: 2012863 num_examples: 934 download_size: 5569676 dataset_size: 9052787 - config_name: lg_ug features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 9222118 num_examples: 2478 - name: validation num_bytes: 1017861 num_examples: 306 - name: test num_bytes: 2485663 num_examples: 723 download_size: 7356228 dataset_size: 12725642 - config_name: ln_cd features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 13299414 num_examples: 3350 - name: validation num_bytes: 777227 num_examples: 209 - name: test num_bytes: 1884053 num_examples: 478 download_size: 9156053 dataset_size: 15960694 - config_name: lo_la features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5381591 num_examples: 1809 - name: validation num_bytes: 423024 num_examples: 191 - name: test num_bytes: 1000636 num_examples: 404 download_size: 4080214 dataset_size: 6805251 - config_name: lt_lt features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7169812 num_examples: 2937 - name: validation num_bytes: 863579 num_examples: 416 - name: test num_bytes: 2186001 num_examples: 986 download_size: 6199043 dataset_size: 10219392 - config_name: luo_ke features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7410676 num_examples: 2384 - name: validation num_bytes: 273189 num_examples: 102 - name: test num_bytes: 714425 num_examples: 256 download_size: 5114906 dataset_size: 8398290 - config_name: lv_lv features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 4807633 num_examples: 2110 - name: validation num_bytes: 826551 num_examples: 356 - name: test num_bytes: 2085756 num_examples: 851 download_size: 4797068 dataset_size: 7719940 - config_name: mi_nz features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 13250252 num_examples: 3249 - name: validation num_bytes: 1687871 num_examples: 429 - name: test num_bytes: 4257056 num_examples: 1008 download_size: 11029260 dataset_size: 19195179 - config_name: mk_mk features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5018626 num_examples: 2337 - name: validation num_bytes: 947152 num_examples: 415 - name: test num_bytes: 2354578 num_examples: 973 download_size: 9058245 dataset_size: 8320356 - config_name: ml_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7393010 num_examples: 3043 - name: validation num_bytes: 1230561 num_examples: 418 - name: test num_bytes: 2861822 num_examples: 958 download_size: 6873942 dataset_size: 11485393 - config_name: mn_mn features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8459959 num_examples: 3074 - name: validation num_bytes: 859179 num_examples: 405 - name: test num_bytes: 2100263 num_examples: 949 download_size: 6827128 dataset_size: 11419401 - config_name: mr_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8767758 num_examples: 3269 - name: validation num_bytes: 1145883 num_examples: 443 - name: test num_bytes: 2826769 num_examples: 1015 download_size: 7778139 dataset_size: 12740410 - config_name: ms_my features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7010332 num_examples: 2667 - name: validation num_bytes: 681778 num_examples: 324 - name: test num_bytes: 1667091 num_examples: 749 download_size: 5749549 dataset_size: 9359201 - config_name: mt_mt features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7280378 num_examples: 2895 - name: validation num_bytes: 1095283 num_examples: 404 - name: test num_bytes: 2599894 num_examples: 926 download_size: 6592968 dataset_size: 10975555 - config_name: my_mm features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8891367 num_examples: 3058 - name: validation num_bytes: 1201430 num_examples: 384 - name: test num_bytes: 2789781 num_examples: 880 download_size: 7637855 dataset_size: 12882578 - config_name: nb_no features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8016740 num_examples: 3167 - name: validation num_bytes: 422981 num_examples: 163 - name: test num_bytes: 915410 num_examples: 357 download_size: 5815763 dataset_size: 9355131 - config_name: ne_np features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8266083 num_examples: 3331 - name: validation num_bytes: 667715 num_examples: 305 - name: test num_bytes: 1680720 num_examples: 726 download_size: 6594697 dataset_size: 10614518 - config_name: nl_nl features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5662932 num_examples: 2918 - name: validation num_bytes: 333005 num_examples: 171 - name: test num_bytes: 715459 num_examples: 364 download_size: 4250650 dataset_size: 6711396 - config_name: nso_za features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 9540682 num_examples: 1990 - name: validation num_bytes: 1334538 num_examples: 363 - name: test num_bytes: 3065454 num_examples: 790 download_size: 7917510 dataset_size: 13940674 - config_name: ny_mw features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7964147 num_examples: 2694 - name: validation num_bytes: 1002159 num_examples: 311 - name: test num_bytes: 2569668 num_examples: 761 download_size: 6656435 dataset_size: 11535974 - config_name: oc_fr features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 10442703 num_examples: 3379 - name: validation num_bytes: 1320904 num_examples: 427 - name: test num_bytes: 3295520 num_examples: 998 download_size: 8859902 dataset_size: 15059127 - config_name: om_et features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 4878103 num_examples: 1701 - name: validation num_bytes: 44058 num_examples: 19 - name: test num_bytes: 95607 num_examples: 41 download_size: 3067745 dataset_size: 5017768 - config_name: or_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 2527557 num_examples: 1081 - name: validation num_bytes: 912794 num_examples: 392 - name: test num_bytes: 2178538 num_examples: 883 download_size: 3512317 dataset_size: 5618889 - config_name: pa_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 4677347 num_examples: 1923 - name: validation num_bytes: 550134 num_examples: 251 - name: test num_bytes: 1350674 num_examples: 574 download_size: 4124886 dataset_size: 6578155 - config_name: pl_pl features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6752707 num_examples: 2841 - name: validation num_bytes: 624610 num_examples: 338 - name: test num_bytes: 1513897 num_examples: 758 download_size: 5484038 dataset_size: 8891214 - config_name: ps_af features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6523119 num_examples: 2513 - name: validation num_bytes: 528227 num_examples: 217 - name: test num_bytes: 1295216 num_examples: 512 download_size: 5126702 dataset_size: 8346562 - config_name: pt_br features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7469492 num_examples: 2793 - name: validation num_bytes: 948190 num_examples: 386 - name: test num_bytes: 2377456 num_examples: 919 download_size: 6550140 dataset_size: 10795138 - config_name: ro_ro features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7434753 num_examples: 2891 - name: validation num_bytes: 794566 num_examples: 387 - name: test num_bytes: 1863493 num_examples: 883 download_size: 6161795 dataset_size: 10092812 - config_name: ru_ru features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5925332 num_examples: 2562 - name: validation num_bytes: 793946 num_examples: 356 - name: test num_bytes: 1837048 num_examples: 775 download_size: 5330969 dataset_size: 8556326 - config_name: sd_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 9016634 num_examples: 3443 - name: validation num_bytes: 978036 num_examples: 426 - name: test num_bytes: 2418790 num_examples: 980 download_size: 7512276 dataset_size: 12413460 - config_name: sk_sk features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 4323532 num_examples: 1957 - name: validation num_bytes: 800149 num_examples: 352 - name: test num_bytes: 1920862 num_examples: 792 download_size: 4348813 dataset_size: 7044543 - config_name: sl_si features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5717153 num_examples: 2512 - name: validation num_bytes: 660422 num_examples: 349 - name: test num_bytes: 1675844 num_examples: 834 download_size: 4953783 dataset_size: 8053419 - config_name: sn_zw features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7308792 num_examples: 2463 - name: validation num_bytes: 1131817 num_examples: 393 - name: test num_bytes: 2794764 num_examples: 925 download_size: 6622484 dataset_size: 11235373 - config_name: so_so features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 9719921 num_examples: 3148 - name: validation num_bytes: 1136749 num_examples: 432 - name: test num_bytes: 2869504 num_examples: 1019 download_size: 8213841 dataset_size: 13726174 - config_name: sr_rs features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7824032 num_examples: 2944 - name: validation num_bytes: 614595 num_examples: 290 - name: test num_bytes: 1564507 num_examples: 700 download_size: 6108297 dataset_size: 10003134 - config_name: sv_se features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6139966 num_examples: 2385 - name: validation num_bytes: 724956 num_examples: 330 - name: test num_bytes: 1717316 num_examples: 759 download_size: 5263829 dataset_size: 8582238 - config_name: sw_ke features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 9853821 num_examples: 3070 - name: validation num_bytes: 587422 num_examples: 211 - name: test num_bytes: 1411960 num_examples: 487 download_size: 7089436 dataset_size: 11853203 - config_name: ta_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6367761 num_examples: 2367 - name: validation num_bytes: 921690 num_examples: 377 - name: test num_bytes: 1563661 num_examples: 591 download_size: 5551970 dataset_size: 8853112 - config_name: te_in features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5815785 num_examples: 2302 - name: validation num_bytes: 658792 num_examples: 311 - name: test num_bytes: 1066554 num_examples: 472 download_size: 4809066 dataset_size: 7541131 - config_name: tg_tj features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6307404 num_examples: 2298 - name: validation num_bytes: 694155 num_examples: 240 - name: test num_bytes: 1773823 num_examples: 600 download_size: 5132344 dataset_size: 8775382 - config_name: th_th features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6244649 num_examples: 2602 - name: validation num_bytes: 1049175 num_examples: 439 - name: test num_bytes: 2514143 num_examples: 1021 download_size: 5968386 dataset_size: 9807967 - config_name: tr_tr features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6103326 num_examples: 2526 - name: validation num_bytes: 823153 num_examples: 338 - name: test num_bytes: 1912738 num_examples: 743 download_size: 5502046 dataset_size: 8839217 - config_name: uk_ua features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6646786 num_examples: 2810 - name: validation num_bytes: 693905 num_examples: 325 - name: test num_bytes: 1666076 num_examples: 750 download_size: 5590233 dataset_size: 9006767 - config_name: umb_ao features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 8221357 num_examples: 1597 - name: validation num_bytes: 689247 num_examples: 135 - name: test num_bytes: 1995904 num_examples: 379 download_size: 6301136 dataset_size: 10906508 - config_name: ur_pk features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5166662 num_examples: 2109 - name: validation num_bytes: 556691 num_examples: 267 - name: test num_bytes: 601148 num_examples: 299 download_size: 3929974 dataset_size: 6324501 - config_name: uz_uz features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7407165 num_examples: 2943 - name: validation num_bytes: 855447 num_examples: 363 - name: test num_bytes: 2085574 num_examples: 862 download_size: 6226305 dataset_size: 10348186 - config_name: vi_vn features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6685954 num_examples: 2994 - name: validation num_bytes: 877361 num_examples: 361 - name: test num_bytes: 2205075 num_examples: 857 download_size: 5949654 dataset_size: 9768390 - config_name: wo_sn features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 6395646 num_examples: 2279 - name: validation num_bytes: 560712 num_examples: 169 - name: test num_bytes: 1276391 num_examples: 371 download_size: 4889392 dataset_size: 8232749 - config_name: xh_za features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 9776050 num_examples: 3466 - name: validation num_bytes: 1131158 num_examples: 446 - name: test num_bytes: 2774749 num_examples: 1041 download_size: 7845439 dataset_size: 13681957 - config_name: yo_ng features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 7340819 num_examples: 2339 - name: validation num_bytes: 1251912 num_examples: 378 - name: test num_bytes: 2752857 num_examples: 831 download_size: 6768330 dataset_size: 11345588 - config_name: yue_hant_hk features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 5337604 num_examples: 1939 - name: validation num_bytes: 841394 num_examples: 362 - name: test num_bytes: 1953243 num_examples: 819 download_size: 4929177 dataset_size: 8132241 - config_name: zu_za features: - name: audio_path dtype: string - name: duration dtype: float32 - name: codes sequence: int32 - name: language dtype: string splits: - name: train num_bytes: 10842601 num_examples: 2858 - name: validation num_bytes: 1117482 num_examples: 354 - name: test num_bytes: 2832400 num_examples: 854 download_size: 8476982 dataset_size: 14792483 configs: - config_name: af_za data_files: - split: train path: af_za/train-* - split: validation path: af_za/validation-* - split: test path: af_za/test-* - config_name: am_et data_files: - split: train path: am_et/train-* - split: validation path: am_et/validation-* - split: test path: am_et/test-* - config_name: ar_eg data_files: - split: train path: ar_eg/train-* - split: validation path: ar_eg/validation-* - split: test path: ar_eg/test-* - config_name: as_in data_files: - split: train path: as_in/train-* - split: validation path: as_in/validation-* - split: test path: as_in/test-* - config_name: ast_es data_files: - split: train path: ast_es/train-* - split: validation path: ast_es/validation-* - split: test path: ast_es/test-* - config_name: az_az data_files: - split: train path: az_az/train-* - split: validation path: az_az/validation-* - split: test path: az_az/test-* - config_name: be_by data_files: - split: train path: be_by/train-* - split: validation path: be_by/validation-* - split: test path: be_by/test-* - config_name: bg_bg data_files: - split: train path: bg_bg/train-* - split: validation path: bg_bg/validation-* - split: test path: bg_bg/test-* - config_name: bn_in data_files: - split: train path: bn_in/train-* - split: validation path: bn_in/validation-* - split: test path: bn_in/test-* - config_name: bs_ba data_files: - split: train path: bs_ba/train-* - split: validation path: bs_ba/validation-* - split: test path: bs_ba/test-* - config_name: ca_es data_files: - split: train path: ca_es/train-* - split: validation path: ca_es/validation-* - split: test path: ca_es/test-* - config_name: ceb_ph data_files: - split: train path: ceb_ph/train-* - split: validation path: ceb_ph/validation-* - split: test path: ceb_ph/test-* - config_name: ckb_iq data_files: - split: train path: ckb_iq/train-* - split: validation path: ckb_iq/validation-* - split: test path: ckb_iq/test-* - config_name: cmn_hans_cn data_files: - split: train path: cmn_hans_cn/train-* - split: validation path: cmn_hans_cn/validation-* - split: test path: cmn_hans_cn/test-* - config_name: cs_cz data_files: - split: train path: cs_cz/train-* - split: validation path: cs_cz/validation-* - split: test path: cs_cz/test-* - config_name: cy_gb data_files: - split: train path: cy_gb/train-* - split: validation path: cy_gb/validation-* - split: test path: cy_gb/test-* - config_name: da_dk data_files: - split: train path: da_dk/train-* - split: validation path: da_dk/validation-* - split: test path: da_dk/test-* - config_name: de_de data_files: - split: train path: de_de/train-* - split: validation path: de_de/validation-* - split: test path: de_de/test-* - config_name: el_gr data_files: - split: train path: el_gr/train-* - split: validation path: el_gr/validation-* - split: test path: el_gr/test-* - config_name: en_us data_files: - split: train path: en_us/train-* - split: validation path: en_us/validation-* - split: test path: en_us/test-* - config_name: es_419 data_files: - split: train path: es_419/train-* - split: validation path: es_419/validation-* - split: test path: es_419/test-* - config_name: et_ee data_files: - split: train path: et_ee/train-* - split: validation path: et_ee/validation-* - split: test path: et_ee/test-* - config_name: fa_ir data_files: - split: train path: fa_ir/train-* - split: validation path: fa_ir/validation-* - split: test path: fa_ir/test-* - config_name: ff_sn data_files: - split: train path: ff_sn/train-* - split: validation path: ff_sn/validation-* - split: test path: ff_sn/test-* - config_name: fi_fi data_files: - split: train path: fi_fi/train-* - split: validation path: fi_fi/validation-* - split: test path: fi_fi/test-* - config_name: fil_ph data_files: - split: train path: fil_ph/train-* - split: validation path: fil_ph/validation-* - split: test path: fil_ph/test-* - config_name: fr_fr data_files: - split: train path: fr_fr/train-* - split: validation path: fr_fr/validation-* - split: test path: fr_fr/test-* - config_name: ga_ie data_files: - split: train path: ga_ie/train-* - split: validation path: ga_ie/validation-* - split: test path: ga_ie/test-* - config_name: gl_es data_files: - split: train path: gl_es/train-* - split: validation path: gl_es/validation-* - split: test path: gl_es/test-* - config_name: gu_in data_files: - split: train path: gu_in/train-* - split: validation path: gu_in/validation-* - split: test path: gu_in/test-* - config_name: ha_ng data_files: - split: train path: ha_ng/train-* - split: validation path: ha_ng/validation-* - split: test path: ha_ng/test-* - config_name: he_il data_files: - split: train path: he_il/train-* - split: validation path: he_il/validation-* - split: test path: he_il/test-* - config_name: hi_in data_files: - split: train path: hi_in/train-* - split: validation path: hi_in/validation-* - split: test path: hi_in/test-* - config_name: hr_hr data_files: - split: train path: hr_hr/train-* - split: validation path: hr_hr/validation-* - split: test path: hr_hr/test-* - config_name: hu_hu data_files: - split: train path: hu_hu/train-* - split: validation path: hu_hu/validation-* - split: test path: hu_hu/test-* - config_name: hy_am data_files: - split: train path: hy_am/train-* - split: validation path: hy_am/validation-* - split: test path: hy_am/test-* - config_name: id_id data_files: - split: train path: id_id/train-* - split: validation path: id_id/validation-* - split: test path: id_id/test-* - config_name: ig_ng data_files: - split: train path: ig_ng/train-* - split: validation path: ig_ng/validation-* - split: test path: ig_ng/test-* - config_name: is_is data_files: - split: train path: is_is/train-* - split: validation path: is_is/validation-* - split: test path: is_is/test-* - config_name: it_it data_files: - split: train path: it_it/train-* - split: validation path: it_it/validation-* - split: test path: it_it/test-* - config_name: ja_jp data_files: - split: train path: ja_jp/train-* - split: validation path: ja_jp/validation-* - split: test path: ja_jp/test-* - config_name: jv_id data_files: - split: train path: jv_id/train-* - split: validation path: jv_id/validation-* - split: test path: jv_id/test-* - config_name: ka_ge data_files: - split: train path: ka_ge/train-* - split: validation path: ka_ge/validation-* - split: test path: ka_ge/test-* - config_name: kam_ke data_files: - split: train path: kam_ke/train-* - split: validation path: kam_ke/validation-* - split: test path: kam_ke/test-* - config_name: kea_cv data_files: - split: train path: kea_cv/train-* - split: validation path: kea_cv/validation-* - split: test path: kea_cv/test-* - config_name: kk_kz data_files: - split: train path: kk_kz/train-* - split: validation path: kk_kz/validation-* - split: test path: kk_kz/test-* - config_name: km_kh data_files: - split: train path: km_kh/train-* - split: validation path: km_kh/validation-* - split: test path: km_kh/test-* - config_name: kn_in data_files: - split: train path: kn_in/train-* - split: validation path: kn_in/validation-* - split: test path: kn_in/test-* - config_name: ko_kr data_files: - split: train path: ko_kr/train-* - split: validation path: ko_kr/validation-* - split: test path: ko_kr/test-* - config_name: ky_kg data_files: - split: train path: ky_kg/train-* - split: validation path: ky_kg/validation-* - split: test path: ky_kg/test-* - config_name: lb_lu data_files: - split: train path: lb_lu/train-* - split: validation path: lb_lu/validation-* - split: test path: lb_lu/test-* - config_name: lg_ug data_files: - split: train path: lg_ug/train-* - split: validation path: lg_ug/validation-* - split: test path: lg_ug/test-* - config_name: ln_cd data_files: - split: train path: ln_cd/train-* - split: validation path: ln_cd/validation-* - split: test path: ln_cd/test-* - config_name: lo_la data_files: - split: train path: lo_la/train-* - split: validation path: lo_la/validation-* - split: test path: lo_la/test-* - config_name: lt_lt data_files: - split: train path: lt_lt/train-* - split: validation path: lt_lt/validation-* - split: test path: lt_lt/test-* - config_name: luo_ke data_files: - split: train path: luo_ke/train-* - split: validation path: luo_ke/validation-* - split: test path: luo_ke/test-* - config_name: lv_lv data_files: - split: train path: lv_lv/train-* - split: validation path: lv_lv/validation-* - split: test path: lv_lv/test-* - config_name: mi_nz data_files: - split: train path: mi_nz/train-* - split: validation path: mi_nz/validation-* - split: test path: mi_nz/test-* - config_name: mk_mk data_files: - split: train path: mk_mk/train-* - split: validation path: mk_mk/validation-* - split: test path: mk_mk/test-* - config_name: ml_in data_files: - split: train path: ml_in/train-* - split: validation path: ml_in/validation-* - split: test path: ml_in/test-* - config_name: mn_mn data_files: - split: train path: mn_mn/train-* - split: validation path: mn_mn/validation-* - split: test path: mn_mn/test-* - config_name: mr_in data_files: - split: train path: mr_in/train-* - split: validation path: mr_in/validation-* - split: test path: mr_in/test-* - config_name: ms_my data_files: - split: train path: ms_my/train-* - split: validation path: ms_my/validation-* - split: test path: ms_my/test-* - config_name: mt_mt data_files: - split: train path: mt_mt/train-* - split: validation path: mt_mt/validation-* - split: test path: mt_mt/test-* - config_name: my_mm data_files: - split: train path: my_mm/train-* - split: validation path: my_mm/validation-* - split: test path: my_mm/test-* - config_name: nb_no data_files: - split: train path: nb_no/train-* - split: validation path: nb_no/validation-* - split: test path: nb_no/test-* - config_name: ne_np data_files: - split: train path: ne_np/train-* - split: validation path: ne_np/validation-* - split: test path: ne_np/test-* - config_name: nl_nl data_files: - split: train path: nl_nl/train-* - split: validation path: nl_nl/validation-* - split: test path: nl_nl/test-* - config_name: nso_za data_files: - split: train path: nso_za/train-* - split: validation path: nso_za/validation-* - split: test path: nso_za/test-* - config_name: ny_mw data_files: - split: train path: ny_mw/train-* - split: validation path: ny_mw/validation-* - split: test path: ny_mw/test-* - config_name: oc_fr data_files: - split: train path: oc_fr/train-* - split: validation path: oc_fr/validation-* - split: test path: oc_fr/test-* - config_name: om_et data_files: - split: train path: om_et/train-* - split: validation path: om_et/validation-* - split: test path: om_et/test-* - config_name: or_in data_files: - split: train path: or_in/train-* - split: validation path: or_in/validation-* - split: test path: or_in/test-* - config_name: pa_in data_files: - split: train path: pa_in/train-* - split: validation path: pa_in/validation-* - split: test path: pa_in/test-* - config_name: pl_pl data_files: - split: train path: pl_pl/train-* - split: validation path: pl_pl/validation-* - split: test path: pl_pl/test-* - config_name: ps_af data_files: - split: train path: ps_af/train-* - split: validation path: ps_af/validation-* - split: test path: ps_af/test-* - config_name: pt_br data_files: - split: train path: pt_br/train-* - split: validation path: pt_br/validation-* - split: test path: pt_br/test-* - config_name: ro_ro data_files: - split: train path: ro_ro/train-* - split: validation path: ro_ro/validation-* - split: test path: ro_ro/test-* - config_name: ru_ru data_files: - split: train path: ru_ru/train-* - split: validation path: ru_ru/validation-* - split: test path: ru_ru/test-* - config_name: sd_in data_files: - split: train path: sd_in/train-* - split: validation path: sd_in/validation-* - split: test path: sd_in/test-* - config_name: sk_sk data_files: - split: train path: sk_sk/train-* - split: validation path: sk_sk/validation-* - split: test path: sk_sk/test-* - config_name: sl_si data_files: - split: train path: sl_si/train-* - split: validation path: sl_si/validation-* - split: test path: sl_si/test-* - config_name: sn_zw data_files: - split: train path: sn_zw/train-* - split: validation path: sn_zw/validation-* - split: test path: sn_zw/test-* - config_name: so_so data_files: - split: train path: so_so/train-* - split: validation path: so_so/validation-* - split: test path: so_so/test-* - config_name: sr_rs data_files: - split: train path: sr_rs/train-* - split: validation path: sr_rs/validation-* - split: test path: sr_rs/test-* - config_name: sv_se data_files: - split: train path: sv_se/train-* - split: validation path: sv_se/validation-* - split: test path: sv_se/test-* - config_name: sw_ke data_files: - split: train path: sw_ke/train-* - split: validation path: sw_ke/validation-* - split: test path: sw_ke/test-* - config_name: ta_in data_files: - split: train path: ta_in/train-* - split: validation path: ta_in/validation-* - split: test path: ta_in/test-* - config_name: te_in data_files: - split: train path: te_in/train-* - split: validation path: te_in/validation-* - split: test path: te_in/test-* - config_name: tg_tj data_files: - split: train path: tg_tj/train-* - split: validation path: tg_tj/validation-* - split: test path: tg_tj/test-* - config_name: th_th data_files: - split: train path: th_th/train-* - split: validation path: th_th/validation-* - split: test path: th_th/test-* - config_name: tr_tr data_files: - split: train path: tr_tr/train-* - split: validation path: tr_tr/validation-* - split: test path: tr_tr/test-* - config_name: uk_ua data_files: - split: train path: uk_ua/train-* - split: validation path: uk_ua/validation-* - split: test path: uk_ua/test-* - config_name: umb_ao data_files: - split: train path: umb_ao/train-* - split: validation path: umb_ao/validation-* - split: test path: umb_ao/test-* - config_name: ur_pk data_files: - split: train path: ur_pk/train-* - split: validation path: ur_pk/validation-* - split: test path: ur_pk/test-* - config_name: uz_uz data_files: - split: train path: uz_uz/train-* - split: validation path: uz_uz/validation-* - split: test path: uz_uz/test-* - config_name: vi_vn data_files: - split: train path: vi_vn/train-* - split: validation path: vi_vn/validation-* - split: test path: vi_vn/test-* - config_name: wo_sn data_files: - split: train path: wo_sn/train-* - split: validation path: wo_sn/validation-* - split: test path: wo_sn/test-* - config_name: xh_za data_files: - split: train path: xh_za/train-* - split: validation path: xh_za/validation-* - split: test path: xh_za/test-* - config_name: yo_ng data_files: - split: train path: yo_ng/train-* - split: validation path: yo_ng/validation-* - split: test path: yo_ng/test-* - config_name: yue_hant_hk data_files: - split: train path: yue_hant_hk/train-* - split: validation path: yue_hant_hk/validation-* - split: test path: yue_hant_hk/test-* - config_name: zu_za data_files: - split: train path: zu_za/train-* - split: validation path: zu_za/validation-* - split: test path: zu_za/test-* size_categories: - 100K<n<1M --- # Dataset ## Dataset Statistics This table shows the number of examples per language configuration and split. | config_name | train_examples | validation_examples | test_examples | |---|---:|---:|---:| | af_za | 1.032 | 198 | 264 | | am_et | 3.163 | 223 | 516 | | ar_eg | 2.104 | 295 | 428 | | as_in | 2.812 | 418 | 984 | | ast_es | 2.511 | 398 | 946 | | az_az | 2.665 | 400 | 923 | | be_by | 2.433 | 408 | 967 | | bg_bg | 2.973 | 395 | 658 | | bn_in | 3.006 | 402 | 920 | | bs_ba | 3.091 | 400 | 925 | | ca_es | 2.300 | 404 | 940 | | ceb_ph | 3.261 | 225 | 541 | | ckb_iq | 3.040 | 386 | 922 | | cmn_hans_cn | 3.246 | 409 | 945 | | cs_cz | 2.811 | 305 | 723 | | cy_gb | 3.427 | 447 | 1.021 | | da_dk | 2.465 | 395 | 930 | | de_de | 2.987 | 363 | 862 | | el_gr | 3.215 | 271 | 650 | | en_us | 2.602 | 394 | 647 | | es_419 | 2.796 | 408 | 908 | | et_ee | 2.501 | 387 | 893 | | fa_ir | 3.101 | 369 | 871 | | ff_sn | 3.235 | 273 | 660 | | fi_fi | 2.704 | 415 | 918 | | fil_ph | 1.884 | 418 | 964 | | fr_fr | 3.193 | 289 | 676 | | ga_ie | 2.845 | 369 | 842 | | gl_es | 2.175 | 395 | 927 | | gu_in | 3.145 | 432 | 1.000 | | ha_ng | 3.259 | 296 | 621 | | he_il | 3.242 | 328 | 792 | | hi_in | 2.120 | 239 | 418 | | hr_hr | 3.461 | 377 | 914 | | hu_hu | 3.095 | 407 | 905 | | hy_am | 3.053 | 395 | 932 | | id_id | 2.579 | 350 | 687 | | ig_ng | 2.839 | 413 | 969 | | is_is | 926 | 36 | 46 | | it_it | 3.030 | 391 | 865 | | ja_jp | 2.292 | 266 | 650 | | jv_id | 3.051 | 295 | 728 | | ka_ge | 1.491 | 409 | 979 | | kam_ke | 3.340 | 338 | 827 | | kea_cv | 2.715 | 366 | 864 | | kk_kz | 3.200 | 369 | 856 | | km_kh | 1.675 | 326 | 771 | | kn_in | 2.283 | 368 | 838 | | ko_kr | 2.307 | 226 | 382 | | ky_kg | 2.818 | 422 | 977 | | lb_lu | 2.502 | 408 | 934 | | lg_ug | 2.478 | 306 | 723 | | ln_cd | 3.350 | 209 | 478 | | lo_la | 1.809 | 191 | 404 | | lt_lt | 2.937 | 416 | 986 | | luo_ke | 2.384 | 102 | 256 | | lv_lv | 2.110 | 356 | 851 | | mi_nz | 3.249 | 429 | 1.008 | | mk_mk | 2.337 | 415 | 973 | | ml_in | 3.043 | 418 | 958 | | mn_mn | 3.074 | 405 | 949 | | mr_in | 3.269 | 443 | 1.015 | | ms_my | 2.667 | 324 | 749 | | mt_mt | 2.895 | 404 | 926 | | my_mm | 3.058 | 384 | 880 | | nb_no | 3.167 | 163 | 357 | | ne_np | 3.331 | 305 | 726 | | nl_nl | 2.918 | 171 | 364 | | nso_za | 1.990 | 363 | 790 | | ny_mw | 2.694 | 311 | 761 | | oc_fr | 3.379 | 427 | 998 | | om_et | 1.701 | 19 | 41 | | or_in | 1.081 | 392 | 883 | | pa_in | 1.923 | 251 | 574 | | pl_pl | 2.841 | 338 | 758 | | ps_af | 2.513 | 217 | 512 | | pt_br | 2.793 | 386 | 919 | | ro_ro | 2.891 | 387 | 883 | | ru_ru | 2.562 | 356 | 775 | | sd_in | 3.443 | 426 | 980 | | sk_sk | 1.957 | 352 | 792 | | sl_si | 2.512 | 349 | 834 | | sn_zw | 2.463 | 393 | 925 | | so_so | 3.148 | 432 | 1.019 | | sr_rs | 2.944 | 290 | 700 | | sv_se | 2.385 | 330 | 759 | | sw_ke | 3.070 | 211 | 487 | | ta_in | 2.367 | 377 | 591 | | te_in | 2.302 | 311 | 472 | | tg_tj | 2.298 | 240 | 600 | | th_th | 2.602 | 439 | 1.021 | | tr_tr | 2.526 | 338 | 743 | | uk_ua | 2.810 | 325 | 750 | | umb_ao | 1.597 | 135 | 379 | | ur_pk | 2.109 | 267 | 299 | | uz_uz | 2.943 | 363 | 862 | | vi_vn | 2.994 | 361 | 857 | | wo_sn | 2.279 | 169 | 371 | | xh_za | 3.466 | 446 | 1.041 | | yo_ng | 2.339 | 378 | 831 | | yue_hant_hk | 1.939 | 362 | 819 | | zu_za | 2.858 | 354 | 854 |
提供机构:
deepdml
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作