five

KoseiUemura/SIB200Classification.v2

收藏
Hugging Face2026-04-18 更新2026-04-26 收录
下载链接:
https://hf-mirror.com/datasets/KoseiUemura/SIB200Classification.v2
下载链接
链接失效反馈
官方服务:
资源简介:
--- dataset_info: - config_name: ace_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10453.0 num_examples: 70 - name: validation num_bytes: 22387.0 num_examples: 139 - name: test num_bytes: 185646.0 num_examples: 1225 download_size: 143601 dataset_size: 218486.0 - config_name: acm_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 15468.0 num_examples: 70 - name: validation num_bytes: 31142.0 num_examples: 139 - name: test num_bytes: 262763.0 num_examples: 1225 download_size: 171481 dataset_size: 309373.0 - config_name: acq_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 15638.0 num_examples: 70 - name: validation num_bytes: 31548.0 num_examples: 139 - name: test num_bytes: 265364.0 num_examples: 1225 download_size: 173694 dataset_size: 312550.0 - config_name: aeb_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 15150.0 num_examples: 70 - name: validation num_bytes: 31226.0 num_examples: 139 - name: test num_bytes: 258647.0 num_examples: 1225 download_size: 171007 dataset_size: 305023.0 - config_name: afr_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10686.0 num_examples: 70 - name: validation num_bytes: 22352.0 num_examples: 139 - name: test num_bytes: 182477.0 num_examples: 1225 download_size: 144631 dataset_size: 215515.0 - config_name: ajp_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 14672.0 num_examples: 70 - name: validation num_bytes: 30121.0 num_examples: 139 - name: test num_bytes: 249781.0 num_examples: 1225 download_size: 164371 dataset_size: 294574.0 - config_name: aka_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10707.0 num_examples: 70 - name: validation num_bytes: 22874.0 num_examples: 139 - name: test num_bytes: 189034.0 num_examples: 1225 download_size: 142988 dataset_size: 222615.0 - config_name: als_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11626.0 num_examples: 70 - name: validation num_bytes: 24784.0 num_examples: 139 - name: test num_bytes: 204828.0 num_examples: 1225 download_size: 156564 dataset_size: 241238.0 - config_name: amh_Ethi features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 16468.0 num_examples: 70 - name: validation num_bytes: 33977.0 num_examples: 139 - name: test num_bytes: 287586.0 num_examples: 1225 download_size: 185850 dataset_size: 338031.0 - config_name: apc_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 14815.0 num_examples: 70 - name: validation num_bytes: 29933.0 num_examples: 139 - name: test num_bytes: 249456.0 num_examples: 1225 download_size: 162289 dataset_size: 294204.0 - config_name: arb_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11507.0 num_examples: 70 - name: validation num_bytes: 23844.0 num_examples: 139 - name: test num_bytes: 197744.62367346938 num_examples: 1223 download_size: 165027 dataset_size: 233095.62367346938 - config_name: ars_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 15772.0 num_examples: 70 - name: validation num_bytes: 31868.0 num_examples: 139 - name: test num_bytes: 268483.0 num_examples: 1225 download_size: 176066 dataset_size: 316123.0 - config_name: ary_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 15119.0 num_examples: 70 - name: validation num_bytes: 31534.0 num_examples: 139 - name: test num_bytes: 261538.30040816325 num_examples: 1223 download_size: 170776 dataset_size: 308191.3004081632 - config_name: arz_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 15605.0 num_examples: 70 - name: validation num_bytes: 31246.0 num_examples: 139 - name: test num_bytes: 261238.0 num_examples: 1225 download_size: 168487 dataset_size: 308089.0 - config_name: asm_Beng features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 24348.0 num_examples: 70 - name: validation num_bytes: 49799.0 num_examples: 139 - name: test num_bytes: 415437.0 num_examples: 1225 download_size: 219383 dataset_size: 489584.0 - config_name: ast_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10180.0 num_examples: 70 - name: validation num_bytes: 21785.0 num_examples: 139 - name: test num_bytes: 182128.0 num_examples: 1225 download_size: 147145 dataset_size: 214093.0 - config_name: awa_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 23310.0 num_examples: 70 - name: validation num_bytes: 49396.0 num_examples: 139 - name: test num_bytes: 411104.0 num_examples: 1225 download_size: 209857 dataset_size: 483810.0 - config_name: ayr_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10113.0 num_examples: 70 - name: validation num_bytes: 21812.0 num_examples: 139 - name: test num_bytes: 184312.0 num_examples: 1225 download_size: 141598 dataset_size: 216237.0 - config_name: azb_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11737.985714285714 num_examples: 53 - name: validation num_bytes: 27540.40287769784 num_examples: 116 - name: test num_bytes: 233870.12408163265 num_examples: 1054 download_size: 152029 dataset_size: 273148.5126736162 - config_name: azj_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 12118.0 num_examples: 70 - name: validation num_bytes: 25446.0 num_examples: 139 - name: test num_bytes: 214322.0 num_examples: 1225 download_size: 160880 dataset_size: 251886.0 - config_name: bak_Cyrl features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 17964.0 num_examples: 70 - name: validation num_bytes: 37919.0 num_examples: 139 - name: test num_bytes: 307310.0 num_examples: 1225 download_size: 194733 dataset_size: 363193.0 - config_name: bam_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10318.0 num_examples: 70 - name: validation num_bytes: 20891.0 num_examples: 139 - name: test num_bytes: 179621.2612244898 num_examples: 1223 download_size: 138768 dataset_size: 210830.2612244898 - config_name: ban_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10614.0 num_examples: 70 - name: validation num_bytes: 22617.0 num_examples: 139 - name: test num_bytes: 189633.07102040816 num_examples: 1224 download_size: 142738 dataset_size: 222864.07102040816 - config_name: bel_Cyrl features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 19892.0 num_examples: 70 - name: validation num_bytes: 41655.0 num_examples: 139 - name: test num_bytes: 341032.0 num_examples: 1225 download_size: 219275 dataset_size: 402579.0 - config_name: bem_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11849.0 num_examples: 70 - name: validation num_bytes: 24988.0 num_examples: 139 - name: test num_bytes: 208603.0 num_examples: 1225 download_size: 155580 dataset_size: 245440.0 - config_name: ben_Beng features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 24476.0 num_examples: 70 - name: validation num_bytes: 51394.0 num_examples: 139 - name: test num_bytes: 426549.0 num_examples: 1225 download_size: 218953 dataset_size: 502419.0 - config_name: bho_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 23026.0 num_examples: 70 - name: validation num_bytes: 49922.0 num_examples: 139 - name: test num_bytes: 405716.0 num_examples: 1225 download_size: 208388 dataset_size: 478664.0 - config_name: bjn_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10266.0 num_examples: 70 - name: validation num_bytes: 21911.0 num_examples: 139 - name: test num_bytes: 179001.0 num_examples: 1225 download_size: 137505 dataset_size: 211178.0 - config_name: bod_Tibt features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 6172.8 num_examples: 14 - name: validation num_bytes: 17662.575539568345 num_examples: 37 - name: test num_bytes: 108847.09877551021 num_examples: 248 download_size: 65588 dataset_size: 132682.47431507855 - config_name: bos_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10385.0 num_examples: 70 - name: validation num_bytes: 21252.0 num_examples: 138 - name: test num_bytes: 177211.0 num_examples: 1225 download_size: 151480 dataset_size: 208848.0 - config_name: bug_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10661.0 num_examples: 70 - name: validation num_bytes: 22597.0 num_examples: 139 - name: test num_bytes: 187468.83918367347 num_examples: 1224 download_size: 151869 dataset_size: 220726.83918367347 - config_name: bul_Cyrl features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 18004.0 num_examples: 70 - name: validation num_bytes: 38099.0 num_examples: 139 - name: test num_bytes: 312752.0 num_examples: 1225 download_size: 194001 dataset_size: 368855.0 - config_name: cat_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11115.0 num_examples: 70 - name: validation num_bytes: 23078.0 num_examples: 139 - name: test num_bytes: 191564.0 num_examples: 1225 download_size: 153794 dataset_size: 225757.0 - config_name: ceb_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11663.0 num_examples: 70 - name: validation num_bytes: 24391.0 num_examples: 139 - name: test num_bytes: 203779.0 num_examples: 1225 download_size: 149861 dataset_size: 239833.0 - config_name: ces_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10548.0 num_examples: 70 - name: validation num_bytes: 22335.0 num_examples: 139 - name: test num_bytes: 185086.0 num_examples: 1225 download_size: 157595 dataset_size: 217969.0 - config_name: cjk_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10771.0 num_examples: 70 - name: validation num_bytes: 22144.0 num_examples: 139 - name: test num_bytes: 184318.0 num_examples: 1225 download_size: 147980 dataset_size: 217233.0 - config_name: ckb_Arab features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 16745.0 num_examples: 70 - name: validation num_bytes: 36213.0 num_examples: 139 - name: test num_bytes: 295205.0 num_examples: 1225 download_size: 178877 dataset_size: 348163.0 - config_name: crh_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10971.0 num_examples: 70 - name: validation num_bytes: 22976.5035971223 num_examples: 138 - name: test num_bytes: 190762.14857142858 num_examples: 1224 download_size: 150715 dataset_size: 224709.65216855088 - config_name: cym_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10449.0 num_examples: 70 - name: validation num_bytes: 21839.0 num_examples: 139 - name: test num_bytes: 182936.0 num_examples: 1225 download_size: 145614 dataset_size: 215224.0 - config_name: dan_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10310.0 num_examples: 70 - name: validation num_bytes: 21563.0 num_examples: 139 - name: test num_bytes: 178691.0 num_examples: 1225 download_size: 143477 dataset_size: 210564.0 - config_name: deu_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11447.0 num_examples: 70 - name: validation num_bytes: 24509.0 num_examples: 139 - name: test num_bytes: 200342.0 num_examples: 1225 download_size: 160155 dataset_size: 236298.0 - config_name: dik_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10282.0 num_examples: 70 - name: validation num_bytes: 20433.928057553956 num_examples: 138 - name: test num_bytes: 167637.04163265307 num_examples: 1224 download_size: 133643 dataset_size: 198352.969690207 - config_name: dyu_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10908.0 num_examples: 70 - name: validation num_bytes: 21655.0 num_examples: 139 - name: test num_bytes: 183526.8742857143 num_examples: 1223 download_size: 150150 dataset_size: 216089.8742857143 - config_name: dzo_Tibt features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 33004.28571428572 num_examples: 68 - name: validation num_bytes: 69728.71942446043 num_examples: 138 - name: test num_bytes: 580810.3640816327 num_examples: 1203 download_size: 238599 dataset_size: 683543.3692203788 - config_name: ell_Grek features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 20962.0 num_examples: 70 - name: validation num_bytes: 43213.0 num_examples: 139 - name: test num_bytes: 357098.0 num_examples: 1225 download_size: 226422 dataset_size: 421273.0 - config_name: eng_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 9930.0 num_examples: 70 - name: validation num_bytes: 20729.0 num_examples: 139 - name: test num_bytes: 171234.0 num_examples: 1225 download_size: 136991 dataset_size: 201893.0 - config_name: epo_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10193.0 num_examples: 70 - name: validation num_bytes: 21140.0 num_examples: 139 - name: test num_bytes: 174031.0 num_examples: 1225 download_size: 140592 dataset_size: 205364.0 - config_name: est_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 9901.0 num_examples: 70 - name: validation num_bytes: 20892.0 num_examples: 139 - name: test num_bytes: 173631.0 num_examples: 1225 download_size: 145526 dataset_size: 204424.0 - config_name: eus_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10442.0 num_examples: 70 - name: validation num_bytes: 21794.0 num_examples: 139 - name: test num_bytes: 183312.0 num_examples: 1225 download_size: 143642 dataset_size: 215548.0 - config_name: ewe_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 9962.0 num_examples: 70 - name: validation num_bytes: 22199.0 num_examples: 139 - name: test num_bytes: 183838.0 num_examples: 1225 download_size: 135538 dataset_size: 215999.0 - config_name: fao_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10585.0 num_examples: 70 - name: validation num_bytes: 22470.172661870503 num_examples: 138 - name: test num_bytes: 186668.4930612245 num_examples: 1224 download_size: 147806 dataset_size: 219723.665723095 - config_name: fij_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11224.0 num_examples: 70 - name: validation num_bytes: 24141.0 num_examples: 139 - name: test num_bytes: 200544.0 num_examples: 1225 download_size: 137216 dataset_size: 235909.0 - config_name: fin_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10635.0 num_examples: 70 - name: validation num_bytes: 22659.0 num_examples: 139 - name: test num_bytes: 188781.0 num_examples: 1225 download_size: 156463 dataset_size: 222075.0 - config_name: fon_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 12330.0 num_examples: 70 - name: validation num_bytes: 26267.0 num_examples: 139 - name: test num_bytes: 214922.0 num_examples: 1225 download_size: 163329 dataset_size: 253519.0 - config_name: fra_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 12020.0 num_examples: 70 - name: validation num_bytes: 25545.0 num_examples: 139 - name: test num_bytes: 209260.0 num_examples: 1225 download_size: 165772 dataset_size: 246825.0 - config_name: fur_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11208.0 num_examples: 70 - name: validation num_bytes: 23187.0 num_examples: 139 - name: test num_bytes: 193250.0 num_examples: 1225 download_size: 151551 dataset_size: 227645.0 - config_name: fuv_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 9573.0 num_examples: 70 - name: validation num_bytes: 19968.0 num_examples: 139 - name: test num_bytes: 165597.0 num_examples: 1225 download_size: 134203 dataset_size: 195138.0 - config_name: gaz_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11777.0 num_examples: 70 - name: validation num_bytes: 24664.0 num_examples: 139 - name: test num_bytes: 203851.0 num_examples: 1225 download_size: 155131 dataset_size: 240292.0 - config_name: gla_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 12332.0 num_examples: 70 - name: validation num_bytes: 26393.0 num_examples: 139 - name: test num_bytes: 217166.0 num_examples: 1225 download_size: 160806 dataset_size: 255891.0 - config_name: gle_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11749.0 num_examples: 70 - name: validation num_bytes: 25419.0 num_examples: 139 - name: test num_bytes: 207783.0 num_examples: 1225 download_size: 161086 dataset_size: 244951.0 - config_name: glg_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10989.0 num_examples: 70 - name: validation num_bytes: 23321.0 num_examples: 139 - name: test num_bytes: 193196.0 num_examples: 1225 download_size: 152484 dataset_size: 227506.0 - config_name: grn_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10729.0 num_examples: 70 - name: validation num_bytes: 22070.0 num_examples: 139 - name: test num_bytes: 185011.84653061224 num_examples: 1224 download_size: 142324 dataset_size: 217810.84653061224 - config_name: guj_Gujr features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 23131.0 num_examples: 70 - name: validation num_bytes: 49503.0 num_examples: 139 - name: test num_bytes: 410138.0 num_examples: 1225 download_size: 214760 dataset_size: 482772.0 - config_name: hat_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 9446.0 num_examples: 70 - name: validation num_bytes: 19675.424460431655 num_examples: 138 - name: test num_bytes: 163751.21387755102 num_examples: 1223 download_size: 129538 dataset_size: 192872.63833798267 - config_name: hau_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10815.0 num_examples: 70 - name: validation num_bytes: 21980.0 num_examples: 139 - name: test num_bytes: 185353.0 num_examples: 1225 download_size: 141310 dataset_size: 218148.0 - config_name: heb_Hebr features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 13494.0 num_examples: 70 - name: validation num_bytes: 28231.0 num_examples: 139 - name: test num_bytes: 233485.0 num_examples: 1225 download_size: 154523 dataset_size: 275210.0 - config_name: hin_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 23813.0 num_examples: 70 - name: validation num_bytes: 50797.0 num_examples: 139 - name: test num_bytes: 417976.0 num_examples: 1225 download_size: 211947 dataset_size: 492586.0 - config_name: hne_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 23067.0 num_examples: 70 - name: validation num_bytes: 48171.0 num_examples: 139 - name: test num_bytes: 404007.0 num_examples: 1225 download_size: 209152 dataset_size: 475245.0 - config_name: hrv_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10138.0 num_examples: 70 - name: validation num_bytes: 21103.0 num_examples: 139 - name: test num_bytes: 173386.0 num_examples: 1225 download_size: 147455 dataset_size: 204627.0 - config_name: hun_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11212.0 num_examples: 70 - name: validation num_bytes: 23923.0 num_examples: 139 - name: test num_bytes: 196257.0 num_examples: 1225 download_size: 160529 dataset_size: 231392.0 - config_name: hye_Armn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 19518.0 num_examples: 70 - name: validation num_bytes: 41252.0 num_examples: 139 - name: test num_bytes: 338159.0 num_examples: 1225 download_size: 207276 dataset_size: 398929.0 - config_name: ibo_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11319.0 num_examples: 70 - name: validation num_bytes: 24597.0 num_examples: 139 - name: test num_bytes: 205514.0 num_examples: 1225 download_size: 146117 dataset_size: 241430.0 - config_name: ilo_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11934.0 num_examples: 70 - name: validation num_bytes: 24833.0 num_examples: 139 - name: test num_bytes: 205739.0 num_examples: 1225 download_size: 152118 dataset_size: 242506.0 - config_name: ind_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10501.0 num_examples: 70 - name: validation num_bytes: 22205.0 num_examples: 139 - name: test num_bytes: 184685.0 num_examples: 1225 download_size: 138183 dataset_size: 217391.0 - config_name: isl_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10714.0 num_examples: 70 - name: validation num_bytes: 22552.0 num_examples: 139 - name: test num_bytes: 186124.0 num_examples: 1225 download_size: 149812 dataset_size: 219390.0 - config_name: ita_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11731.0 num_examples: 70 - name: validation num_bytes: 24474.0 num_examples: 139 - name: test num_bytes: 201283.0 num_examples: 1225 download_size: 160550 dataset_size: 237488.0 - config_name: jav_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10073.0 num_examples: 70 - name: validation num_bytes: 21709.0 num_examples: 139 - name: test num_bytes: 177840.0 num_examples: 1225 download_size: 135082 dataset_size: 209622.0 - config_name: jpn_Jpan features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 12534.0 num_examples: 70 - name: validation num_bytes: 25798.0 num_examples: 139 - name: test num_bytes: 213955.0 num_examples: 1225 download_size: 158432 dataset_size: 252287.0 - config_name: kab_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10588.0 num_examples: 70 - name: validation num_bytes: 21500.0 num_examples: 139 - name: test num_bytes: 182035.0 num_examples: 1225 download_size: 147278 dataset_size: 214123.0 - config_name: kac_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 12069.0 num_examples: 70 - name: validation num_bytes: 25714.0 num_examples: 139 - name: test num_bytes: 216160.0 num_examples: 1225 download_size: 147951 dataset_size: 253943.0 - config_name: kam_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10418.0 num_examples: 70 - name: validation num_bytes: 20618.589928057554 num_examples: 138 - name: test num_bytes: 174423.76081632654 num_examples: 1223 download_size: 140121 dataset_size: 205460.3507443841 - config_name: kan_Knda features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 26652.0 num_examples: 70 - name: validation num_bytes: 54787.0 num_examples: 139 - name: test num_bytes: 464067.0 num_examples: 1225 download_size: 232405 dataset_size: 545506.0 - config_name: kas_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 22552.0 num_examples: 70 - name: validation num_bytes: 46192.65467625899 num_examples: 137 - name: test num_bytes: 390639.85714285716 num_examples: 1215 download_size: 217163 dataset_size: 459384.5118191162 - config_name: kat_Geor features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 27918.0 num_examples: 70 - name: validation num_bytes: 59157.0 num_examples: 139 - name: test num_bytes: 481290.0 num_examples: 1225 download_size: 227470 dataset_size: 568365.0 - config_name: kaz_Cyrl features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 17838.0 num_examples: 70 - name: validation num_bytes: 37742.0 num_examples: 139 - name: test num_bytes: 313235.0 num_examples: 1225 download_size: 189907 dataset_size: 368815.0 - config_name: kbp_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 13145.0 num_examples: 70 - name: validation num_bytes: 27426.258992805757 num_examples: 138 - name: test num_bytes: 231349.66857142857 num_examples: 1223 download_size: 156082 dataset_size: 271920.92756423436 - config_name: kea_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10088.0 num_examples: 70 - name: validation num_bytes: 21097.0 num_examples: 139 - name: test num_bytes: 174414.0 num_examples: 1225 download_size: 138918 dataset_size: 205599.0 - config_name: khk_Cyrl features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 18294.0 num_examples: 70 - name: validation num_bytes: 37638.0 num_examples: 139 - name: test num_bytes: 317890.0 num_examples: 1225 download_size: 194032 dataset_size: 373822.0 - config_name: khm_Khmr features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 25506.542857142857 num_examples: 59 - name: validation num_bytes: 57911.76258992806 num_examples: 123 - name: test num_bytes: 470662.21714285715 num_examples: 1056 download_size: 252264 dataset_size: 554080.522589928 - config_name: kik_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 12695.0 num_examples: 70 - name: validation num_bytes: 26207.0 num_examples: 139 - name: test num_bytes: 219727.0 num_examples: 1225 download_size: 158524 dataset_size: 258629.0 - config_name: kin_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11226.0 num_examples: 70 - name: validation num_bytes: 23285.0 num_examples: 139 - name: test num_bytes: 194176.0 num_examples: 1225 download_size: 148101 dataset_size: 228687.0 - config_name: kir_Cyrl features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 18063.0 num_examples: 70 - name: validation num_bytes: 37258.0 num_examples: 139 - name: test num_bytes: 312570.0 num_examples: 1225 download_size: 194542 dataset_size: 367891.0 - config_name: kmb_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10941.0 num_examples: 70 - name: validation num_bytes: 22788.0 num_examples: 139 - name: test num_bytes: 190325.0 num_examples: 1225 download_size: 140985 dataset_size: 224054.0 - config_name: kmr_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10652.0 num_examples: 70 - name: validation num_bytes: 22750.0 num_examples: 139 - name: test num_bytes: 188121.0 num_examples: 1225 download_size: 150236 dataset_size: 221523.0 - config_name: knc_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11580.0 num_examples: 70 - name: validation num_bytes: 22805.0 num_examples: 139 - name: test num_bytes: 191150.0 num_examples: 1225 download_size: 153378 dataset_size: 225535.0 - config_name: kon_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11111.0 num_examples: 70 - name: validation num_bytes: 23483.0 num_examples: 139 - name: test num_bytes: 193720.0 num_examples: 1225 download_size: 133533 dataset_size: 228314.0 - config_name: kor_Hang features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11949.0 num_examples: 70 - name: validation num_bytes: 24620.0 num_examples: 139 - name: test num_bytes: 202116.0 num_examples: 1225 download_size: 157051 dataset_size: 238685.0 - config_name: lao_Laoo features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 22445.1 num_examples: 63 - name: validation num_bytes: 47041.14388489209 num_examples: 121 - name: test num_bytes: 382933.13469387754 num_examples: 1055 download_size: 206422 dataset_size: 452419.3785787696 - config_name: lij_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11492.0 num_examples: 70 - name: validation num_bytes: 24039.0 num_examples: 139 - name: test num_bytes: 197929.0 num_examples: 1225 download_size: 159663 dataset_size: 233460.0 - config_name: lim_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10553.0 num_examples: 70 - name: validation num_bytes: 22468.0 num_examples: 139 - name: test num_bytes: 182564.8457142857 num_examples: 1224 download_size: 149516 dataset_size: 215585.8457142857 - config_name: lin_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10545.0 num_examples: 70 - name: validation num_bytes: 22176.0 num_examples: 139 - name: test num_bytes: 184640.0 num_examples: 1225 download_size: 126201 dataset_size: 217361.0 - config_name: lit_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10738.0 num_examples: 70 - name: validation num_bytes: 21766.0 num_examples: 139 - name: test num_bytes: 182762.0 num_examples: 1225 download_size: 152149 dataset_size: 215266.0 - config_name: lmo_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11477.0 num_examples: 70 - name: validation num_bytes: 23957.0 num_examples: 139 - name: test num_bytes: 197484.0 num_examples: 1225 download_size: 163032 dataset_size: 232918.0 - config_name: ltg_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10294.0 num_examples: 70 - name: validation num_bytes: 21419.0 num_examples: 139 - name: test num_bytes: 180105.0 num_examples: 1225 download_size: 149862 dataset_size: 211818.0 - config_name: ltz_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11119.0 num_examples: 70 - name: validation num_bytes: 23884.0 num_examples: 139 - name: test num_bytes: 195528.0 num_examples: 1225 download_size: 155424 dataset_size: 230531.0 - config_name: lua_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10692.0 num_examples: 70 - name: validation num_bytes: 22336.0 num_examples: 139 - name: test num_bytes: 183976.69224489795 num_examples: 1224 download_size: 139423 dataset_size: 217004.69224489795 - config_name: lug_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10610.0 num_examples: 70 - name: validation num_bytes: 21158.0 num_examples: 139 - name: test num_bytes: 177649.0 num_examples: 1225 download_size: 144935 dataset_size: 209417.0 - config_name: luo_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10572.0 num_examples: 70 - name: validation num_bytes: 21534.0 num_examples: 139 - name: test num_bytes: 179610.0 num_examples: 1225 download_size: 138426 dataset_size: 211716.0 - config_name: lus_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10249.0 num_examples: 70 - name: validation num_bytes: 21973.769784172662 num_examples: 138 - name: test num_bytes: 187821.55102040817 num_examples: 1224 download_size: 144052 dataset_size: 220044.3208045808 - config_name: lvs_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10724.0 num_examples: 70 - name: validation num_bytes: 23294.0 num_examples: 139 - name: test num_bytes: 190221.0 num_examples: 1225 download_size: 156274 dataset_size: 224239.0 - config_name: mag_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 22923.0 num_examples: 70 - name: validation num_bytes: 48802.0 num_examples: 139 - name: test num_bytes: 404136.0 num_examples: 1225 download_size: 205019 dataset_size: 475861.0 - config_name: mai_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 23642.0 num_examples: 70 - name: validation num_bytes: 49755.0 num_examples: 139 - name: test num_bytes: 414019.0 num_examples: 1225 download_size: 210828 dataset_size: 487416.0 - config_name: mal_Mlym features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 28813.0 num_examples: 70 - name: validation num_bytes: 60793.0 num_examples: 139 - name: test num_bytes: 504090.0 num_examples: 1225 download_size: 244639 dataset_size: 593696.0 - config_name: mar_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 25269.0 num_examples: 70 - name: validation num_bytes: 53145.0 num_examples: 139 - name: test num_bytes: 436262.0 num_examples: 1225 download_size: 222718 dataset_size: 514676.0 - config_name: min_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10304.0 num_examples: 70 - name: validation num_bytes: 22100.0 num_examples: 139 - name: test num_bytes: 182205.0 num_examples: 1225 download_size: 139192 dataset_size: 214609.0 - config_name: mkd_Cyrl features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 18109.0 num_examples: 70 - name: validation num_bytes: 38074.0 num_examples: 139 - name: test num_bytes: 313783.0 num_examples: 1225 download_size: 190914 dataset_size: 369966.0 - config_name: mlt_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11292.0 num_examples: 70 - name: validation num_bytes: 23788.0 num_examples: 139 - name: test num_bytes: 197824.0 num_examples: 1225 download_size: 157430 dataset_size: 232904.0 - config_name: mni_Beng features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 25959.0 num_examples: 70 - name: validation num_bytes: 53758.0 num_examples: 139 - name: test num_bytes: 451414.0 num_examples: 1225 download_size: 217574 dataset_size: 531131.0 - config_name: mos_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10233.0 num_examples: 70 - name: validation num_bytes: 21654.0 num_examples: 139 - name: test num_bytes: 177291.0 num_examples: 1225 download_size: 144699 dataset_size: 209178.0 - config_name: mri_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11204.0 num_examples: 70 - name: validation num_bytes: 23697.0 num_examples: 139 - name: test num_bytes: 197838.0 num_examples: 1225 download_size: 136556 dataset_size: 232739.0 - config_name: mya_Mymr features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 32282.0 num_examples: 70 - name: validation num_bytes: 68851.0 num_examples: 139 - name: test num_bytes: 571754.0 num_examples: 1225 download_size: 248904 dataset_size: 672887.0 - config_name: nld_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10904.0 num_examples: 70 - name: validation num_bytes: 23125.0 num_examples: 139 - name: test num_bytes: 190029.0 num_examples: 1225 download_size: 149736 dataset_size: 224058.0 - config_name: nno_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10176.0 num_examples: 70 - name: validation num_bytes: 21406.0 num_examples: 139 - name: test num_bytes: 176790.0 num_examples: 1225 download_size: 142630 dataset_size: 208372.0 - config_name: nob_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 10110.0 num_examples: 70 - name: validation num_bytes: 21018.0 num_examples: 139 - name: test num_bytes: 175982.0 num_examples: 1225 download_size: 141546 dataset_size: 207110.0 - config_name: npi_Deva features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 23340.0 num_examples: 70 - name: validation num_bytes: 50361.0 num_examples: 139 - name: test num_bytes: 418579.0236734694 num_examples: 1224 download_size: 213026 dataset_size: 492280.0236734694 - config_name: nqo_Nkoo features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 21734.0 num_examples: 70 - name: validation num_bytes: 45424.0 num_examples: 139 - name: test num_bytes: 378331.0 num_examples: 1225 download_size: 212357 dataset_size: 445489.0 - config_name: nso_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 11313.0 num_examples: 70 - name: validation num_bytes: 23984.0 num_examples: 139 - name: test num_bytes: 200127.0 num_examples: 1225 download_size: 146242 dataset_size: 235424.0 - config_name: nus_Latn features: - name: text dtype: string - name: label dtype: int64 splits: - name: train num_bytes: 12185.0 num_examples: 70 - name: validation num_bytes: 26842.0 num_examples: 139 - name: test num_bytes: 222474.0 num_examples: 1225 download_size: 156536 dataset_size: 261501.0 configs: - config_name: ace_Latn data_files: - split: train path: ace_Latn/train-* - split: validation path: ace_Latn/validation-* - split: test path: ace_Latn/test-* - config_name: acm_Arab data_files: - split: train path: acm_Arab/train-* - split: validation path: acm_Arab/validation-* - split: test path: acm_Arab/test-* - config_name: acq_Arab data_files: - split: train path: acq_Arab/train-* - split: validation path: acq_Arab/validation-* - split: test path: acq_Arab/test-* - config_name: aeb_Arab data_files: - split: train path: aeb_Arab/train-* - split: validation path: aeb_Arab/validation-* - split: test path: aeb_Arab/test-* - config_name: afr_Latn data_files: - split: train path: afr_Latn/train-* - split: validation path: afr_Latn/validation-* - split: test path: afr_Latn/test-* - config_name: ajp_Arab data_files: - split: train path: ajp_Arab/train-* - split: validation path: ajp_Arab/validation-* - split: test path: ajp_Arab/test-* - config_name: aka_Latn data_files: - split: train path: aka_Latn/train-* - split: validation path: aka_Latn/validation-* - split: test path: aka_Latn/test-* - config_name: als_Latn data_files: - split: train path: als_Latn/train-* - split: validation path: als_Latn/validation-* - split: test path: als_Latn/test-* - config_name: amh_Ethi data_files: - split: train path: amh_Ethi/train-* - split: validation path: amh_Ethi/validation-* - split: test path: amh_Ethi/test-* - config_name: apc_Arab data_files: - split: train path: apc_Arab/train-* - split: validation path: apc_Arab/validation-* - split: test path: apc_Arab/test-* - config_name: arb_Latn data_files: - split: train path: arb_Latn/train-* - split: validation path: arb_Latn/validation-* - split: test path: arb_Latn/test-* - config_name: ars_Arab data_files: - split: train path: ars_Arab/train-* - split: validation path: ars_Arab/validation-* - split: test path: ars_Arab/test-* - config_name: ary_Arab data_files: - split: train path: ary_Arab/train-* - split: validation path: ary_Arab/validation-* - split: test path: ary_Arab/test-* - config_name: arz_Arab data_files: - split: train path: arz_Arab/train-* - split: validation path: arz_Arab/validation-* - split: test path: arz_Arab/test-* - config_name: asm_Beng data_files: - split: train path: asm_Beng/train-* - split: validation path: asm_Beng/validation-* - split: test path: asm_Beng/test-* - config_name: ast_Latn data_files: - split: train path: ast_Latn/train-* - split: validation path: ast_Latn/validation-* - split: test path: ast_Latn/test-* - config_name: awa_Deva data_files: - split: train path: awa_Deva/train-* - split: validation path: awa_Deva/validation-* - split: test path: awa_Deva/test-* - config_name: ayr_Latn data_files: - split: train path: ayr_Latn/train-* - split: validation path: ayr_Latn/validation-* - split: test path: ayr_Latn/test-* - config_name: azb_Arab data_files: - split: train path: azb_Arab/train-* - split: validation path: azb_Arab/validation-* - split: test path: azb_Arab/test-* - config_name: azj_Latn data_files: - split: train path: azj_Latn/train-* - split: validation path: azj_Latn/validation-* - split: test path: azj_Latn/test-* - config_name: bak_Cyrl data_files: - split: train path: bak_Cyrl/train-* - split: validation path: bak_Cyrl/validation-* - split: test path: bak_Cyrl/test-* - config_name: bam_Latn data_files: - split: train path: bam_Latn/train-* - split: validation path: bam_Latn/validation-* - split: test path: bam_Latn/test-* - config_name: ban_Latn data_files: - split: train path: ban_Latn/train-* - split: validation path: ban_Latn/validation-* - split: test path: ban_Latn/test-* - config_name: bel_Cyrl data_files: - split: train path: bel_Cyrl/train-* - split: validation path: bel_Cyrl/validation-* - split: test path: bel_Cyrl/test-* - config_name: bem_Latn data_files: - split: train path: bem_Latn/train-* - split: validation path: bem_Latn/validation-* - split: test path: bem_Latn/test-* - config_name: ben_Beng data_files: - split: train path: ben_Beng/train-* - split: validation path: ben_Beng/validation-* - split: test path: ben_Beng/test-* - config_name: bho_Deva data_files: - split: train path: bho_Deva/train-* - split: validation path: bho_Deva/validation-* - split: test path: bho_Deva/test-* - config_name: bjn_Latn data_files: - split: train path: bjn_Latn/train-* - split: validation path: bjn_Latn/validation-* - split: test path: bjn_Latn/test-* - config_name: bod_Tibt data_files: - split: train path: bod_Tibt/train-* - split: validation path: bod_Tibt/validation-* - split: test path: bod_Tibt/test-* - config_name: bos_Latn data_files: - split: train path: bos_Latn/train-* - split: validation path: bos_Latn/validation-* - split: test path: bos_Latn/test-* - config_name: bug_Latn data_files: - split: train path: bug_Latn/train-* - split: validation path: bug_Latn/validation-* - split: test path: bug_Latn/test-* - config_name: bul_Cyrl data_files: - split: train path: bul_Cyrl/train-* - split: validation path: bul_Cyrl/validation-* - split: test path: bul_Cyrl/test-* - config_name: cat_Latn data_files: - split: train path: cat_Latn/train-* - split: validation path: cat_Latn/validation-* - split: test path: cat_Latn/test-* - config_name: ceb_Latn data_files: - split: train path: ceb_Latn/train-* - split: validation path: ceb_Latn/validation-* - split: test path: ceb_Latn/test-* - config_name: ces_Latn data_files: - split: train path: ces_Latn/train-* - split: validation path: ces_Latn/validation-* - split: test path: ces_Latn/test-* - config_name: cjk_Latn data_files: - split: train path: cjk_Latn/train-* - split: validation path: cjk_Latn/validation-* - split: test path: cjk_Latn/test-* - config_name: ckb_Arab data_files: - split: train path: ckb_Arab/train-* - split: validation path: ckb_Arab/validation-* - split: test path: ckb_Arab/test-* - config_name: crh_Latn data_files: - split: train path: crh_Latn/train-* - split: validation path: crh_Latn/validation-* - split: test path: crh_Latn/test-* - config_name: cym_Latn data_files: - split: train path: cym_Latn/train-* - split: validation path: cym_Latn/validation-* - split: test path: cym_Latn/test-* - config_name: dan_Latn data_files: - split: train path: dan_Latn/train-* - split: validation path: dan_Latn/validation-* - split: test path: dan_Latn/test-* - config_name: deu_Latn data_files: - split: train path: deu_Latn/train-* - split: validation path: deu_Latn/validation-* - split: test path: deu_Latn/test-* - config_name: dik_Latn data_files: - split: train path: dik_Latn/train-* - split: validation path: dik_Latn/validation-* - split: test path: dik_Latn/test-* - config_name: dyu_Latn data_files: - split: train path: dyu_Latn/train-* - split: validation path: dyu_Latn/validation-* - split: test path: dyu_Latn/test-* - config_name: dzo_Tibt data_files: - split: train path: dzo_Tibt/train-* - split: validation path: dzo_Tibt/validation-* - split: test path: dzo_Tibt/test-* - config_name: ell_Grek data_files: - split: train path: ell_Grek/train-* - split: validation path: ell_Grek/validation-* - split: test path: ell_Grek/test-* - config_name: eng_Latn data_files: - split: train path: eng_Latn/train-* - split: validation path: eng_Latn/validation-* - split: test path: eng_Latn/test-* - config_name: epo_Latn data_files: - split: train path: epo_Latn/train-* - split: validation path: epo_Latn/validation-* - split: test path: epo_Latn/test-* - config_name: est_Latn data_files: - split: train path: est_Latn/train-* - split: validation path: est_Latn/validation-* - split: test path: est_Latn/test-* - config_name: eus_Latn data_files: - split: train path: eus_Latn/train-* - split: validation path: eus_Latn/validation-* - split: test path: eus_Latn/test-* - config_name: ewe_Latn data_files: - split: train path: ewe_Latn/train-* - split: validation path: ewe_Latn/validation-* - split: test path: ewe_Latn/test-* - config_name: fao_Latn data_files: - split: train path: fao_Latn/train-* - split: validation path: fao_Latn/validation-* - split: test path: fao_Latn/test-* - config_name: fij_Latn data_files: - split: train path: fij_Latn/train-* - split: validation path: fij_Latn/validation-* - split: test path: fij_Latn/test-* - config_name: fin_Latn data_files: - split: train path: fin_Latn/train-* - split: validation path: fin_Latn/validation-* - split: test path: fin_Latn/test-* - config_name: fon_Latn data_files: - split: train path: fon_Latn/train-* - split: validation path: fon_Latn/validation-* - split: test path: fon_Latn/test-* - config_name: fra_Latn data_files: - split: train path: fra_Latn/train-* - split: validation path: fra_Latn/validation-* - split: test path: fra_Latn/test-* - config_name: fur_Latn data_files: - split: train path: fur_Latn/train-* - split: validation path: fur_Latn/validation-* - split: test path: fur_Latn/test-* - config_name: fuv_Latn data_files: - split: train path: fuv_Latn/train-* - split: validation path: fuv_Latn/validation-* - split: test path: fuv_Latn/test-* - config_name: gaz_Latn data_files: - split: train path: gaz_Latn/train-* - split: validation path: gaz_Latn/validation-* - split: test path: gaz_Latn/test-* - config_name: gla_Latn data_files: - split: train path: gla_Latn/train-* - split: validation path: gla_Latn/validation-* - split: test path: gla_Latn/test-* - config_name: gle_Latn data_files: - split: train path: gle_Latn/train-* - split: validation path: gle_Latn/validation-* - split: test path: gle_Latn/test-* - config_name: glg_Latn data_files: - split: train path: glg_Latn/train-* - split: validation path: glg_Latn/validation-* - split: test path: glg_Latn/test-* - config_name: grn_Latn data_files: - split: train path: grn_Latn/train-* - split: validation path: grn_Latn/validation-* - split: test path: grn_Latn/test-* - config_name: guj_Gujr data_files: - split: train path: guj_Gujr/train-* - split: validation path: guj_Gujr/validation-* - split: test path: guj_Gujr/test-* - config_name: hat_Latn data_files: - split: train path: hat_Latn/train-* - split: validation path: hat_Latn/validation-* - split: test path: hat_Latn/test-* - config_name: hau_Latn data_files: - split: train path: hau_Latn/train-* - split: validation path: hau_Latn/validation-* - split: test path: hau_Latn/test-* - config_name: heb_Hebr data_files: - split: train path: heb_Hebr/train-* - split: validation path: heb_Hebr/validation-* - split: test path: heb_Hebr/test-* - config_name: hin_Deva data_files: - split: train path: hin_Deva/train-* - split: validation path: hin_Deva/validation-* - split: test path: hin_Deva/test-* - config_name: hne_Deva data_files: - split: train path: hne_Deva/train-* - split: validation path: hne_Deva/validation-* - split: test path: hne_Deva/test-* - config_name: hrv_Latn data_files: - split: train path: hrv_Latn/train-* - split: validation path: hrv_Latn/validation-* - split: test path: hrv_Latn/test-* - config_name: hun_Latn data_files: - split: train path: hun_Latn/train-* - split: validation path: hun_Latn/validation-* - split: test path: hun_Latn/test-* - config_name: hye_Armn data_files: - split: train path: hye_Armn/train-* - split: validation path: hye_Armn/validation-* - split: test path: hye_Armn/test-* - config_name: ibo_Latn data_files: - split: train path: ibo_Latn/train-* - split: validation path: ibo_Latn/validation-* - split: test path: ibo_Latn/test-* - config_name: ilo_Latn data_files: - split: train path: ilo_Latn/train-* - split: validation path: ilo_Latn/validation-* - split: test path: ilo_Latn/test-* - config_name: ind_Latn data_files: - split: train path: ind_Latn/train-* - split: validation path: ind_Latn/validation-* - split: test path: ind_Latn/test-* - config_name: isl_Latn data_files: - split: train path: isl_Latn/train-* - split: validation path: isl_Latn/validation-* - split: test path: isl_Latn/test-* - config_name: ita_Latn data_files: - split: train path: ita_Latn/train-* - split: validation path: ita_Latn/validation-* - split: test path: ita_Latn/test-* - config_name: jav_Latn data_files: - split: train path: jav_Latn/train-* - split: validation path: jav_Latn/validation-* - split: test path: jav_Latn/test-* - config_name: jpn_Jpan data_files: - split: train path: jpn_Jpan/train-* - split: validation path: jpn_Jpan/validation-* - split: test path: jpn_Jpan/test-* - config_name: kab_Latn data_files: - split: train path: kab_Latn/train-* - split: validation path: kab_Latn/validation-* - split: test path: kab_Latn/test-* - config_name: kac_Latn data_files: - split: train path: kac_Latn/train-* - split: validation path: kac_Latn/validation-* - split: test path: kac_Latn/test-* - config_name: kam_Latn data_files: - split: train path: kam_Latn/train-* - split: validation path: kam_Latn/validation-* - split: test path: kam_Latn/test-* - config_name: kan_Knda data_files: - split: train path: kan_Knda/train-* - split: validation path: kan_Knda/validation-* - split: test path: kan_Knda/test-* - config_name: kas_Deva data_files: - split: train path: kas_Deva/train-* - split: validation path: kas_Deva/validation-* - split: test path: kas_Deva/test-* - config_name: kat_Geor data_files: - split: train path: kat_Geor/train-* - split: validation path: kat_Geor/validation-* - split: test path: kat_Geor/test-* - config_name: kaz_Cyrl data_files: - split: train path: kaz_Cyrl/train-* - split: validation path: kaz_Cyrl/validation-* - split: test path: kaz_Cyrl/test-* - config_name: kbp_Latn data_files: - split: train path: kbp_Latn/train-* - split: validation path: kbp_Latn/validation-* - split: test path: kbp_Latn/test-* - config_name: kea_Latn data_files: - split: train path: kea_Latn/train-* - split: validation path: kea_Latn/validation-* - split: test path: kea_Latn/test-* - config_name: khk_Cyrl data_files: - split: train path: khk_Cyrl/train-* - split: validation path: khk_Cyrl/validation-* - split: test path: khk_Cyrl/test-* - config_name: khm_Khmr data_files: - split: train path: khm_Khmr/train-* - split: validation path: khm_Khmr/validation-* - split: test path: khm_Khmr/test-* - config_name: kik_Latn data_files: - split: train path: kik_Latn/train-* - split: validation path: kik_Latn/validation-* - split: test path: kik_Latn/test-* - config_name: kin_Latn data_files: - split: train path: kin_Latn/train-* - split: validation path: kin_Latn/validation-* - split: test path: kin_Latn/test-* - config_name: kir_Cyrl data_files: - split: train path: kir_Cyrl/train-* - split: validation path: kir_Cyrl/validation-* - split: test path: kir_Cyrl/test-* - config_name: kmb_Latn data_files: - split: train path: kmb_Latn/train-* - split: validation path: kmb_Latn/validation-* - split: test path: kmb_Latn/test-* - config_name: kmr_Latn data_files: - split: train path: kmr_Latn/train-* - split: validation path: kmr_Latn/validation-* - split: test path: kmr_Latn/test-* - config_name: knc_Latn data_files: - split: train path: knc_Latn/train-* - split: validation path: knc_Latn/validation-* - split: test path: knc_Latn/test-* - config_name: kon_Latn data_files: - split: train path: kon_Latn/train-* - split: validation path: kon_Latn/validation-* - split: test path: kon_Latn/test-* - config_name: kor_Hang data_files: - split: train path: kor_Hang/train-* - split: validation path: kor_Hang/validation-* - split: test path: kor_Hang/test-* - config_name: lao_Laoo data_files: - split: train path: lao_Laoo/train-* - split: validation path: lao_Laoo/validation-* - split: test path: lao_Laoo/test-* - config_name: lij_Latn data_files: - split: train path: lij_Latn/train-* - split: validation path: lij_Latn/validation-* - split: test path: lij_Latn/test-* - config_name: lim_Latn data_files: - split: train path: lim_Latn/train-* - split: validation path: lim_Latn/validation-* - split: test path: lim_Latn/test-* - config_name: lin_Latn data_files: - split: train path: lin_Latn/train-* - split: validation path: lin_Latn/validation-* - split: test path: lin_Latn/test-* - config_name: lit_Latn data_files: - split: train path: lit_Latn/train-* - split: validation path: lit_Latn/validation-* - split: test path: lit_Latn/test-* - config_name: lmo_Latn data_files: - split: train path: lmo_Latn/train-* - split: validation path: lmo_Latn/validation-* - split: test path: lmo_Latn/test-* - config_name: ltg_Latn data_files: - split: train path: ltg_Latn/train-* - split: validation path: ltg_Latn/validation-* - split: test path: ltg_Latn/test-* - config_name: ltz_Latn data_files: - split: train path: ltz_Latn/train-* - split: validation path: ltz_Latn/validation-* - split: test path: ltz_Latn/test-* - config_name: lua_Latn data_files: - split: train path: lua_Latn/train-* - split: validation path: lua_Latn/validation-* - split: test path: lua_Latn/test-* - config_name: lug_Latn data_files: - split: train path: lug_Latn/train-* - split: validation path: lug_Latn/validation-* - split: test path: lug_Latn/test-* - config_name: luo_Latn data_files: - split: train path: luo_Latn/train-* - split: validation path: luo_Latn/validation-* - split: test path: luo_Latn/test-* - config_name: lus_Latn data_files: - split: train path: lus_Latn/train-* - split: validation path: lus_Latn/validation-* - split: test path: lus_Latn/test-* - config_name: lvs_Latn data_files: - split: train path: lvs_Latn/train-* - split: validation path: lvs_Latn/validation-* - split: test path: lvs_Latn/test-* - config_name: mag_Deva data_files: - split: train path: mag_Deva/train-* - split: validation path: mag_Deva/validation-* - split: test path: mag_Deva/test-* - config_name: mai_Deva data_files: - split: train path: mai_Deva/train-* - split: validation path: mai_Deva/validation-* - split: test path: mai_Deva/test-* - config_name: mal_Mlym data_files: - split: train path: mal_Mlym/train-* - split: validation path: mal_Mlym/validation-* - split: test path: mal_Mlym/test-* - config_name: mar_Deva data_files: - split: train path: mar_Deva/train-* - split: validation path: mar_Deva/validation-* - split: test path: mar_Deva/test-* - config_name: min_Latn data_files: - split: train path: min_Latn/train-* - split: validation path: min_Latn/validation-* - split: test path: min_Latn/test-* - config_name: mkd_Cyrl data_files: - split: train path: mkd_Cyrl/train-* - split: validation path: mkd_Cyrl/validation-* - split: test path: mkd_Cyrl/test-* - config_name: mlt_Latn data_files: - split: train path: mlt_Latn/train-* - split: validation path: mlt_Latn/validation-* - split: test path: mlt_Latn/test-* - config_name: mni_Beng data_files: - split: train path: mni_Beng/train-* - split: validation path: mni_Beng/validation-* - split: test path: mni_Beng/test-* - config_name: mos_Latn data_files: - split: train path: mos_Latn/train-* - split: validation path: mos_Latn/validation-* - split: test path: mos_Latn/test-* - config_name: mri_Latn data_files: - split: train path: mri_Latn/train-* - split: validation path: mri_Latn/validation-* - split: test path: mri_Latn/test-* - config_name: mya_Mymr data_files: - split: train path: mya_Mymr/train-* - split: validation path: mya_Mymr/validation-* - split: test path: mya_Mymr/test-* - config_name: nld_Latn data_files: - split: train path: nld_Latn/train-* - split: validation path: nld_Latn/validation-* - split: test path: nld_Latn/test-* - config_name: nno_Latn data_files: - split: train path: nno_Latn/train-* - split: validation path: nno_Latn/validation-* - split: test path: nno_Latn/test-* - config_name: nob_Latn data_files: - split: train path: nob_Latn/train-* - split: validation path: nob_Latn/validation-* - split: test path: nob_Latn/test-* - config_name: npi_Deva data_files: - split: train path: npi_Deva/train-* - split: validation path: npi_Deva/validation-* - split: test path: npi_Deva/test-* - config_name: nqo_Nkoo data_files: - split: train path: nqo_Nkoo/train-* - split: validation path: nqo_Nkoo/validation-* - split: test path: nqo_Nkoo/test-* - config_name: nso_Latn data_files: - split: train path: nso_Latn/train-* - split: validation path: nso_Latn/validation-* - split: test path: nso_Latn/test-* - config_name: nus_Latn data_files: - split: train path: nus_Latn/train-* - split: validation path: nus_Latn/validation-* - split: test path: nus_Latn/test-* ---
提供机构:
KoseiUemura
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作