KoseiUemura/SIB200Classification.v2
收藏Hugging Face2026-04-18 更新2026-04-26 收录
下载链接:
https://hf-mirror.com/datasets/KoseiUemura/SIB200Classification.v2
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: ace_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10453.0
num_examples: 70
- name: validation
num_bytes: 22387.0
num_examples: 139
- name: test
num_bytes: 185646.0
num_examples: 1225
download_size: 143601
dataset_size: 218486.0
- config_name: acm_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 15468.0
num_examples: 70
- name: validation
num_bytes: 31142.0
num_examples: 139
- name: test
num_bytes: 262763.0
num_examples: 1225
download_size: 171481
dataset_size: 309373.0
- config_name: acq_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 15638.0
num_examples: 70
- name: validation
num_bytes: 31548.0
num_examples: 139
- name: test
num_bytes: 265364.0
num_examples: 1225
download_size: 173694
dataset_size: 312550.0
- config_name: aeb_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 15150.0
num_examples: 70
- name: validation
num_bytes: 31226.0
num_examples: 139
- name: test
num_bytes: 258647.0
num_examples: 1225
download_size: 171007
dataset_size: 305023.0
- config_name: afr_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10686.0
num_examples: 70
- name: validation
num_bytes: 22352.0
num_examples: 139
- name: test
num_bytes: 182477.0
num_examples: 1225
download_size: 144631
dataset_size: 215515.0
- config_name: ajp_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 14672.0
num_examples: 70
- name: validation
num_bytes: 30121.0
num_examples: 139
- name: test
num_bytes: 249781.0
num_examples: 1225
download_size: 164371
dataset_size: 294574.0
- config_name: aka_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10707.0
num_examples: 70
- name: validation
num_bytes: 22874.0
num_examples: 139
- name: test
num_bytes: 189034.0
num_examples: 1225
download_size: 142988
dataset_size: 222615.0
- config_name: als_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11626.0
num_examples: 70
- name: validation
num_bytes: 24784.0
num_examples: 139
- name: test
num_bytes: 204828.0
num_examples: 1225
download_size: 156564
dataset_size: 241238.0
- config_name: amh_Ethi
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 16468.0
num_examples: 70
- name: validation
num_bytes: 33977.0
num_examples: 139
- name: test
num_bytes: 287586.0
num_examples: 1225
download_size: 185850
dataset_size: 338031.0
- config_name: apc_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 14815.0
num_examples: 70
- name: validation
num_bytes: 29933.0
num_examples: 139
- name: test
num_bytes: 249456.0
num_examples: 1225
download_size: 162289
dataset_size: 294204.0
- config_name: arb_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11507.0
num_examples: 70
- name: validation
num_bytes: 23844.0
num_examples: 139
- name: test
num_bytes: 197744.62367346938
num_examples: 1223
download_size: 165027
dataset_size: 233095.62367346938
- config_name: ars_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 15772.0
num_examples: 70
- name: validation
num_bytes: 31868.0
num_examples: 139
- name: test
num_bytes: 268483.0
num_examples: 1225
download_size: 176066
dataset_size: 316123.0
- config_name: ary_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 15119.0
num_examples: 70
- name: validation
num_bytes: 31534.0
num_examples: 139
- name: test
num_bytes: 261538.30040816325
num_examples: 1223
download_size: 170776
dataset_size: 308191.3004081632
- config_name: arz_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 15605.0
num_examples: 70
- name: validation
num_bytes: 31246.0
num_examples: 139
- name: test
num_bytes: 261238.0
num_examples: 1225
download_size: 168487
dataset_size: 308089.0
- config_name: asm_Beng
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 24348.0
num_examples: 70
- name: validation
num_bytes: 49799.0
num_examples: 139
- name: test
num_bytes: 415437.0
num_examples: 1225
download_size: 219383
dataset_size: 489584.0
- config_name: ast_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10180.0
num_examples: 70
- name: validation
num_bytes: 21785.0
num_examples: 139
- name: test
num_bytes: 182128.0
num_examples: 1225
download_size: 147145
dataset_size: 214093.0
- config_name: awa_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 23310.0
num_examples: 70
- name: validation
num_bytes: 49396.0
num_examples: 139
- name: test
num_bytes: 411104.0
num_examples: 1225
download_size: 209857
dataset_size: 483810.0
- config_name: ayr_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10113.0
num_examples: 70
- name: validation
num_bytes: 21812.0
num_examples: 139
- name: test
num_bytes: 184312.0
num_examples: 1225
download_size: 141598
dataset_size: 216237.0
- config_name: azb_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11737.985714285714
num_examples: 53
- name: validation
num_bytes: 27540.40287769784
num_examples: 116
- name: test
num_bytes: 233870.12408163265
num_examples: 1054
download_size: 152029
dataset_size: 273148.5126736162
- config_name: azj_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 12118.0
num_examples: 70
- name: validation
num_bytes: 25446.0
num_examples: 139
- name: test
num_bytes: 214322.0
num_examples: 1225
download_size: 160880
dataset_size: 251886.0
- config_name: bak_Cyrl
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 17964.0
num_examples: 70
- name: validation
num_bytes: 37919.0
num_examples: 139
- name: test
num_bytes: 307310.0
num_examples: 1225
download_size: 194733
dataset_size: 363193.0
- config_name: bam_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10318.0
num_examples: 70
- name: validation
num_bytes: 20891.0
num_examples: 139
- name: test
num_bytes: 179621.2612244898
num_examples: 1223
download_size: 138768
dataset_size: 210830.2612244898
- config_name: ban_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10614.0
num_examples: 70
- name: validation
num_bytes: 22617.0
num_examples: 139
- name: test
num_bytes: 189633.07102040816
num_examples: 1224
download_size: 142738
dataset_size: 222864.07102040816
- config_name: bel_Cyrl
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 19892.0
num_examples: 70
- name: validation
num_bytes: 41655.0
num_examples: 139
- name: test
num_bytes: 341032.0
num_examples: 1225
download_size: 219275
dataset_size: 402579.0
- config_name: bem_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11849.0
num_examples: 70
- name: validation
num_bytes: 24988.0
num_examples: 139
- name: test
num_bytes: 208603.0
num_examples: 1225
download_size: 155580
dataset_size: 245440.0
- config_name: ben_Beng
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 24476.0
num_examples: 70
- name: validation
num_bytes: 51394.0
num_examples: 139
- name: test
num_bytes: 426549.0
num_examples: 1225
download_size: 218953
dataset_size: 502419.0
- config_name: bho_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 23026.0
num_examples: 70
- name: validation
num_bytes: 49922.0
num_examples: 139
- name: test
num_bytes: 405716.0
num_examples: 1225
download_size: 208388
dataset_size: 478664.0
- config_name: bjn_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10266.0
num_examples: 70
- name: validation
num_bytes: 21911.0
num_examples: 139
- name: test
num_bytes: 179001.0
num_examples: 1225
download_size: 137505
dataset_size: 211178.0
- config_name: bod_Tibt
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 6172.8
num_examples: 14
- name: validation
num_bytes: 17662.575539568345
num_examples: 37
- name: test
num_bytes: 108847.09877551021
num_examples: 248
download_size: 65588
dataset_size: 132682.47431507855
- config_name: bos_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10385.0
num_examples: 70
- name: validation
num_bytes: 21252.0
num_examples: 138
- name: test
num_bytes: 177211.0
num_examples: 1225
download_size: 151480
dataset_size: 208848.0
- config_name: bug_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10661.0
num_examples: 70
- name: validation
num_bytes: 22597.0
num_examples: 139
- name: test
num_bytes: 187468.83918367347
num_examples: 1224
download_size: 151869
dataset_size: 220726.83918367347
- config_name: bul_Cyrl
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 18004.0
num_examples: 70
- name: validation
num_bytes: 38099.0
num_examples: 139
- name: test
num_bytes: 312752.0
num_examples: 1225
download_size: 194001
dataset_size: 368855.0
- config_name: cat_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11115.0
num_examples: 70
- name: validation
num_bytes: 23078.0
num_examples: 139
- name: test
num_bytes: 191564.0
num_examples: 1225
download_size: 153794
dataset_size: 225757.0
- config_name: ceb_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11663.0
num_examples: 70
- name: validation
num_bytes: 24391.0
num_examples: 139
- name: test
num_bytes: 203779.0
num_examples: 1225
download_size: 149861
dataset_size: 239833.0
- config_name: ces_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10548.0
num_examples: 70
- name: validation
num_bytes: 22335.0
num_examples: 139
- name: test
num_bytes: 185086.0
num_examples: 1225
download_size: 157595
dataset_size: 217969.0
- config_name: cjk_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10771.0
num_examples: 70
- name: validation
num_bytes: 22144.0
num_examples: 139
- name: test
num_bytes: 184318.0
num_examples: 1225
download_size: 147980
dataset_size: 217233.0
- config_name: ckb_Arab
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 16745.0
num_examples: 70
- name: validation
num_bytes: 36213.0
num_examples: 139
- name: test
num_bytes: 295205.0
num_examples: 1225
download_size: 178877
dataset_size: 348163.0
- config_name: crh_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10971.0
num_examples: 70
- name: validation
num_bytes: 22976.5035971223
num_examples: 138
- name: test
num_bytes: 190762.14857142858
num_examples: 1224
download_size: 150715
dataset_size: 224709.65216855088
- config_name: cym_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10449.0
num_examples: 70
- name: validation
num_bytes: 21839.0
num_examples: 139
- name: test
num_bytes: 182936.0
num_examples: 1225
download_size: 145614
dataset_size: 215224.0
- config_name: dan_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10310.0
num_examples: 70
- name: validation
num_bytes: 21563.0
num_examples: 139
- name: test
num_bytes: 178691.0
num_examples: 1225
download_size: 143477
dataset_size: 210564.0
- config_name: deu_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11447.0
num_examples: 70
- name: validation
num_bytes: 24509.0
num_examples: 139
- name: test
num_bytes: 200342.0
num_examples: 1225
download_size: 160155
dataset_size: 236298.0
- config_name: dik_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10282.0
num_examples: 70
- name: validation
num_bytes: 20433.928057553956
num_examples: 138
- name: test
num_bytes: 167637.04163265307
num_examples: 1224
download_size: 133643
dataset_size: 198352.969690207
- config_name: dyu_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10908.0
num_examples: 70
- name: validation
num_bytes: 21655.0
num_examples: 139
- name: test
num_bytes: 183526.8742857143
num_examples: 1223
download_size: 150150
dataset_size: 216089.8742857143
- config_name: dzo_Tibt
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 33004.28571428572
num_examples: 68
- name: validation
num_bytes: 69728.71942446043
num_examples: 138
- name: test
num_bytes: 580810.3640816327
num_examples: 1203
download_size: 238599
dataset_size: 683543.3692203788
- config_name: ell_Grek
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 20962.0
num_examples: 70
- name: validation
num_bytes: 43213.0
num_examples: 139
- name: test
num_bytes: 357098.0
num_examples: 1225
download_size: 226422
dataset_size: 421273.0
- config_name: eng_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 9930.0
num_examples: 70
- name: validation
num_bytes: 20729.0
num_examples: 139
- name: test
num_bytes: 171234.0
num_examples: 1225
download_size: 136991
dataset_size: 201893.0
- config_name: epo_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10193.0
num_examples: 70
- name: validation
num_bytes: 21140.0
num_examples: 139
- name: test
num_bytes: 174031.0
num_examples: 1225
download_size: 140592
dataset_size: 205364.0
- config_name: est_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 9901.0
num_examples: 70
- name: validation
num_bytes: 20892.0
num_examples: 139
- name: test
num_bytes: 173631.0
num_examples: 1225
download_size: 145526
dataset_size: 204424.0
- config_name: eus_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10442.0
num_examples: 70
- name: validation
num_bytes: 21794.0
num_examples: 139
- name: test
num_bytes: 183312.0
num_examples: 1225
download_size: 143642
dataset_size: 215548.0
- config_name: ewe_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 9962.0
num_examples: 70
- name: validation
num_bytes: 22199.0
num_examples: 139
- name: test
num_bytes: 183838.0
num_examples: 1225
download_size: 135538
dataset_size: 215999.0
- config_name: fao_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10585.0
num_examples: 70
- name: validation
num_bytes: 22470.172661870503
num_examples: 138
- name: test
num_bytes: 186668.4930612245
num_examples: 1224
download_size: 147806
dataset_size: 219723.665723095
- config_name: fij_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11224.0
num_examples: 70
- name: validation
num_bytes: 24141.0
num_examples: 139
- name: test
num_bytes: 200544.0
num_examples: 1225
download_size: 137216
dataset_size: 235909.0
- config_name: fin_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10635.0
num_examples: 70
- name: validation
num_bytes: 22659.0
num_examples: 139
- name: test
num_bytes: 188781.0
num_examples: 1225
download_size: 156463
dataset_size: 222075.0
- config_name: fon_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 12330.0
num_examples: 70
- name: validation
num_bytes: 26267.0
num_examples: 139
- name: test
num_bytes: 214922.0
num_examples: 1225
download_size: 163329
dataset_size: 253519.0
- config_name: fra_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 12020.0
num_examples: 70
- name: validation
num_bytes: 25545.0
num_examples: 139
- name: test
num_bytes: 209260.0
num_examples: 1225
download_size: 165772
dataset_size: 246825.0
- config_name: fur_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11208.0
num_examples: 70
- name: validation
num_bytes: 23187.0
num_examples: 139
- name: test
num_bytes: 193250.0
num_examples: 1225
download_size: 151551
dataset_size: 227645.0
- config_name: fuv_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 9573.0
num_examples: 70
- name: validation
num_bytes: 19968.0
num_examples: 139
- name: test
num_bytes: 165597.0
num_examples: 1225
download_size: 134203
dataset_size: 195138.0
- config_name: gaz_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11777.0
num_examples: 70
- name: validation
num_bytes: 24664.0
num_examples: 139
- name: test
num_bytes: 203851.0
num_examples: 1225
download_size: 155131
dataset_size: 240292.0
- config_name: gla_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 12332.0
num_examples: 70
- name: validation
num_bytes: 26393.0
num_examples: 139
- name: test
num_bytes: 217166.0
num_examples: 1225
download_size: 160806
dataset_size: 255891.0
- config_name: gle_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11749.0
num_examples: 70
- name: validation
num_bytes: 25419.0
num_examples: 139
- name: test
num_bytes: 207783.0
num_examples: 1225
download_size: 161086
dataset_size: 244951.0
- config_name: glg_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10989.0
num_examples: 70
- name: validation
num_bytes: 23321.0
num_examples: 139
- name: test
num_bytes: 193196.0
num_examples: 1225
download_size: 152484
dataset_size: 227506.0
- config_name: grn_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10729.0
num_examples: 70
- name: validation
num_bytes: 22070.0
num_examples: 139
- name: test
num_bytes: 185011.84653061224
num_examples: 1224
download_size: 142324
dataset_size: 217810.84653061224
- config_name: guj_Gujr
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 23131.0
num_examples: 70
- name: validation
num_bytes: 49503.0
num_examples: 139
- name: test
num_bytes: 410138.0
num_examples: 1225
download_size: 214760
dataset_size: 482772.0
- config_name: hat_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 9446.0
num_examples: 70
- name: validation
num_bytes: 19675.424460431655
num_examples: 138
- name: test
num_bytes: 163751.21387755102
num_examples: 1223
download_size: 129538
dataset_size: 192872.63833798267
- config_name: hau_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10815.0
num_examples: 70
- name: validation
num_bytes: 21980.0
num_examples: 139
- name: test
num_bytes: 185353.0
num_examples: 1225
download_size: 141310
dataset_size: 218148.0
- config_name: heb_Hebr
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 13494.0
num_examples: 70
- name: validation
num_bytes: 28231.0
num_examples: 139
- name: test
num_bytes: 233485.0
num_examples: 1225
download_size: 154523
dataset_size: 275210.0
- config_name: hin_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 23813.0
num_examples: 70
- name: validation
num_bytes: 50797.0
num_examples: 139
- name: test
num_bytes: 417976.0
num_examples: 1225
download_size: 211947
dataset_size: 492586.0
- config_name: hne_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 23067.0
num_examples: 70
- name: validation
num_bytes: 48171.0
num_examples: 139
- name: test
num_bytes: 404007.0
num_examples: 1225
download_size: 209152
dataset_size: 475245.0
- config_name: hrv_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10138.0
num_examples: 70
- name: validation
num_bytes: 21103.0
num_examples: 139
- name: test
num_bytes: 173386.0
num_examples: 1225
download_size: 147455
dataset_size: 204627.0
- config_name: hun_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11212.0
num_examples: 70
- name: validation
num_bytes: 23923.0
num_examples: 139
- name: test
num_bytes: 196257.0
num_examples: 1225
download_size: 160529
dataset_size: 231392.0
- config_name: hye_Armn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 19518.0
num_examples: 70
- name: validation
num_bytes: 41252.0
num_examples: 139
- name: test
num_bytes: 338159.0
num_examples: 1225
download_size: 207276
dataset_size: 398929.0
- config_name: ibo_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11319.0
num_examples: 70
- name: validation
num_bytes: 24597.0
num_examples: 139
- name: test
num_bytes: 205514.0
num_examples: 1225
download_size: 146117
dataset_size: 241430.0
- config_name: ilo_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11934.0
num_examples: 70
- name: validation
num_bytes: 24833.0
num_examples: 139
- name: test
num_bytes: 205739.0
num_examples: 1225
download_size: 152118
dataset_size: 242506.0
- config_name: ind_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10501.0
num_examples: 70
- name: validation
num_bytes: 22205.0
num_examples: 139
- name: test
num_bytes: 184685.0
num_examples: 1225
download_size: 138183
dataset_size: 217391.0
- config_name: isl_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10714.0
num_examples: 70
- name: validation
num_bytes: 22552.0
num_examples: 139
- name: test
num_bytes: 186124.0
num_examples: 1225
download_size: 149812
dataset_size: 219390.0
- config_name: ita_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11731.0
num_examples: 70
- name: validation
num_bytes: 24474.0
num_examples: 139
- name: test
num_bytes: 201283.0
num_examples: 1225
download_size: 160550
dataset_size: 237488.0
- config_name: jav_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10073.0
num_examples: 70
- name: validation
num_bytes: 21709.0
num_examples: 139
- name: test
num_bytes: 177840.0
num_examples: 1225
download_size: 135082
dataset_size: 209622.0
- config_name: jpn_Jpan
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 12534.0
num_examples: 70
- name: validation
num_bytes: 25798.0
num_examples: 139
- name: test
num_bytes: 213955.0
num_examples: 1225
download_size: 158432
dataset_size: 252287.0
- config_name: kab_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10588.0
num_examples: 70
- name: validation
num_bytes: 21500.0
num_examples: 139
- name: test
num_bytes: 182035.0
num_examples: 1225
download_size: 147278
dataset_size: 214123.0
- config_name: kac_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 12069.0
num_examples: 70
- name: validation
num_bytes: 25714.0
num_examples: 139
- name: test
num_bytes: 216160.0
num_examples: 1225
download_size: 147951
dataset_size: 253943.0
- config_name: kam_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10418.0
num_examples: 70
- name: validation
num_bytes: 20618.589928057554
num_examples: 138
- name: test
num_bytes: 174423.76081632654
num_examples: 1223
download_size: 140121
dataset_size: 205460.3507443841
- config_name: kan_Knda
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 26652.0
num_examples: 70
- name: validation
num_bytes: 54787.0
num_examples: 139
- name: test
num_bytes: 464067.0
num_examples: 1225
download_size: 232405
dataset_size: 545506.0
- config_name: kas_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 22552.0
num_examples: 70
- name: validation
num_bytes: 46192.65467625899
num_examples: 137
- name: test
num_bytes: 390639.85714285716
num_examples: 1215
download_size: 217163
dataset_size: 459384.5118191162
- config_name: kat_Geor
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 27918.0
num_examples: 70
- name: validation
num_bytes: 59157.0
num_examples: 139
- name: test
num_bytes: 481290.0
num_examples: 1225
download_size: 227470
dataset_size: 568365.0
- config_name: kaz_Cyrl
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 17838.0
num_examples: 70
- name: validation
num_bytes: 37742.0
num_examples: 139
- name: test
num_bytes: 313235.0
num_examples: 1225
download_size: 189907
dataset_size: 368815.0
- config_name: kbp_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 13145.0
num_examples: 70
- name: validation
num_bytes: 27426.258992805757
num_examples: 138
- name: test
num_bytes: 231349.66857142857
num_examples: 1223
download_size: 156082
dataset_size: 271920.92756423436
- config_name: kea_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10088.0
num_examples: 70
- name: validation
num_bytes: 21097.0
num_examples: 139
- name: test
num_bytes: 174414.0
num_examples: 1225
download_size: 138918
dataset_size: 205599.0
- config_name: khk_Cyrl
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 18294.0
num_examples: 70
- name: validation
num_bytes: 37638.0
num_examples: 139
- name: test
num_bytes: 317890.0
num_examples: 1225
download_size: 194032
dataset_size: 373822.0
- config_name: khm_Khmr
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 25506.542857142857
num_examples: 59
- name: validation
num_bytes: 57911.76258992806
num_examples: 123
- name: test
num_bytes: 470662.21714285715
num_examples: 1056
download_size: 252264
dataset_size: 554080.522589928
- config_name: kik_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 12695.0
num_examples: 70
- name: validation
num_bytes: 26207.0
num_examples: 139
- name: test
num_bytes: 219727.0
num_examples: 1225
download_size: 158524
dataset_size: 258629.0
- config_name: kin_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11226.0
num_examples: 70
- name: validation
num_bytes: 23285.0
num_examples: 139
- name: test
num_bytes: 194176.0
num_examples: 1225
download_size: 148101
dataset_size: 228687.0
- config_name: kir_Cyrl
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 18063.0
num_examples: 70
- name: validation
num_bytes: 37258.0
num_examples: 139
- name: test
num_bytes: 312570.0
num_examples: 1225
download_size: 194542
dataset_size: 367891.0
- config_name: kmb_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10941.0
num_examples: 70
- name: validation
num_bytes: 22788.0
num_examples: 139
- name: test
num_bytes: 190325.0
num_examples: 1225
download_size: 140985
dataset_size: 224054.0
- config_name: kmr_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10652.0
num_examples: 70
- name: validation
num_bytes: 22750.0
num_examples: 139
- name: test
num_bytes: 188121.0
num_examples: 1225
download_size: 150236
dataset_size: 221523.0
- config_name: knc_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11580.0
num_examples: 70
- name: validation
num_bytes: 22805.0
num_examples: 139
- name: test
num_bytes: 191150.0
num_examples: 1225
download_size: 153378
dataset_size: 225535.0
- config_name: kon_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11111.0
num_examples: 70
- name: validation
num_bytes: 23483.0
num_examples: 139
- name: test
num_bytes: 193720.0
num_examples: 1225
download_size: 133533
dataset_size: 228314.0
- config_name: kor_Hang
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11949.0
num_examples: 70
- name: validation
num_bytes: 24620.0
num_examples: 139
- name: test
num_bytes: 202116.0
num_examples: 1225
download_size: 157051
dataset_size: 238685.0
- config_name: lao_Laoo
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 22445.1
num_examples: 63
- name: validation
num_bytes: 47041.14388489209
num_examples: 121
- name: test
num_bytes: 382933.13469387754
num_examples: 1055
download_size: 206422
dataset_size: 452419.3785787696
- config_name: lij_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11492.0
num_examples: 70
- name: validation
num_bytes: 24039.0
num_examples: 139
- name: test
num_bytes: 197929.0
num_examples: 1225
download_size: 159663
dataset_size: 233460.0
- config_name: lim_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10553.0
num_examples: 70
- name: validation
num_bytes: 22468.0
num_examples: 139
- name: test
num_bytes: 182564.8457142857
num_examples: 1224
download_size: 149516
dataset_size: 215585.8457142857
- config_name: lin_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10545.0
num_examples: 70
- name: validation
num_bytes: 22176.0
num_examples: 139
- name: test
num_bytes: 184640.0
num_examples: 1225
download_size: 126201
dataset_size: 217361.0
- config_name: lit_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10738.0
num_examples: 70
- name: validation
num_bytes: 21766.0
num_examples: 139
- name: test
num_bytes: 182762.0
num_examples: 1225
download_size: 152149
dataset_size: 215266.0
- config_name: lmo_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11477.0
num_examples: 70
- name: validation
num_bytes: 23957.0
num_examples: 139
- name: test
num_bytes: 197484.0
num_examples: 1225
download_size: 163032
dataset_size: 232918.0
- config_name: ltg_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10294.0
num_examples: 70
- name: validation
num_bytes: 21419.0
num_examples: 139
- name: test
num_bytes: 180105.0
num_examples: 1225
download_size: 149862
dataset_size: 211818.0
- config_name: ltz_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11119.0
num_examples: 70
- name: validation
num_bytes: 23884.0
num_examples: 139
- name: test
num_bytes: 195528.0
num_examples: 1225
download_size: 155424
dataset_size: 230531.0
- config_name: lua_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10692.0
num_examples: 70
- name: validation
num_bytes: 22336.0
num_examples: 139
- name: test
num_bytes: 183976.69224489795
num_examples: 1224
download_size: 139423
dataset_size: 217004.69224489795
- config_name: lug_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10610.0
num_examples: 70
- name: validation
num_bytes: 21158.0
num_examples: 139
- name: test
num_bytes: 177649.0
num_examples: 1225
download_size: 144935
dataset_size: 209417.0
- config_name: luo_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10572.0
num_examples: 70
- name: validation
num_bytes: 21534.0
num_examples: 139
- name: test
num_bytes: 179610.0
num_examples: 1225
download_size: 138426
dataset_size: 211716.0
- config_name: lus_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10249.0
num_examples: 70
- name: validation
num_bytes: 21973.769784172662
num_examples: 138
- name: test
num_bytes: 187821.55102040817
num_examples: 1224
download_size: 144052
dataset_size: 220044.3208045808
- config_name: lvs_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10724.0
num_examples: 70
- name: validation
num_bytes: 23294.0
num_examples: 139
- name: test
num_bytes: 190221.0
num_examples: 1225
download_size: 156274
dataset_size: 224239.0
- config_name: mag_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 22923.0
num_examples: 70
- name: validation
num_bytes: 48802.0
num_examples: 139
- name: test
num_bytes: 404136.0
num_examples: 1225
download_size: 205019
dataset_size: 475861.0
- config_name: mai_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 23642.0
num_examples: 70
- name: validation
num_bytes: 49755.0
num_examples: 139
- name: test
num_bytes: 414019.0
num_examples: 1225
download_size: 210828
dataset_size: 487416.0
- config_name: mal_Mlym
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 28813.0
num_examples: 70
- name: validation
num_bytes: 60793.0
num_examples: 139
- name: test
num_bytes: 504090.0
num_examples: 1225
download_size: 244639
dataset_size: 593696.0
- config_name: mar_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 25269.0
num_examples: 70
- name: validation
num_bytes: 53145.0
num_examples: 139
- name: test
num_bytes: 436262.0
num_examples: 1225
download_size: 222718
dataset_size: 514676.0
- config_name: min_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10304.0
num_examples: 70
- name: validation
num_bytes: 22100.0
num_examples: 139
- name: test
num_bytes: 182205.0
num_examples: 1225
download_size: 139192
dataset_size: 214609.0
- config_name: mkd_Cyrl
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 18109.0
num_examples: 70
- name: validation
num_bytes: 38074.0
num_examples: 139
- name: test
num_bytes: 313783.0
num_examples: 1225
download_size: 190914
dataset_size: 369966.0
- config_name: mlt_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11292.0
num_examples: 70
- name: validation
num_bytes: 23788.0
num_examples: 139
- name: test
num_bytes: 197824.0
num_examples: 1225
download_size: 157430
dataset_size: 232904.0
- config_name: mni_Beng
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 25959.0
num_examples: 70
- name: validation
num_bytes: 53758.0
num_examples: 139
- name: test
num_bytes: 451414.0
num_examples: 1225
download_size: 217574
dataset_size: 531131.0
- config_name: mos_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10233.0
num_examples: 70
- name: validation
num_bytes: 21654.0
num_examples: 139
- name: test
num_bytes: 177291.0
num_examples: 1225
download_size: 144699
dataset_size: 209178.0
- config_name: mri_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11204.0
num_examples: 70
- name: validation
num_bytes: 23697.0
num_examples: 139
- name: test
num_bytes: 197838.0
num_examples: 1225
download_size: 136556
dataset_size: 232739.0
- config_name: mya_Mymr
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 32282.0
num_examples: 70
- name: validation
num_bytes: 68851.0
num_examples: 139
- name: test
num_bytes: 571754.0
num_examples: 1225
download_size: 248904
dataset_size: 672887.0
- config_name: nld_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10904.0
num_examples: 70
- name: validation
num_bytes: 23125.0
num_examples: 139
- name: test
num_bytes: 190029.0
num_examples: 1225
download_size: 149736
dataset_size: 224058.0
- config_name: nno_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10176.0
num_examples: 70
- name: validation
num_bytes: 21406.0
num_examples: 139
- name: test
num_bytes: 176790.0
num_examples: 1225
download_size: 142630
dataset_size: 208372.0
- config_name: nob_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 10110.0
num_examples: 70
- name: validation
num_bytes: 21018.0
num_examples: 139
- name: test
num_bytes: 175982.0
num_examples: 1225
download_size: 141546
dataset_size: 207110.0
- config_name: npi_Deva
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 23340.0
num_examples: 70
- name: validation
num_bytes: 50361.0
num_examples: 139
- name: test
num_bytes: 418579.0236734694
num_examples: 1224
download_size: 213026
dataset_size: 492280.0236734694
- config_name: nqo_Nkoo
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 21734.0
num_examples: 70
- name: validation
num_bytes: 45424.0
num_examples: 139
- name: test
num_bytes: 378331.0
num_examples: 1225
download_size: 212357
dataset_size: 445489.0
- config_name: nso_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 11313.0
num_examples: 70
- name: validation
num_bytes: 23984.0
num_examples: 139
- name: test
num_bytes: 200127.0
num_examples: 1225
download_size: 146242
dataset_size: 235424.0
- config_name: nus_Latn
features:
- name: text
dtype: string
- name: label
dtype: int64
splits:
- name: train
num_bytes: 12185.0
num_examples: 70
- name: validation
num_bytes: 26842.0
num_examples: 139
- name: test
num_bytes: 222474.0
num_examples: 1225
download_size: 156536
dataset_size: 261501.0
configs:
- config_name: ace_Latn
data_files:
- split: train
path: ace_Latn/train-*
- split: validation
path: ace_Latn/validation-*
- split: test
path: ace_Latn/test-*
- config_name: acm_Arab
data_files:
- split: train
path: acm_Arab/train-*
- split: validation
path: acm_Arab/validation-*
- split: test
path: acm_Arab/test-*
- config_name: acq_Arab
data_files:
- split: train
path: acq_Arab/train-*
- split: validation
path: acq_Arab/validation-*
- split: test
path: acq_Arab/test-*
- config_name: aeb_Arab
data_files:
- split: train
path: aeb_Arab/train-*
- split: validation
path: aeb_Arab/validation-*
- split: test
path: aeb_Arab/test-*
- config_name: afr_Latn
data_files:
- split: train
path: afr_Latn/train-*
- split: validation
path: afr_Latn/validation-*
- split: test
path: afr_Latn/test-*
- config_name: ajp_Arab
data_files:
- split: train
path: ajp_Arab/train-*
- split: validation
path: ajp_Arab/validation-*
- split: test
path: ajp_Arab/test-*
- config_name: aka_Latn
data_files:
- split: train
path: aka_Latn/train-*
- split: validation
path: aka_Latn/validation-*
- split: test
path: aka_Latn/test-*
- config_name: als_Latn
data_files:
- split: train
path: als_Latn/train-*
- split: validation
path: als_Latn/validation-*
- split: test
path: als_Latn/test-*
- config_name: amh_Ethi
data_files:
- split: train
path: amh_Ethi/train-*
- split: validation
path: amh_Ethi/validation-*
- split: test
path: amh_Ethi/test-*
- config_name: apc_Arab
data_files:
- split: train
path: apc_Arab/train-*
- split: validation
path: apc_Arab/validation-*
- split: test
path: apc_Arab/test-*
- config_name: arb_Latn
data_files:
- split: train
path: arb_Latn/train-*
- split: validation
path: arb_Latn/validation-*
- split: test
path: arb_Latn/test-*
- config_name: ars_Arab
data_files:
- split: train
path: ars_Arab/train-*
- split: validation
path: ars_Arab/validation-*
- split: test
path: ars_Arab/test-*
- config_name: ary_Arab
data_files:
- split: train
path: ary_Arab/train-*
- split: validation
path: ary_Arab/validation-*
- split: test
path: ary_Arab/test-*
- config_name: arz_Arab
data_files:
- split: train
path: arz_Arab/train-*
- split: validation
path: arz_Arab/validation-*
- split: test
path: arz_Arab/test-*
- config_name: asm_Beng
data_files:
- split: train
path: asm_Beng/train-*
- split: validation
path: asm_Beng/validation-*
- split: test
path: asm_Beng/test-*
- config_name: ast_Latn
data_files:
- split: train
path: ast_Latn/train-*
- split: validation
path: ast_Latn/validation-*
- split: test
path: ast_Latn/test-*
- config_name: awa_Deva
data_files:
- split: train
path: awa_Deva/train-*
- split: validation
path: awa_Deva/validation-*
- split: test
path: awa_Deva/test-*
- config_name: ayr_Latn
data_files:
- split: train
path: ayr_Latn/train-*
- split: validation
path: ayr_Latn/validation-*
- split: test
path: ayr_Latn/test-*
- config_name: azb_Arab
data_files:
- split: train
path: azb_Arab/train-*
- split: validation
path: azb_Arab/validation-*
- split: test
path: azb_Arab/test-*
- config_name: azj_Latn
data_files:
- split: train
path: azj_Latn/train-*
- split: validation
path: azj_Latn/validation-*
- split: test
path: azj_Latn/test-*
- config_name: bak_Cyrl
data_files:
- split: train
path: bak_Cyrl/train-*
- split: validation
path: bak_Cyrl/validation-*
- split: test
path: bak_Cyrl/test-*
- config_name: bam_Latn
data_files:
- split: train
path: bam_Latn/train-*
- split: validation
path: bam_Latn/validation-*
- split: test
path: bam_Latn/test-*
- config_name: ban_Latn
data_files:
- split: train
path: ban_Latn/train-*
- split: validation
path: ban_Latn/validation-*
- split: test
path: ban_Latn/test-*
- config_name: bel_Cyrl
data_files:
- split: train
path: bel_Cyrl/train-*
- split: validation
path: bel_Cyrl/validation-*
- split: test
path: bel_Cyrl/test-*
- config_name: bem_Latn
data_files:
- split: train
path: bem_Latn/train-*
- split: validation
path: bem_Latn/validation-*
- split: test
path: bem_Latn/test-*
- config_name: ben_Beng
data_files:
- split: train
path: ben_Beng/train-*
- split: validation
path: ben_Beng/validation-*
- split: test
path: ben_Beng/test-*
- config_name: bho_Deva
data_files:
- split: train
path: bho_Deva/train-*
- split: validation
path: bho_Deva/validation-*
- split: test
path: bho_Deva/test-*
- config_name: bjn_Latn
data_files:
- split: train
path: bjn_Latn/train-*
- split: validation
path: bjn_Latn/validation-*
- split: test
path: bjn_Latn/test-*
- config_name: bod_Tibt
data_files:
- split: train
path: bod_Tibt/train-*
- split: validation
path: bod_Tibt/validation-*
- split: test
path: bod_Tibt/test-*
- config_name: bos_Latn
data_files:
- split: train
path: bos_Latn/train-*
- split: validation
path: bos_Latn/validation-*
- split: test
path: bos_Latn/test-*
- config_name: bug_Latn
data_files:
- split: train
path: bug_Latn/train-*
- split: validation
path: bug_Latn/validation-*
- split: test
path: bug_Latn/test-*
- config_name: bul_Cyrl
data_files:
- split: train
path: bul_Cyrl/train-*
- split: validation
path: bul_Cyrl/validation-*
- split: test
path: bul_Cyrl/test-*
- config_name: cat_Latn
data_files:
- split: train
path: cat_Latn/train-*
- split: validation
path: cat_Latn/validation-*
- split: test
path: cat_Latn/test-*
- config_name: ceb_Latn
data_files:
- split: train
path: ceb_Latn/train-*
- split: validation
path: ceb_Latn/validation-*
- split: test
path: ceb_Latn/test-*
- config_name: ces_Latn
data_files:
- split: train
path: ces_Latn/train-*
- split: validation
path: ces_Latn/validation-*
- split: test
path: ces_Latn/test-*
- config_name: cjk_Latn
data_files:
- split: train
path: cjk_Latn/train-*
- split: validation
path: cjk_Latn/validation-*
- split: test
path: cjk_Latn/test-*
- config_name: ckb_Arab
data_files:
- split: train
path: ckb_Arab/train-*
- split: validation
path: ckb_Arab/validation-*
- split: test
path: ckb_Arab/test-*
- config_name: crh_Latn
data_files:
- split: train
path: crh_Latn/train-*
- split: validation
path: crh_Latn/validation-*
- split: test
path: crh_Latn/test-*
- config_name: cym_Latn
data_files:
- split: train
path: cym_Latn/train-*
- split: validation
path: cym_Latn/validation-*
- split: test
path: cym_Latn/test-*
- config_name: dan_Latn
data_files:
- split: train
path: dan_Latn/train-*
- split: validation
path: dan_Latn/validation-*
- split: test
path: dan_Latn/test-*
- config_name: deu_Latn
data_files:
- split: train
path: deu_Latn/train-*
- split: validation
path: deu_Latn/validation-*
- split: test
path: deu_Latn/test-*
- config_name: dik_Latn
data_files:
- split: train
path: dik_Latn/train-*
- split: validation
path: dik_Latn/validation-*
- split: test
path: dik_Latn/test-*
- config_name: dyu_Latn
data_files:
- split: train
path: dyu_Latn/train-*
- split: validation
path: dyu_Latn/validation-*
- split: test
path: dyu_Latn/test-*
- config_name: dzo_Tibt
data_files:
- split: train
path: dzo_Tibt/train-*
- split: validation
path: dzo_Tibt/validation-*
- split: test
path: dzo_Tibt/test-*
- config_name: ell_Grek
data_files:
- split: train
path: ell_Grek/train-*
- split: validation
path: ell_Grek/validation-*
- split: test
path: ell_Grek/test-*
- config_name: eng_Latn
data_files:
- split: train
path: eng_Latn/train-*
- split: validation
path: eng_Latn/validation-*
- split: test
path: eng_Latn/test-*
- config_name: epo_Latn
data_files:
- split: train
path: epo_Latn/train-*
- split: validation
path: epo_Latn/validation-*
- split: test
path: epo_Latn/test-*
- config_name: est_Latn
data_files:
- split: train
path: est_Latn/train-*
- split: validation
path: est_Latn/validation-*
- split: test
path: est_Latn/test-*
- config_name: eus_Latn
data_files:
- split: train
path: eus_Latn/train-*
- split: validation
path: eus_Latn/validation-*
- split: test
path: eus_Latn/test-*
- config_name: ewe_Latn
data_files:
- split: train
path: ewe_Latn/train-*
- split: validation
path: ewe_Latn/validation-*
- split: test
path: ewe_Latn/test-*
- config_name: fao_Latn
data_files:
- split: train
path: fao_Latn/train-*
- split: validation
path: fao_Latn/validation-*
- split: test
path: fao_Latn/test-*
- config_name: fij_Latn
data_files:
- split: train
path: fij_Latn/train-*
- split: validation
path: fij_Latn/validation-*
- split: test
path: fij_Latn/test-*
- config_name: fin_Latn
data_files:
- split: train
path: fin_Latn/train-*
- split: validation
path: fin_Latn/validation-*
- split: test
path: fin_Latn/test-*
- config_name: fon_Latn
data_files:
- split: train
path: fon_Latn/train-*
- split: validation
path: fon_Latn/validation-*
- split: test
path: fon_Latn/test-*
- config_name: fra_Latn
data_files:
- split: train
path: fra_Latn/train-*
- split: validation
path: fra_Latn/validation-*
- split: test
path: fra_Latn/test-*
- config_name: fur_Latn
data_files:
- split: train
path: fur_Latn/train-*
- split: validation
path: fur_Latn/validation-*
- split: test
path: fur_Latn/test-*
- config_name: fuv_Latn
data_files:
- split: train
path: fuv_Latn/train-*
- split: validation
path: fuv_Latn/validation-*
- split: test
path: fuv_Latn/test-*
- config_name: gaz_Latn
data_files:
- split: train
path: gaz_Latn/train-*
- split: validation
path: gaz_Latn/validation-*
- split: test
path: gaz_Latn/test-*
- config_name: gla_Latn
data_files:
- split: train
path: gla_Latn/train-*
- split: validation
path: gla_Latn/validation-*
- split: test
path: gla_Latn/test-*
- config_name: gle_Latn
data_files:
- split: train
path: gle_Latn/train-*
- split: validation
path: gle_Latn/validation-*
- split: test
path: gle_Latn/test-*
- config_name: glg_Latn
data_files:
- split: train
path: glg_Latn/train-*
- split: validation
path: glg_Latn/validation-*
- split: test
path: glg_Latn/test-*
- config_name: grn_Latn
data_files:
- split: train
path: grn_Latn/train-*
- split: validation
path: grn_Latn/validation-*
- split: test
path: grn_Latn/test-*
- config_name: guj_Gujr
data_files:
- split: train
path: guj_Gujr/train-*
- split: validation
path: guj_Gujr/validation-*
- split: test
path: guj_Gujr/test-*
- config_name: hat_Latn
data_files:
- split: train
path: hat_Latn/train-*
- split: validation
path: hat_Latn/validation-*
- split: test
path: hat_Latn/test-*
- config_name: hau_Latn
data_files:
- split: train
path: hau_Latn/train-*
- split: validation
path: hau_Latn/validation-*
- split: test
path: hau_Latn/test-*
- config_name: heb_Hebr
data_files:
- split: train
path: heb_Hebr/train-*
- split: validation
path: heb_Hebr/validation-*
- split: test
path: heb_Hebr/test-*
- config_name: hin_Deva
data_files:
- split: train
path: hin_Deva/train-*
- split: validation
path: hin_Deva/validation-*
- split: test
path: hin_Deva/test-*
- config_name: hne_Deva
data_files:
- split: train
path: hne_Deva/train-*
- split: validation
path: hne_Deva/validation-*
- split: test
path: hne_Deva/test-*
- config_name: hrv_Latn
data_files:
- split: train
path: hrv_Latn/train-*
- split: validation
path: hrv_Latn/validation-*
- split: test
path: hrv_Latn/test-*
- config_name: hun_Latn
data_files:
- split: train
path: hun_Latn/train-*
- split: validation
path: hun_Latn/validation-*
- split: test
path: hun_Latn/test-*
- config_name: hye_Armn
data_files:
- split: train
path: hye_Armn/train-*
- split: validation
path: hye_Armn/validation-*
- split: test
path: hye_Armn/test-*
- config_name: ibo_Latn
data_files:
- split: train
path: ibo_Latn/train-*
- split: validation
path: ibo_Latn/validation-*
- split: test
path: ibo_Latn/test-*
- config_name: ilo_Latn
data_files:
- split: train
path: ilo_Latn/train-*
- split: validation
path: ilo_Latn/validation-*
- split: test
path: ilo_Latn/test-*
- config_name: ind_Latn
data_files:
- split: train
path: ind_Latn/train-*
- split: validation
path: ind_Latn/validation-*
- split: test
path: ind_Latn/test-*
- config_name: isl_Latn
data_files:
- split: train
path: isl_Latn/train-*
- split: validation
path: isl_Latn/validation-*
- split: test
path: isl_Latn/test-*
- config_name: ita_Latn
data_files:
- split: train
path: ita_Latn/train-*
- split: validation
path: ita_Latn/validation-*
- split: test
path: ita_Latn/test-*
- config_name: jav_Latn
data_files:
- split: train
path: jav_Latn/train-*
- split: validation
path: jav_Latn/validation-*
- split: test
path: jav_Latn/test-*
- config_name: jpn_Jpan
data_files:
- split: train
path: jpn_Jpan/train-*
- split: validation
path: jpn_Jpan/validation-*
- split: test
path: jpn_Jpan/test-*
- config_name: kab_Latn
data_files:
- split: train
path: kab_Latn/train-*
- split: validation
path: kab_Latn/validation-*
- split: test
path: kab_Latn/test-*
- config_name: kac_Latn
data_files:
- split: train
path: kac_Latn/train-*
- split: validation
path: kac_Latn/validation-*
- split: test
path: kac_Latn/test-*
- config_name: kam_Latn
data_files:
- split: train
path: kam_Latn/train-*
- split: validation
path: kam_Latn/validation-*
- split: test
path: kam_Latn/test-*
- config_name: kan_Knda
data_files:
- split: train
path: kan_Knda/train-*
- split: validation
path: kan_Knda/validation-*
- split: test
path: kan_Knda/test-*
- config_name: kas_Deva
data_files:
- split: train
path: kas_Deva/train-*
- split: validation
path: kas_Deva/validation-*
- split: test
path: kas_Deva/test-*
- config_name: kat_Geor
data_files:
- split: train
path: kat_Geor/train-*
- split: validation
path: kat_Geor/validation-*
- split: test
path: kat_Geor/test-*
- config_name: kaz_Cyrl
data_files:
- split: train
path: kaz_Cyrl/train-*
- split: validation
path: kaz_Cyrl/validation-*
- split: test
path: kaz_Cyrl/test-*
- config_name: kbp_Latn
data_files:
- split: train
path: kbp_Latn/train-*
- split: validation
path: kbp_Latn/validation-*
- split: test
path: kbp_Latn/test-*
- config_name: kea_Latn
data_files:
- split: train
path: kea_Latn/train-*
- split: validation
path: kea_Latn/validation-*
- split: test
path: kea_Latn/test-*
- config_name: khk_Cyrl
data_files:
- split: train
path: khk_Cyrl/train-*
- split: validation
path: khk_Cyrl/validation-*
- split: test
path: khk_Cyrl/test-*
- config_name: khm_Khmr
data_files:
- split: train
path: khm_Khmr/train-*
- split: validation
path: khm_Khmr/validation-*
- split: test
path: khm_Khmr/test-*
- config_name: kik_Latn
data_files:
- split: train
path: kik_Latn/train-*
- split: validation
path: kik_Latn/validation-*
- split: test
path: kik_Latn/test-*
- config_name: kin_Latn
data_files:
- split: train
path: kin_Latn/train-*
- split: validation
path: kin_Latn/validation-*
- split: test
path: kin_Latn/test-*
- config_name: kir_Cyrl
data_files:
- split: train
path: kir_Cyrl/train-*
- split: validation
path: kir_Cyrl/validation-*
- split: test
path: kir_Cyrl/test-*
- config_name: kmb_Latn
data_files:
- split: train
path: kmb_Latn/train-*
- split: validation
path: kmb_Latn/validation-*
- split: test
path: kmb_Latn/test-*
- config_name: kmr_Latn
data_files:
- split: train
path: kmr_Latn/train-*
- split: validation
path: kmr_Latn/validation-*
- split: test
path: kmr_Latn/test-*
- config_name: knc_Latn
data_files:
- split: train
path: knc_Latn/train-*
- split: validation
path: knc_Latn/validation-*
- split: test
path: knc_Latn/test-*
- config_name: kon_Latn
data_files:
- split: train
path: kon_Latn/train-*
- split: validation
path: kon_Latn/validation-*
- split: test
path: kon_Latn/test-*
- config_name: kor_Hang
data_files:
- split: train
path: kor_Hang/train-*
- split: validation
path: kor_Hang/validation-*
- split: test
path: kor_Hang/test-*
- config_name: lao_Laoo
data_files:
- split: train
path: lao_Laoo/train-*
- split: validation
path: lao_Laoo/validation-*
- split: test
path: lao_Laoo/test-*
- config_name: lij_Latn
data_files:
- split: train
path: lij_Latn/train-*
- split: validation
path: lij_Latn/validation-*
- split: test
path: lij_Latn/test-*
- config_name: lim_Latn
data_files:
- split: train
path: lim_Latn/train-*
- split: validation
path: lim_Latn/validation-*
- split: test
path: lim_Latn/test-*
- config_name: lin_Latn
data_files:
- split: train
path: lin_Latn/train-*
- split: validation
path: lin_Latn/validation-*
- split: test
path: lin_Latn/test-*
- config_name: lit_Latn
data_files:
- split: train
path: lit_Latn/train-*
- split: validation
path: lit_Latn/validation-*
- split: test
path: lit_Latn/test-*
- config_name: lmo_Latn
data_files:
- split: train
path: lmo_Latn/train-*
- split: validation
path: lmo_Latn/validation-*
- split: test
path: lmo_Latn/test-*
- config_name: ltg_Latn
data_files:
- split: train
path: ltg_Latn/train-*
- split: validation
path: ltg_Latn/validation-*
- split: test
path: ltg_Latn/test-*
- config_name: ltz_Latn
data_files:
- split: train
path: ltz_Latn/train-*
- split: validation
path: ltz_Latn/validation-*
- split: test
path: ltz_Latn/test-*
- config_name: lua_Latn
data_files:
- split: train
path: lua_Latn/train-*
- split: validation
path: lua_Latn/validation-*
- split: test
path: lua_Latn/test-*
- config_name: lug_Latn
data_files:
- split: train
path: lug_Latn/train-*
- split: validation
path: lug_Latn/validation-*
- split: test
path: lug_Latn/test-*
- config_name: luo_Latn
data_files:
- split: train
path: luo_Latn/train-*
- split: validation
path: luo_Latn/validation-*
- split: test
path: luo_Latn/test-*
- config_name: lus_Latn
data_files:
- split: train
path: lus_Latn/train-*
- split: validation
path: lus_Latn/validation-*
- split: test
path: lus_Latn/test-*
- config_name: lvs_Latn
data_files:
- split: train
path: lvs_Latn/train-*
- split: validation
path: lvs_Latn/validation-*
- split: test
path: lvs_Latn/test-*
- config_name: mag_Deva
data_files:
- split: train
path: mag_Deva/train-*
- split: validation
path: mag_Deva/validation-*
- split: test
path: mag_Deva/test-*
- config_name: mai_Deva
data_files:
- split: train
path: mai_Deva/train-*
- split: validation
path: mai_Deva/validation-*
- split: test
path: mai_Deva/test-*
- config_name: mal_Mlym
data_files:
- split: train
path: mal_Mlym/train-*
- split: validation
path: mal_Mlym/validation-*
- split: test
path: mal_Mlym/test-*
- config_name: mar_Deva
data_files:
- split: train
path: mar_Deva/train-*
- split: validation
path: mar_Deva/validation-*
- split: test
path: mar_Deva/test-*
- config_name: min_Latn
data_files:
- split: train
path: min_Latn/train-*
- split: validation
path: min_Latn/validation-*
- split: test
path: min_Latn/test-*
- config_name: mkd_Cyrl
data_files:
- split: train
path: mkd_Cyrl/train-*
- split: validation
path: mkd_Cyrl/validation-*
- split: test
path: mkd_Cyrl/test-*
- config_name: mlt_Latn
data_files:
- split: train
path: mlt_Latn/train-*
- split: validation
path: mlt_Latn/validation-*
- split: test
path: mlt_Latn/test-*
- config_name: mni_Beng
data_files:
- split: train
path: mni_Beng/train-*
- split: validation
path: mni_Beng/validation-*
- split: test
path: mni_Beng/test-*
- config_name: mos_Latn
data_files:
- split: train
path: mos_Latn/train-*
- split: validation
path: mos_Latn/validation-*
- split: test
path: mos_Latn/test-*
- config_name: mri_Latn
data_files:
- split: train
path: mri_Latn/train-*
- split: validation
path: mri_Latn/validation-*
- split: test
path: mri_Latn/test-*
- config_name: mya_Mymr
data_files:
- split: train
path: mya_Mymr/train-*
- split: validation
path: mya_Mymr/validation-*
- split: test
path: mya_Mymr/test-*
- config_name: nld_Latn
data_files:
- split: train
path: nld_Latn/train-*
- split: validation
path: nld_Latn/validation-*
- split: test
path: nld_Latn/test-*
- config_name: nno_Latn
data_files:
- split: train
path: nno_Latn/train-*
- split: validation
path: nno_Latn/validation-*
- split: test
path: nno_Latn/test-*
- config_name: nob_Latn
data_files:
- split: train
path: nob_Latn/train-*
- split: validation
path: nob_Latn/validation-*
- split: test
path: nob_Latn/test-*
- config_name: npi_Deva
data_files:
- split: train
path: npi_Deva/train-*
- split: validation
path: npi_Deva/validation-*
- split: test
path: npi_Deva/test-*
- config_name: nqo_Nkoo
data_files:
- split: train
path: nqo_Nkoo/train-*
- split: validation
path: nqo_Nkoo/validation-*
- split: test
path: nqo_Nkoo/test-*
- config_name: nso_Latn
data_files:
- split: train
path: nso_Latn/train-*
- split: validation
path: nso_Latn/validation-*
- split: test
path: nso_Latn/test-*
- config_name: nus_Latn
data_files:
- split: train
path: nus_Latn/train-*
- split: validation
path: nus_Latn/validation-*
- split: test
path: nus_Latn/test-*
---
提供机构:
KoseiUemura



