mbzuai-ugrip-statement-tuning/belebele
收藏 | Hugging Face | 2024-06-11 更新 | 2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/mbzuai-ugrip-statement-tuning/belebele
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: acm_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1688141
num_examples: 1800
download_size: 379232
dataset_size: 1688141
- config_name: afr_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1191949
num_examples: 1800
download_size: 305533
dataset_size: 1191949
- config_name: als_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1322010
num_examples: 1800
download_size: 332023
dataset_size: 1322010
- config_name: amh_Ethi
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1867736
num_examples: 1800
download_size: 422551
dataset_size: 1867736
- config_name: apc_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1617059
num_examples: 1800
download_size: 364431
dataset_size: 1617059
- config_name: arb_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1723440
num_examples: 1800
download_size: 386464
dataset_size: 1723440
- config_name: arb_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1250964
num_examples: 1800
download_size: 352248
dataset_size: 1250964
- config_name: ars_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1717634
num_examples: 1800
download_size: 387967
dataset_size: 1717634
- config_name: ary_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1716253
num_examples: 1800
download_size: 389130
dataset_size: 1716253
- config_name: arz_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1683992
num_examples: 1800
download_size: 379939
dataset_size: 1683992
- config_name: asm_Beng
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2729329
num_examples: 1800
download_size: 534103
dataset_size: 2729329
- config_name: azj_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1380150
num_examples: 1800
download_size: 344852
dataset_size: 1380150
- config_name: bam_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1175129
num_examples: 1800
download_size: 295946
dataset_size: 1175129
- config_name: ben_Beng
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2791017
num_examples: 1800
download_size: 522440
dataset_size: 2791017
- config_name: ben_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1210191
num_examples: 1800
download_size: 318301
dataset_size: 1210191
- config_name: bod_Tibt
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 3546427
num_examples: 1800
download_size: 572873
dataset_size: 3546427
- config_name: bul_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2041305
num_examples: 1800
download_size: 442316
dataset_size: 2041305
- config_name: cat_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1242257
num_examples: 1800
download_size: 321205
dataset_size: 1242257
- config_name: ceb_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1312609
num_examples: 1800
download_size: 322137
dataset_size: 1312609
- config_name: ces_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1198322
num_examples: 1800
download_size: 326534
dataset_size: 1198322
- config_name: ckb_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1939446
num_examples: 1800
download_size: 424605
dataset_size: 1939446
- config_name: dan_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1166812
num_examples: 1800
download_size: 302287
dataset_size: 1166812
- config_name: deu_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1289814
num_examples: 1800
download_size: 329593
dataset_size: 1289814
- config_name: ell_Grek
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2343408
num_examples: 1800
download_size: 510292
dataset_size: 2343408
- config_name: eng_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1127828
num_examples: 1800
download_size: 286019
dataset_size: 1127828
- config_name: est_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1121599
num_examples: 1800
download_size: 303654
dataset_size: 1121599
- config_name: eus_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1182872
num_examples: 1800
download_size: 299344
dataset_size: 1182872
- config_name: fin_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1220088
num_examples: 1800
download_size: 317046
dataset_size: 1220088
- config_name: fra_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1375977
num_examples: 1800
download_size: 348408
dataset_size: 1375977
- config_name: fuv_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1049782
num_examples: 1800
download_size: 293431
dataset_size: 1049782
- config_name: gaz_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1327526
num_examples: 1800
download_size: 339266
dataset_size: 1327526
- config_name: grn_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1215571
num_examples: 1800
download_size: 302153
dataset_size: 1215571
- config_name: guj_Gujr
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2649771
num_examples: 1800
download_size: 507321
dataset_size: 2649771
- config_name: hat_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1068379
num_examples: 1800
download_size: 276575
dataset_size: 1068379
- config_name: hau_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1214872
num_examples: 1800
download_size: 310605
dataset_size: 1214872
- config_name: heb_Hebr
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1521083
num_examples: 1800
download_size: 351410
dataset_size: 1521083
- config_name: hin_Deva
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2732641
num_examples: 1800
download_size: 511170
dataset_size: 2732641
- config_name: hin_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1241492
num_examples: 1800
download_size: 317468
dataset_size: 1241492
- config_name: hrv_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1124220
num_examples: 1800
download_size: 306104
dataset_size: 1124220
- config_name: hun_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1275815
num_examples: 1800
download_size: 335648
dataset_size: 1275815
- config_name: hye_Armn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2187475
num_examples: 1800
download_size: 473622
dataset_size: 2187475
- config_name: ibo_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1326129
num_examples: 1800
download_size: 323528
dataset_size: 1326129
- config_name: ilo_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1334087
num_examples: 1800
download_size: 327120
dataset_size: 1334087
- config_name: ind_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1208396
num_examples: 1800
download_size: 296320
dataset_size: 1208396
- config_name: isl_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1222860
num_examples: 1800
download_size: 315295
dataset_size: 1222860
- config_name: ita_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1307299
num_examples: 1800
download_size: 335154
dataset_size: 1307299
- config_name: jav_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1162790
num_examples: 1800
download_size: 286292
dataset_size: 1162790
- config_name: jpn_Jpan
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1411321
num_examples: 1800
download_size: 338120
dataset_size: 1411321
- config_name: kac_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1412363
num_examples: 1800
download_size: 330636
dataset_size: 1412363
- config_name: kan_Knda
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 3010834
num_examples: 1800
download_size: 556128
dataset_size: 3010834
- config_name: kat_Geor
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 3126653
num_examples: 1800
download_size: 557103
dataset_size: 3126653
- config_name: kaz_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2027683
num_examples: 1800
download_size: 434943
dataset_size: 2027683
- config_name: kea_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1138209
num_examples: 1800
download_size: 300269
dataset_size: 1138209
- config_name: khk_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2057143
num_examples: 1800
download_size: 446112
dataset_size: 2057143
- config_name: khm_Khmr
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 3484857
num_examples: 1800
download_size: 651538
dataset_size: 3484857
- config_name: kin_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1258766
num_examples: 1800
download_size: 316372
dataset_size: 1258766
- config_name: kir_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2043020
num_examples: 1800
download_size: 444963
dataset_size: 2043020
- config_name: kor_Hang
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1313809
num_examples: 1800
download_size: 322966
dataset_size: 1313809
- config_name: lao_Laoo
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2870348
num_examples: 1800
download_size: 543802
dataset_size: 2870348
- config_name: lin_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1209733
num_examples: 1800
download_size: 290062
dataset_size: 1209733
- config_name: lit_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1179373
num_examples: 1800
download_size: 311666
dataset_size: 1179373
- config_name: lug_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1164045
num_examples: 1800
download_size: 312871
dataset_size: 1164045
- config_name: luo_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1161393
num_examples: 1800
download_size: 301534
dataset_size: 1161393
- config_name: lvs_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1232742
num_examples: 1800
download_size: 323660
dataset_size: 1232742
- config_name: mal_Mlym
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 3306271
num_examples: 1800
download_size: 597653
dataset_size: 3306271
- config_name: mar_Deva
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2836405
num_examples: 1800
download_size: 525942
dataset_size: 2836405
- config_name: mkd_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2042650
num_examples: 1800
download_size: 441091
dataset_size: 2042650
- config_name: mlt_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1275245
num_examples: 1800
download_size: 331307
dataset_size: 1275245
- config_name: mri_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1282250
num_examples: 1800
download_size: 307227
dataset_size: 1282250
- config_name: mya_Mymr
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 3737832
num_examples: 1800
download_size: 624145
dataset_size: 3737832
- config_name: nld_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1226443
num_examples: 1800
download_size: 313117
dataset_size: 1226443
- config_name: nob_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1142679
num_examples: 1800
download_size: 297190
dataset_size: 1142679
- config_name: npi_Deva
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2723502
num_examples: 1800
download_size: 522636
dataset_size: 2723502
- config_name: npi_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1188854
num_examples: 1800
download_size: 309354
dataset_size: 1188854
- config_name: nso_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1312061
num_examples: 1800
download_size: 329618
dataset_size: 1312061
- config_name: nya_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1266056
num_examples: 1800
download_size: 320531
dataset_size: 1266056
- config_name: ory_Orya
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2918278
num_examples: 1800
download_size: 539005
dataset_size: 2918278
- config_name: pan_Guru
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2770290
num_examples: 1800
download_size: 518873
dataset_size: 2770290
- config_name: pbt_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1802654
num_examples: 1800
download_size: 417002
dataset_size: 1802654
- config_name: pes_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1829970
num_examples: 1800
download_size: 405681
dataset_size: 1829970
- config_name: plt_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1398151
num_examples: 1800
download_size: 327390
dataset_size: 1398151
- config_name: pol_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1239564
num_examples: 1800
download_size: 333245
dataset_size: 1239564
- config_name: por_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1237048
num_examples: 1800
download_size: 317386
dataset_size: 1237048
- config_name: ron_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1299373
num_examples: 1800
download_size: 334785
dataset_size: 1299373
- config_name: rus_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2165439
num_examples: 1800
download_size: 479282
dataset_size: 2165439
- config_name: shn_Mymr
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 3984112
num_examples: 1800
download_size: 686658
dataset_size: 3984112
- config_name: sin_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1379986
num_examples: 1800
download_size: 353013
dataset_size: 1379986
- config_name: sin_Sinh
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2821909
num_examples: 1800
download_size: 541194
dataset_size: 2821909
- config_name: slk_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1202990
num_examples: 1800
download_size: 325838
dataset_size: 1202990
- config_name: slv_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1131603
num_examples: 1800
download_size: 307183
dataset_size: 1131603
- config_name: sna_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1244171
num_examples: 1800
download_size: 313519
dataset_size: 1244171
- config_name: snd_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1736018
num_examples: 1800
download_size: 391881
dataset_size: 1736018
- config_name: som_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1284822
num_examples: 1800
download_size: 349676
dataset_size: 1284822
- config_name: sot_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1321000
num_examples: 1800
download_size: 329791
dataset_size: 1321000
- config_name: spa_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1325181
num_examples: 1800
download_size: 337529
dataset_size: 1325181
- config_name: srp_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1932947
num_examples: 1800
download_size: 423333
dataset_size: 1932947
- config_name: ssw_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1245845
num_examples: 1800
download_size: 321368
dataset_size: 1245845
- config_name: sun_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1172821
num_examples: 1800
download_size: 301236
dataset_size: 1172821
- config_name: swe_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1157582
num_examples: 1800
download_size: 293472
dataset_size: 1157582
- config_name: swh_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1176976
num_examples: 1800
download_size: 293224
dataset_size: 1176976
- config_name: tam_Taml
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 3370693
num_examples: 1800
download_size: 580491
dataset_size: 3370693
- config_name: tel_Telu
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2870621
num_examples: 1800
download_size: 534852
dataset_size: 2870621
- config_name: tgk_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2150103
num_examples: 1800
download_size: 459873
dataset_size: 2150103
- config_name: tgl_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1366097
num_examples: 1800
download_size: 329264
dataset_size: 1366097
- config_name: tha_Thai
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2884348
num_examples: 1800
download_size: 536314
dataset_size: 2884348
- config_name: tir_Ethi
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1907163
num_examples: 1800
download_size: 443746
dataset_size: 1907163
- config_name: tsn_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1365171
num_examples: 1800
download_size: 340666
dataset_size: 1365171
- config_name: tso_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1338654
num_examples: 1800
download_size: 321622
dataset_size: 1338654
- config_name: tur_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1234855
num_examples: 1800
download_size: 313253
dataset_size: 1234855
- config_name: ukr_Cyrl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2016868
num_examples: 1800
download_size: 457436
dataset_size: 2016868
- config_name: urd_Arab
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1898885
num_examples: 1800
download_size: 420793
dataset_size: 1898885
- config_name: urd_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1363434
num_examples: 1800
download_size: 355703
dataset_size: 1363434
- config_name: uzn_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1246344
num_examples: 1800
download_size: 312723
dataset_size: 1246344
- config_name: vie_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1515363
num_examples: 1800
download_size: 350295
dataset_size: 1515363
- config_name: war_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1365106
num_examples: 1800
download_size: 324003
dataset_size: 1365106
- config_name: wol_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1116734
num_examples: 1800
download_size: 309391
dataset_size: 1116734
- config_name: xho_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1185368
num_examples: 1800
download_size: 316998
dataset_size: 1185368
- config_name: yor_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1374489
num_examples: 1800
download_size: 356176
dataset_size: 1374489
- config_name: zho_Hans
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1027285
num_examples: 1800
download_size: 281363
dataset_size: 1027285
- config_name: zho_Hant
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 991124
num_examples: 1800
download_size: 273526
dataset_size: 991124
- config_name: zsm_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1228528
num_examples: 1800
download_size: 294592
dataset_size: 1228528
- config_name: zul_Latn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1233798
num_examples: 1800
download_size: 323033
dataset_size: 1233798
configs:
- config_name: acm_Arab
data_files:
- split: train
path: acm_Arab/train-*
- config_name: afr_Latn
data_files:
- split: train
path: afr_Latn/train-*
- config_name: als_Latn
data_files:
- split: train
path: als_Latn/train-*
- config_name: amh_Ethi
data_files:
- split: train
path: amh_Ethi/train-*
- config_name: apc_Arab
data_files:
- split: train
path: apc_Arab/train-*
- config_name: arb_Arab
data_files:
- split: train
path: arb_Arab/train-*
- config_name: arb_Latn
data_files:
- split: train
path: arb_Latn/train-*
- config_name: ars_Arab
data_files:
- split: train
path: ars_Arab/train-*
- config_name: ary_Arab
data_files:
- split: train
path: ary_Arab/train-*
- config_name: arz_Arab
data_files:
- split: train
path: arz_Arab/train-*
- config_name: asm_Beng
data_files:
- split: train
path: asm_Beng/train-*
- config_name: azj_Latn
data_files:
- split: train
path: azj_Latn/train-*
- config_name: bam_Latn
data_files:
- split: train
path: bam_Latn/train-*
- config_name: ben_Beng
data_files:
- split: train
path: ben_Beng/train-*
- config_name: ben_Latn
data_files:
- split: train
path: ben_Latn/train-*
- config_name: bod_Tibt
data_files:
- split: train
path: bod_Tibt/train-*
- config_name: bul_Cyrl
data_files:
- split: train
path: bul_Cyrl/train-*
- config_name: cat_Latn
data_files:
- split: train
path: cat_Latn/train-*
- config_name: ceb_Latn
data_files:
- split: train
path: ceb_Latn/train-*
- config_name: ces_Latn
data_files:
- split: train
path: ces_Latn/train-*
- config_name: ckb_Arab
data_files:
- split: train
path: ckb_Arab/train-*
- config_name: dan_Latn
data_files:
- split: train
path: dan_Latn/train-*
- config_name: deu_Latn
data_files:
- split: train
path: deu_Latn/train-*
- config_name: ell_Grek
data_files:
- split: train
path: ell_Grek/train-*
- config_name: eng_Latn
data_files:
- split: train
path: eng_Latn/train-*
- config_name: est_Latn
data_files:
- split: train
path: est_Latn/train-*
- config_name: eus_Latn
data_files:
- split: train
path: eus_Latn/train-*
- config_name: fin_Latn
data_files:
- split: train
path: fin_Latn/train-*
- config_name: fra_Latn
data_files:
- split: train
path: fra_Latn/train-*
- config_name: fuv_Latn
data_files:
- split: train
path: fuv_Latn/train-*
- config_name: gaz_Latn
data_files:
- split: train
path: gaz_Latn/train-*
- config_name: grn_Latn
data_files:
- split: train
path: grn_Latn/train-*
- config_name: guj_Gujr
data_files:
- split: train
path: guj_Gujr/train-*
- config_name: hat_Latn
data_files:
- split: train
path: hat_Latn/train-*
- config_name: hau_Latn
data_files:
- split: train
path: hau_Latn/train-*
- config_name: heb_Hebr
data_files:
- split: train
path: heb_Hebr/train-*
- config_name: hin_Deva
data_files:
- split: train
path: hin_Deva/train-*
- config_name: hin_Latn
data_files:
- split: train
path: hin_Latn/train-*
- config_name: hrv_Latn
data_files:
- split: train
path: hrv_Latn/train-*
- config_name: hun_Latn
data_files:
- split: train
path: hun_Latn/train-*
- config_name: hye_Armn
data_files:
- split: train
path: hye_Armn/train-*
- config_name: ibo_Latn
data_files:
- split: train
path: ibo_Latn/train-*
- config_name: ilo_Latn
data_files:
- split: train
path: ilo_Latn/train-*
- config_name: ind_Latn
data_files:
- split: train
path: ind_Latn/train-*
- config_name: isl_Latn
data_files:
- split: train
path: isl_Latn/train-*
- config_name: ita_Latn
data_files:
- split: train
path: ita_Latn/train-*
- config_name: jav_Latn
data_files:
- split: train
path: jav_Latn/train-*
- config_name: jpn_Jpan
data_files:
- split: train
path: jpn_Jpan/train-*
- config_name: kac_Latn
data_files:
- split: train
path: kac_Latn/train-*
- config_name: kan_Knda
data_files:
- split: train
path: kan_Knda/train-*
- config_name: kat_Geor
data_files:
- split: train
path: kat_Geor/train-*
- config_name: kaz_Cyrl
data_files:
- split: train
path: kaz_Cyrl/train-*
- config_name: kea_Latn
data_files:
- split: train
path: kea_Latn/train-*
- config_name: khk_Cyrl
data_files:
- split: train
path: khk_Cyrl/train-*
- config_name: khm_Khmr
data_files:
- split: train
path: khm_Khmr/train-*
- config_name: kin_Latn
data_files:
- split: train
path: kin_Latn/train-*
- config_name: kir_Cyrl
data_files:
- split: train
path: kir_Cyrl/train-*
- config_name: kor_Hang
data_files:
- split: train
path: kor_Hang/train-*
- config_name: lao_Laoo
data_files:
- split: train
path: lao_Laoo/train-*
- config_name: lin_Latn
data_files:
- split: train
path: lin_Latn/train-*
- config_name: lit_Latn
data_files:
- split: train
path: lit_Latn/train-*
- config_name: lug_Latn
data_files:
- split: train
path: lug_Latn/train-*
- config_name: luo_Latn
data_files:
- split: train
path: luo_Latn/train-*
- config_name: lvs_Latn
data_files:
- split: train
path: lvs_Latn/train-*
- config_name: mal_Mlym
data_files:
- split: train
path: mal_Mlym/train-*
- config_name: mar_Deva
data_files:
- split: train
path: mar_Deva/train-*
- config_name: mkd_Cyrl
data_files:
- split: train
path: mkd_Cyrl/train-*
- config_name: mlt_Latn
data_files:
- split: train
path: mlt_Latn/train-*
- config_name: mri_Latn
data_files:
- split: train
path: mri_Latn/train-*
- config_name: mya_Mymr
data_files:
- split: train
path: mya_Mymr/train-*
- config_name: nld_Latn
data_files:
- split: train
path: nld_Latn/train-*
- config_name: nob_Latn
data_files:
- split: train
path: nob_Latn/train-*
- config_name: npi_Deva
data_files:
- split: train
path: npi_Deva/train-*
- config_name: npi_Latn
data_files:
- split: train
path: npi_Latn/train-*
- config_name: nso_Latn
data_files:
- split: train
path: nso_Latn/train-*
- config_name: nya_Latn
data_files:
- split: train
path: nya_Latn/train-*
- config_name: ory_Orya
data_files:
- split: train
path: ory_Orya/train-*
- config_name: pan_Guru
data_files:
- split: train
path: pan_Guru/train-*
- config_name: pbt_Arab
data_files:
- split: train
path: pbt_Arab/train-*
- config_name: pes_Arab
data_files:
- split: train
path: pes_Arab/train-*
- config_name: plt_Latn
data_files:
- split: train
path: plt_Latn/train-*
- config_name: pol_Latn
data_files:
- split: train
path: pol_Latn/train-*
- config_name: por_Latn
data_files:
- split: train
path: por_Latn/train-*
- config_name: ron_Latn
data_files:
- split: train
path: ron_Latn/train-*
- config_name: rus_Cyrl
data_files:
- split: train
path: rus_Cyrl/train-*
- config_name: shn_Mymr
data_files:
- split: train
path: shn_Mymr/train-*
- config_name: sin_Latn
data_files:
- split: train
path: sin_Latn/train-*
- config_name: sin_Sinh
data_files:
- split: train
path: sin_Sinh/train-*
- config_name: slk_Latn
data_files:
- split: train
path: slk_Latn/train-*
- config_name: slv_Latn
data_files:
- split: train
path: slv_Latn/train-*
- config_name: sna_Latn
data_files:
- split: train
path: sna_Latn/train-*
- config_name: snd_Arab
data_files:
- split: train
path: snd_Arab/train-*
- config_name: som_Latn
data_files:
- split: train
path: som_Latn/train-*
- config_name: sot_Latn
data_files:
- split: train
path: sot_Latn/train-*
- config_name: spa_Latn
data_files:
- split: train
path: spa_Latn/train-*
- config_name: srp_Cyrl
data_files:
- split: train
path: srp_Cyrl/train-*
- config_name: ssw_Latn
data_files:
- split: train
path: ssw_Latn/train-*
- config_name: sun_Latn
data_files:
- split: train
path: sun_Latn/train-*
- config_name: swe_Latn
data_files:
- split: train
path: swe_Latn/train-*
- config_name: swh_Latn
data_files:
- split: train
path: swh_Latn/train-*
- config_name: tam_Taml
data_files:
- split: train
path: tam_Taml/train-*
- config_name: tel_Telu
data_files:
- split: train
path: tel_Telu/train-*
- config_name: tgk_Cyrl
data_files:
- split: train
path: tgk_Cyrl/train-*
- config_name: tgl_Latn
data_files:
- split: train
path: tgl_Latn/train-*
- config_name: tha_Thai
data_files:
- split: train
path: tha_Thai/train-*
- config_name: tir_Ethi
data_files:
- split: train
path: tir_Ethi/train-*
- config_name: tsn_Latn
data_files:
- split: train
path: tsn_Latn/train-*
- config_name: tso_Latn
data_files:
- split: train
path: tso_Latn/train-*
- config_name: tur_Latn
data_files:
- split: train
path: tur_Latn/train-*
- config_name: ukr_Cyrl
data_files:
- split: train
path: ukr_Cyrl/train-*
- config_name: urd_Arab
data_files:
- split: train
path: urd_Arab/train-*
- config_name: urd_Latn
data_files:
- split: train
path: urd_Latn/train-*
- config_name: uzn_Latn
data_files:
- split: train
path: uzn_Latn/train-*
- config_name: vie_Latn
data_files:
- split: train
path: vie_Latn/train-*
- config_name: war_Latn
data_files:
- split: train
path: war_Latn/train-*
- config_name: wol_Latn
data_files:
- split: train
path: wol_Latn/train-*
- config_name: xho_Latn
data_files:
- split: train
path: xho_Latn/train-*
- config_name: yor_Latn
data_files:
- split: train
path: yor_Latn/train-*
- config_name: zho_Hans
data_files:
- split: train
path: zho_Hans/train-*
- config_name: zho_Hant
data_files:
- split: train
path: zho_Hant/train-*
- config_name: zsm_Latn
data_files:
- split: train
path: zsm_Latn/train-*
- config_name: zul_Latn
data_files:
- split: train
path: zul_Latn/train-*
---
提供机构:
mbzuai-ugrip-statement-tuning
原始信息汇总
数据集概述
本数据集包含多个配置,每个配置对应一种语言与书写系统(文字)的组合,配置名形如 acm_Arab、eng_Latn,即"语言代码_文字代码"。每个配置包含两个特征:is_true(数据类型为int64)和statement(数据类型为string)。每个配置仅提供训练集(train)这一个划分,其中包含1800个示例。
数据集配置详情
配置1: acm_Arab
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1688141字节
- 下载大小: 379232字节
配置2: afr_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1191949字节
- 下载大小: 305533字节
配置3: als_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1322010字节
- 下载大小: 332023字节
配置4: amh_Ethi
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1867736字节
- 下载大小: 422551字节
配置5: apc_Arab
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1617059字节
- 下载大小: 364431字节
配置6: arb_Arab
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1723440字节
- 下载大小: 386464字节
配置7: arb_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1250964字节
- 下载大小: 352248字节
配置8: ars_Arab
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1717634字节
- 下载大小: 387967字节
配置9: ary_Arab
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1716253字节
- 下载大小: 389130字节
配置10: arz_Arab
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1683992字节
- 下载大小: 379939字节
配置11: asm_Beng
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 2729329字节
- 下载大小: 534103字节
配置12: azj_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1380150字节
- 下载大小: 344852字节
配置13: bam_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1175129字节
- 下载大小: 295946字节
配置14: ben_Beng
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 2791017字节
- 下载大小: 522440字节
配置15: ben_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1210191字节
- 下载大小: 318301字节
配置16: bod_Tibt
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 3546427字节
- 下载大小: 572873字节
配置17: bul_Cyrl
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 2041305字节
- 下载大小: 442316字节
配置18: cat_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1242257字节
- 下载大小: 321205字节
配置19: ceb_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1312609字节
- 下载大小: 322137字节
配置20: ces_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1198322字节
- 下载大小: 326534字节
配置21: ckb_Arab
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1939446字节
- 下载大小: 424605字节
配置22: dan_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1166812字节
- 下载大小: 302287字节
配置23: deu_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1289814字节
- 下载大小: 329593字节
配置24: ell_Grek
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 2343408字节
- 下载大小: 510292字节
配置25: eng_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1127828字节
- 下载大小: 286019字节
配置26: est_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1121599字节
- 下载大小: 303654字节
配置27: eus_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1182872字节
- 下载大小: 299344字节
配置28: fin_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1220088字节
- 下载大小: 317046字节
配置29: fra_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1375977字节
- 下载大小: 348408字节
配置30: fuv_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1049782字节
- 下载大小: 293431字节
配置31: gaz_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1327526字节
- 下载大小: 339266字节
配置32: grn_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1215571字节
- 下载大小: 302153字节
配置33: guj_Gujr
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 2649771字节
- 下载大小: 507321字节
配置34: hat_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1068379字节
- 下载大小: 276575字节
配置35: hau_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1214872字节
- 下载大小: 310605字节
配置36: heb_Hebr
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1521083字节
- 下载大小: 351410字节
配置37: hin_Deva
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 2732641字节
- 下载大小: 511170字节
配置38: hin_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1241492字节
- 下载大小: 317468字节
配置39: hrv_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1124220字节
- 下载大小: 306104字节
配置40: hun_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1275815字节
- 下载大小: 335648字节
配置41: hye_Armn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 2187475字节
- 下载大小: 473622字节
配置42: ibo_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1326129字节
- 下载大小: 323528字节
配置43: ilo_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1334087字节
- 下载大小: 327120字节
配置44: ind_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1208396字节
- 下载大小: 296320字节
配置45: isl_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1222860字节
- 下载大小: 315295字节
配置46: ita_Latn
- 特征:
is_true: int64, statement: string
- 训练集:
- 示例数: 1800
- 数据大小: 1307299字节
- 下载大小: 335



