mbzuai-ugrip-statement-tuning/Topic-Statements
收藏Hugging Face2024-06-11 更新2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/mbzuai-ugrip-statement-tuning/Topic-Statements
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: ace_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 964916
num_examples: 1960
download_size: 194507
dataset_size: 964916
- config_name: ace_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 742224
num_examples: 1960
download_size: 154059
dataset_size: 742224
- config_name: acm_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 987824
num_examples: 1960
download_size: 200669
dataset_size: 987824
- config_name: acq_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1000292
num_examples: 1960
download_size: 205002
dataset_size: 1000292
- config_name: aeb_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 980668
num_examples: 1960
download_size: 202337
dataset_size: 980668
- config_name: afr_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 729044
num_examples: 1960
download_size: 155117
dataset_size: 729044
- config_name: ajp_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 946380
num_examples: 1960
download_size: 192036
dataset_size: 946380
- config_name: aka_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 753768
num_examples: 1960
download_size: 153502
dataset_size: 753768
- config_name: als_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 800376
num_examples: 1960
download_size: 168218
dataset_size: 800376
- config_name: amh_Ethi
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1063804
num_examples: 1960
download_size: 216648
dataset_size: 1063804
- config_name: apc_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 945292
num_examples: 1960
download_size: 189482
dataset_size: 945292
- config_name: arb_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1007608
num_examples: 1960
download_size: 206048
dataset_size: 1007608
- config_name: arb_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 769684
num_examples: 1960
download_size: 169815
dataset_size: 769684
- config_name: ars_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1009876
num_examples: 1960
download_size: 206545
dataset_size: 1009876
- config_name: ary_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 995948
num_examples: 1960
download_size: 201872
dataset_size: 995948
- config_name: arz_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 985120
num_examples: 1960
download_size: 201412
dataset_size: 985120
- config_name: asm_Beng
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1478700
num_examples: 1960
download_size: 272813
dataset_size: 1478700
- config_name: ast_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 722376
num_examples: 1960
download_size: 156383
dataset_size: 722376
- config_name: awa_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1483160
num_examples: 1960
download_size: 265379
dataset_size: 1483160
- config_name: ayr_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 711872
num_examples: 1960
download_size: 143939
dataset_size: 711872
- config_name: azb_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1013908
num_examples: 1960
download_size: 204841
dataset_size: 1013908
- config_name: azj_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 830680
num_examples: 1960
download_size: 170931
dataset_size: 830680
- config_name: bak_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1141576
num_examples: 1960
download_size: 225657
dataset_size: 1141576
- config_name: bam_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 722108
num_examples: 1960
download_size: 148492
dataset_size: 722108
- config_name: ban_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 748132
num_examples: 1960
download_size: 152506
dataset_size: 748132
- config_name: bel_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1275276
num_examples: 1960
download_size: 267238
dataset_size: 1275276
- config_name: bem_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 805612
num_examples: 1960
download_size: 166544
dataset_size: 805612
- config_name: ben_Beng
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1540552
num_examples: 1960
download_size: 276301
dataset_size: 1540552
- config_name: bho_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1456064
num_examples: 1960
download_size: 263544
dataset_size: 1456064
- config_name: bjn_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1031324
num_examples: 1960
download_size: 209905
dataset_size: 1031324
- config_name: bjn_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 718164
num_examples: 1960
download_size: 147358
dataset_size: 718164
- config_name: bod_Tibt
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1876316
num_examples: 1960
download_size: 298797
dataset_size: 1876316
- config_name: bos_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 716272
num_examples: 1960
download_size: 155889
dataset_size: 716272
- config_name: bug_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 750672
num_examples: 1960
download_size: 159625
dataset_size: 750672
- config_name: bul_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1158412
num_examples: 1960
download_size: 232514
dataset_size: 1158412
- config_name: cat_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 761056
num_examples: 1960
download_size: 163703
dataset_size: 761056
- config_name: ceb_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 794960
num_examples: 1960
download_size: 161564
dataset_size: 794960
- config_name: ces_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 732404
num_examples: 1960
download_size: 163620
dataset_size: 732404
- config_name: cjk_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 717900
num_examples: 1960
download_size: 152541
dataset_size: 717900
- config_name: ckb_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1093264
num_examples: 1960
download_size: 217177
dataset_size: 1093264
- config_name: crh_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 752432
num_examples: 1960
download_size: 157707
dataset_size: 752432
- config_name: cym_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 723124
num_examples: 1960
download_size: 154238
dataset_size: 723124
- config_name: dan_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 721672
num_examples: 1960
download_size: 154006
dataset_size: 721672
- config_name: deu_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 781696
num_examples: 1960
download_size: 167949
dataset_size: 781696
- config_name: dik_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 681072
num_examples: 1960
download_size: 144297
dataset_size: 681072
- config_name: dyu_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 748736
num_examples: 1960
download_size: 157046
dataset_size: 748736
- config_name: dzo_Tibt
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 2076244
num_examples: 1960
download_size: 326939
dataset_size: 2076244
- config_name: ell_Grek
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1316372
num_examples: 1960
download_size: 276831
dataset_size: 1316372
- config_name: eng_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 687176
num_examples: 1960
download_size: 145426
dataset_size: 687176
- config_name: epo_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 698380
num_examples: 1960
download_size: 149410
dataset_size: 698380
- config_name: est_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 700820
num_examples: 1960
download_size: 151874
dataset_size: 700820
- config_name: eus_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 737156
num_examples: 1960
download_size: 153547
dataset_size: 737156
- config_name: ewe_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 744356
num_examples: 1960
download_size: 149745
dataset_size: 744356
- config_name: fao_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 744368
num_examples: 1960
download_size: 157749
dataset_size: 744368
- config_name: fij_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 782172
num_examples: 1960
download_size: 153797
dataset_size: 782172
- config_name: fin_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 748816
num_examples: 1960
download_size: 161378
dataset_size: 748816
- config_name: fon_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 836840
num_examples: 1960
download_size: 171024
dataset_size: 836840
- config_name: fra_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 814600
num_examples: 1960
download_size: 171500
dataset_size: 814600
- config_name: fur_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 764148
num_examples: 1960
download_size: 162824
dataset_size: 764148
- config_name: fuv_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 662844
num_examples: 1960
download_size: 136688
dataset_size: 662844
- config_name: gaz_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 794228
num_examples: 1960
download_size: 162934
dataset_size: 794228
- config_name: gla_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 844272
num_examples: 1960
download_size: 173005
dataset_size: 844272
- config_name: gle_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 810588
num_examples: 1960
download_size: 170217
dataset_size: 810588
- config_name: glg_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 767380
num_examples: 1960
download_size: 163207
dataset_size: 767380
- config_name: grn_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 735256
num_examples: 1960
download_size: 151569
dataset_size: 735256
- config_name: guj_Gujr
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1456700
num_examples: 1960
download_size: 264326
dataset_size: 1456700
- config_name: hat_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 669252
num_examples: 1960
download_size: 137607
dataset_size: 669252
- config_name: hau_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 728948
num_examples: 1960
download_size: 151821
dataset_size: 728948
- config_name: heb_Hebr
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 898592
num_examples: 1960
download_size: 183423
dataset_size: 898592
- config_name: hin_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1489400
num_examples: 1960
download_size: 264357
dataset_size: 1489400
- config_name: hne_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1458188
num_examples: 1960
download_size: 265729
dataset_size: 1458188
- config_name: hrv_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 705080
num_examples: 1960
download_size: 154712
dataset_size: 705080
- config_name: hun_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 765388
num_examples: 1960
download_size: 165981
dataset_size: 765388
- config_name: hye_Armn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1232044
num_examples: 1960
download_size: 245677
dataset_size: 1232044
- config_name: ibo_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 786000
num_examples: 1960
download_size: 155336
dataset_size: 786000
- config_name: ilo_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 808768
num_examples: 1960
download_size: 164426
dataset_size: 808768
- config_name: ind_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 742516
num_examples: 1960
download_size: 150926
dataset_size: 742516
- config_name: isl_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 741296
num_examples: 1960
download_size: 156842
dataset_size: 741296
- config_name: ita_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 792352
num_examples: 1960
download_size: 169519
dataset_size: 792352
- config_name: jav_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 726704
num_examples: 1960
download_size: 149329
dataset_size: 726704
- config_name: jpn_Jpan
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 832156
num_examples: 1960
download_size: 169196
dataset_size: 832156
- config_name: kab_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 724440
num_examples: 1960
download_size: 154795
dataset_size: 724440
- config_name: kac_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 836852
num_examples: 1960
download_size: 159561
dataset_size: 836852
- config_name: kam_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 696496
num_examples: 1960
download_size: 145100
dataset_size: 696496
- config_name: kan_Knda
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1646088
num_examples: 1960
download_size: 295938
dataset_size: 1646088
- config_name: kas_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1063972
num_examples: 1960
download_size: 226199
dataset_size: 1063972
- config_name: kas_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1430736
num_examples: 1960
download_size: 273878
dataset_size: 1430736
- config_name: kat_Geor
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1704844
num_examples: 1960
download_size: 298620
dataset_size: 1704844
- config_name: kaz_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1136588
num_examples: 1960
download_size: 217835
dataset_size: 1136588
- config_name: kbp_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 887544
num_examples: 1960
download_size: 168660
dataset_size: 887544
- config_name: kea_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 705548
num_examples: 1960
download_size: 148827
dataset_size: 705548
- config_name: khk_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1178724
num_examples: 1960
download_size: 228508
dataset_size: 1178724
- config_name: khm_Khmr
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1912456
num_examples: 1960
download_size: 357542
dataset_size: 1912456
- config_name: kik_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 833580
num_examples: 1960
download_size: 167558
dataset_size: 833580
- config_name: kin_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 759244
num_examples: 1960
download_size: 156605
dataset_size: 759244
- config_name: kir_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1149124
num_examples: 1960
download_size: 227032
dataset_size: 1149124
- config_name: kmb_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 749048
num_examples: 1960
download_size: 152348
dataset_size: 749048
- config_name: kmr_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 743780
num_examples: 1960
download_size: 158304
dataset_size: 743780
- config_name: knc_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1010240
num_examples: 1960
download_size: 210047
dataset_size: 1010240
- config_name: knc_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 758716
num_examples: 1960
download_size: 160393
dataset_size: 758716
- config_name: kon_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 753800
num_examples: 1960
download_size: 146314
dataset_size: 753800
- config_name: kor_Hang
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 803620
num_examples: 1960
download_size: 167395
dataset_size: 803620
- config_name: lao_Laoo
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1585268
num_examples: 1960
download_size: 294507
dataset_size: 1585268
- config_name: lij_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 780220
num_examples: 1960
download_size: 167799
dataset_size: 780220
- config_name: lim_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 732872
num_examples: 1960
download_size: 159050
dataset_size: 732872
- config_name: lin_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 724560
num_examples: 1960
download_size: 141943
dataset_size: 724560
- config_name: lit_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 730112
num_examples: 1960
download_size: 160423
dataset_size: 730112
- config_name: lmo_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 779192
num_examples: 1960
download_size: 171339
dataset_size: 779192
- config_name: ltg_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 717416
num_examples: 1960
download_size: 156631
dataset_size: 717416
- config_name: ltz_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 775752
num_examples: 1960
download_size: 166352
dataset_size: 775752
- config_name: lua_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 731256
num_examples: 1960
download_size: 149335
dataset_size: 731256
- config_name: lug_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 709248
num_examples: 1960
download_size: 153350
dataset_size: 709248
- config_name: luo_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 717120
num_examples: 1960
download_size: 148806
dataset_size: 717120
- config_name: lus_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 728488
num_examples: 1960
download_size: 151383
dataset_size: 728488
- config_name: lvs_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 761896
num_examples: 1960
download_size: 164249
dataset_size: 761896
- config_name: mag_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1459764
num_examples: 1960
download_size: 262714
dataset_size: 1459764
- config_name: mai_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1484508
num_examples: 1960
download_size: 265544
dataset_size: 1484508
- config_name: mal_Mlym
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1787116
num_examples: 1960
download_size: 322558
dataset_size: 1787116
- config_name: mar_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1565660
num_examples: 1960
download_size: 285330
dataset_size: 1565660
- config_name: min_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1082260
num_examples: 1960
download_size: 217460
dataset_size: 1082260
- config_name: min_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 734800
num_examples: 1960
download_size: 151623
dataset_size: 734800
- config_name: mkd_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1161168
num_examples: 1960
download_size: 228029
dataset_size: 1161168
- config_name: mlt_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 779948
num_examples: 1960
download_size: 167946
dataset_size: 779948
- config_name: mni_Beng
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1581492
num_examples: 1960
download_size: 277820
dataset_size: 1581492
- config_name: mos_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 714712
num_examples: 1960
download_size: 150292
dataset_size: 714712
- config_name: mri_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 760176
num_examples: 1960
download_size: 148368
dataset_size: 760176
- config_name: mya_Mymr
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1989504
num_examples: 1960
download_size: 328043
dataset_size: 1989504
- config_name: nld_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 744276
num_examples: 1960
download_size: 158377
dataset_size: 744276
- config_name: nno_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 709672
num_examples: 1960
download_size: 151468
dataset_size: 709672
- config_name: nob_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 711868
num_examples: 1960
download_size: 149886
dataset_size: 711868
- config_name: npi_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1490288
num_examples: 1960
download_size: 265963
dataset_size: 1490288
- config_name: nqo_Nkoo
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1392344
num_examples: 1960
download_size: 276520
dataset_size: 1392344
- config_name: nso_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 781256
num_examples: 1960
download_size: 158395
dataset_size: 781256
- config_name: nus_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 856340
num_examples: 1960
download_size: 166499
dataset_size: 856340
- config_name: nya_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 741120
num_examples: 1960
download_size: 151343
dataset_size: 741120
- config_name: oci_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 779128
num_examples: 1960
download_size: 165919
dataset_size: 779128
- config_name: ory_Orya
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1588432
num_examples: 1960
download_size: 285648
dataset_size: 1588432
- config_name: pag_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 695668
num_examples: 1960
download_size: 148677
dataset_size: 695668
- config_name: pan_Guru
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1512156
num_examples: 1960
download_size: 271864
dataset_size: 1512156
- config_name: pap_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 735964
num_examples: 1960
download_size: 153532
dataset_size: 735964
- config_name: pbt_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1039728
num_examples: 1960
download_size: 212043
dataset_size: 1039728
- config_name: pes_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1055968
num_examples: 1960
download_size: 213401
dataset_size: 1055968
- config_name: plt_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 826544
num_examples: 1960
download_size: 162798
dataset_size: 826544
- config_name: pol_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 749756
num_examples: 1960
download_size: 164476
dataset_size: 749756
- config_name: por_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 761608
num_examples: 1960
download_size: 163118
dataset_size: 761608
- config_name: prs_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1008816
num_examples: 1960
download_size: 203371
dataset_size: 1008816
- config_name: quy_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 734040
num_examples: 1960
download_size: 145991
dataset_size: 734040
- config_name: ron_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 792752
num_examples: 1960
download_size: 169705
dataset_size: 792752
- config_name: run_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 747312
num_examples: 1960
download_size: 158000
dataset_size: 747312
- config_name: rus_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1236040
num_examples: 1960
download_size: 254866
dataset_size: 1236040
- config_name: sag_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 743300
num_examples: 1960
download_size: 144024
dataset_size: 743300
- config_name: san_Deva
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1536732
num_examples: 1960
download_size: 278216
dataset_size: 1536732
- config_name: sat_Olck
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1596700
num_examples: 1960
download_size: 286126
dataset_size: 1596700
- config_name: scn_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 749484
num_examples: 1960
download_size: 161449
dataset_size: 749484
- config_name: shn_Mymr
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 2195980
num_examples: 1960
download_size: 383034
dataset_size: 2195980
- config_name: sin_Sinh
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1540944
num_examples: 1960
download_size: 294040
dataset_size: 1540944
- config_name: slk_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 740524
num_examples: 1960
download_size: 164597
dataset_size: 740524
- config_name: slv_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 713108
num_examples: 1960
download_size: 157166
dataset_size: 713108
- config_name: smo_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 814204
num_examples: 1960
download_size: 161784
dataset_size: 814204
- config_name: sna_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 752716
num_examples: 1960
download_size: 156360
dataset_size: 752716
- config_name: snd_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1004556
num_examples: 1960
download_size: 201835
dataset_size: 1004556
- config_name: som_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 761544
num_examples: 1960
download_size: 163285
dataset_size: 761544
- config_name: sot_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 800296
num_examples: 1960
download_size: 162730
dataset_size: 800296
- config_name: spa_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 810144
num_examples: 1960
download_size: 172948
dataset_size: 810144
- config_name: srd_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 789464
num_examples: 1960
download_size: 165678
dataset_size: 789464
- config_name: srp_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1102152
num_examples: 1960
download_size: 222368
dataset_size: 1102152
- config_name: ssw_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 750336
num_examples: 1960
download_size: 156423
dataset_size: 750336
- config_name: sun_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 715600
num_examples: 1960
download_size: 148586
dataset_size: 715600
- config_name: swe_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 719440
num_examples: 1960
download_size: 153817
dataset_size: 719440
- config_name: swh_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 723128
num_examples: 1960
download_size: 148481
dataset_size: 723128
- config_name: szl_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 747608
num_examples: 1960
download_size: 166121
dataset_size: 747608
- config_name: tam_Taml
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1795220
num_examples: 1960
download_size: 314048
dataset_size: 1795220
- config_name: taq_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 680408
num_examples: 1960
download_size: 146720
dataset_size: 680408
- config_name: taq_Tfng
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1307816
num_examples: 1960
download_size: 248786
dataset_size: 1307816
- config_name: tat_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1125032
num_examples: 1960
download_size: 218726
dataset_size: 1125032
- config_name: tel_Telu
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1538136
num_examples: 1960
download_size: 278084
dataset_size: 1538136
- config_name: tgk_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1211792
num_examples: 1960
download_size: 241441
dataset_size: 1211792
- config_name: tgl_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 831360
num_examples: 1960
download_size: 169302
dataset_size: 831360
- config_name: tha_Thai
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1612188
num_examples: 1960
download_size: 298993
dataset_size: 1612188
- config_name: tir_Ethi
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1073600
num_examples: 1960
download_size: 222514
dataset_size: 1073600
- config_name: tpi_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 834612
num_examples: 1960
download_size: 154116
dataset_size: 834612
- config_name: tsn_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 822916
num_examples: 1960
download_size: 167984
dataset_size: 822916
- config_name: tso_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 793344
num_examples: 1960
download_size: 160191
dataset_size: 793344
- config_name: tuk_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 783392
num_examples: 1960
download_size: 164474
dataset_size: 783392
- config_name: tum_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 845992
num_examples: 1960
download_size: 167105
dataset_size: 845992
- config_name: tur_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 755408
num_examples: 1960
download_size: 158628
dataset_size: 755408
- config_name: twi_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 719196
num_examples: 1960
download_size: 146514
dataset_size: 719196
- config_name: tzm_Tfng
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1345860
num_examples: 1960
download_size: 263415
dataset_size: 1345860
- config_name: uig_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1202512
num_examples: 1960
download_size: 238597
dataset_size: 1202512
- config_name: ukr_Cyrl
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1144860
num_examples: 1960
download_size: 234391
dataset_size: 1144860
- config_name: umb_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 707564
num_examples: 1960
download_size: 141986
dataset_size: 707564
- config_name: urd_Arab
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1088812
num_examples: 1960
download_size: 220415
dataset_size: 1088812
- config_name: uzn_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 755612
num_examples: 1960
download_size: 157937
dataset_size: 755612
- config_name: vec_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 718392
num_examples: 1960
download_size: 154944
dataset_size: 718392
- config_name: vie_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 902844
num_examples: 1960
download_size: 178608
dataset_size: 902844
- config_name: war_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 816400
num_examples: 1960
download_size: 166068
dataset_size: 816400
- config_name: wol_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 674712
num_examples: 1960
download_size: 143873
dataset_size: 674712
- config_name: xho_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 724884
num_examples: 1960
download_size: 155723
dataset_size: 724884
- config_name: ydd_Hebr
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 1161328
num_examples: 1960
download_size: 232398
dataset_size: 1161328
- config_name: yor_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 830608
num_examples: 1960
download_size: 169276
dataset_size: 830608
- config_name: yue_Hant
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 627536
num_examples: 1960
download_size: 133832
dataset_size: 627536
- config_name: zho_Hans
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 658824
num_examples: 1960
download_size: 145682
dataset_size: 658824
- config_name: zho_Hant
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 640192
num_examples: 1960
download_size: 139630
dataset_size: 640192
- config_name: zsm_Latn
features:
- name: index
dtype: int64
- name: index_id
dtype: int64
- name: category
dtype: string
- name: text
dtype: string
- name: falsecat
dtype: string
- name: statement
dtype: string
- name: label
dtype: string
splits:
- name: train
num_bytes: 752004
num_examples: 1960
download_size: 153701
dataset_size: 752004
configs:
- config_name: ace_Arab
data_files:
- split: train
path: ace_Arab/train-*
- config_name: ace_Latn
data_files:
- split: train
path: ace_Latn/train-*
- config_name: acm_Arab
data_files:
- split: train
path: acm_Arab/train-*
- config_name: acq_Arab
data_files:
- split: train
path: acq_Arab/train-*
- config_name: aeb_Arab
data_files:
- split: train
path: aeb_Arab/train-*
- config_name: afr_Latn
data_files:
- split: train
path: afr_Latn/train-*
- config_name: ajp_Arab
data_files:
- split: train
path: ajp_Arab/train-*
- config_name: aka_Latn
data_files:
- split: train
path: aka_Latn/train-*
- config_name: als_Latn
data_files:
- split: train
path: als_Latn/train-*
- config_name: amh_Ethi
data_files:
- split: train
path: amh_Ethi/train-*
- config_name: apc_Arab
data_files:
- split: train
path: apc_Arab/train-*
- config_name: arb_Arab
data_files:
- split: train
path: arb_Arab/train-*
- config_name: arb_Latn
data_files:
- split: train
path: arb_Latn/train-*
- config_name: ars_Arab
data_files:
- split: train
path: ars_Arab/train-*
- config_name: ary_Arab
data_files:
- split: train
path: ary_Arab/train-*
- config_name: arz_Arab
data_files:
- split: train
path: arz_Arab/train-*
- config_name: asm_Beng
data_files:
- split: train
path: asm_Beng/train-*
- config_name: ast_Latn
data_files:
- split: train
path: ast_Latn/train-*
- config_name: awa_Deva
data_files:
- split: train
path: awa_Deva/train-*
- config_name: ayr_Latn
data_files:
- split: train
path: ayr_Latn/train-*
- config_name: azb_Arab
data_files:
- split: train
path: azb_Arab/train-*
- config_name: azj_Latn
data_files:
- split: train
path: azj_Latn/train-*
- config_name: bak_Cyrl
data_files:
- split: train
path: bak_Cyrl/train-*
- config_name: bam_Latn
data_files:
- split: train
path: bam_Latn/train-*
- config_name: ban_Latn
data_files:
- split: train
path: ban_Latn/train-*
- config_name: bel_Cyrl
data_files:
- split: train
path: bel_Cyrl/train-*
- config_name: bem_Latn
data_files:
- split: train
path: bem_Latn/train-*
- config_name: ben_Beng
data_files:
- split: train
path: ben_Beng/train-*
- config_name: bho_Deva
data_files:
- split: train
path: bho_Deva/train-*
- config_name: bjn_Arab
data_files:
- split: train
path: bjn_Arab/train-*
- config_name: bjn_Latn
data_files:
- split: train
path: bjn_Latn/train-*
- config_name: bod_Tibt
data_files:
- split: train
path: bod_Tibt/train-*
- config_name: bos_Latn
data_files:
- split: train
path: bos_Latn/train-*
- config_name: bug_Latn
data_files:
- split: train
path: bug_Latn/train-*
- config_name: bul_Cyrl
data_files:
- split: train
path: bul_Cyrl/train-*
- config_name: cat_Latn
data_files:
- split: train
path: cat_Latn/train-*
- config_name: ceb_Latn
data_files:
- split: train
path: ceb_Latn/train-*
- config_name: ces_Latn
data_files:
- split: train
path: ces_Latn/train-*
- config_name: cjk_Latn
data_files:
- split: train
path: cjk_Latn/train-*
- config_name: ckb_Arab
data_files:
- split: train
path: ckb_Arab/train-*
- config_name: crh_Latn
data_files:
- split: train
path: crh_Latn/train-*
- config_name: cym_Latn
data_files:
- split: train
path: cym_Latn/train-*
- config_name: dan_Latn
data_files:
- split: train
path: dan_Latn/train-*
- config_name: deu_Latn
data_files:
- split: train
path: deu_Latn/train-*
- config_name: dik_Latn
data_files:
- split: train
path: dik_Latn/train-*
- config_name: dyu_Latn
data_files:
- split: train
path: dyu_Latn/train-*
- config_name: dzo_Tibt
data_files:
- split: train
path: dzo_Tibt/train-*
- config_name: ell_Grek
data_files:
- split: train
path: ell_Grek/train-*
- config_name: eng_Latn
data_files:
- split: train
path: eng_Latn/train-*
- config_name: epo_Latn
data_files:
- split: train
path: epo_Latn/train-*
- config_name: est_Latn
data_files:
- split: train
path: est_Latn/train-*
- config_name: eus_Latn
data_files:
- split: train
path: eus_Latn/train-*
- config_name: ewe_Latn
data_files:
- split: train
path: ewe_Latn/train-*
- config_name: fao_Latn
data_files:
- split: train
path: fao_Latn/train-*
- config_name: fij_Latn
data_files:
- split: train
path: fij_Latn/train-*
- config_name: fin_Latn
data_files:
- split: train
path: fin_Latn/train-*
- config_name: fon_Latn
data_files:
- split: train
path: fon_Latn/train-*
- config_name: fra_Latn
data_files:
- split: train
path: fra_Latn/train-*
- config_name: fur_Latn
data_files:
- split: train
path: fur_Latn/train-*
- config_name: fuv_Latn
data_files:
- split: train
path: fuv_Latn/train-*
- config_name: gaz_Latn
data_files:
- split: train
path: gaz_Latn/train-*
- config_name: gla_Latn
data_files:
- split: train
path: gla_Latn/train-*
- config_name: gle_Latn
data_files:
- split: train
path: gle_Latn/train-*
- config_name: glg_Latn
data_files:
- split: train
path: glg_Latn/train-*
- config_name: grn_Latn
data_files:
- split: train
path: grn_Latn/train-*
- config_name: guj_Gujr
data_files:
- split: train
path: guj_Gujr/train-*
- config_name: hat_Latn
data_files:
- split: train
path: hat_Latn/train-*
- config_name: hau_Latn
data_files:
- split: train
path: hau_Latn/train-*
- config_name: heb_Hebr
data_files:
- split: train
path: heb_Hebr/train-*
- config_name: hin_Deva
data_files:
- split: train
path: hin_Deva/train-*
- config_name: hne_Deva
data_files:
- split: train
path: hne_Deva/train-*
- config_name: hrv_Latn
data_files:
- split: train
path: hrv_Latn/train-*
- config_name: hun_Latn
data_files:
- split: train
path: hun_Latn/train-*
- config_name: hye_Armn
data_files:
- split: train
path: hye_Armn/train-*
- config_name: ibo_Latn
data_files:
- split: train
path: ibo_Latn/train-*
- config_name: ilo_Latn
data_files:
- split: train
path: ilo_Latn/train-*
- config_name: ind_Latn
data_files:
- split: train
path: ind_Latn/train-*
- config_name: isl_Latn
data_files:
- split: train
path: isl_Latn/train-*
- config_name: ita_Latn
data_files:
- split: train
path: ita_Latn/train-*
- config_name: jav_Latn
data_files:
- split: train
path: jav_Latn/train-*
- config_name: jpn_Jpan
data_files:
- split: train
path: jpn_Jpan/train-*
- config_name: kab_Latn
data_files:
- split: train
path: kab_Latn/train-*
- config_name: kac_Latn
data_files:
- split: train
path: kac_Latn/train-*
- config_name: kam_Latn
data_files:
- split: train
path: kam_Latn/train-*
- config_name: kan_Knda
data_files:
- split: train
path: kan_Knda/train-*
- config_name: kas_Arab
data_files:
- split: train
path: kas_Arab/train-*
- config_name: kas_Deva
data_files:
- split: train
path: kas_Deva/train-*
- config_name: kat_Geor
data_files:
- split: train
path: kat_Geor/train-*
- config_name: kaz_Cyrl
data_files:
- split: train
path: kaz_Cyrl/train-*
- config_name: kbp_Latn
data_files:
- split: train
path: kbp_Latn/train-*
- config_name: kea_Latn
data_files:
- split: train
path: kea_Latn/train-*
- config_name: khk_Cyrl
data_files:
- split: train
path: khk_Cyrl/train-*
- config_name: khm_Khmr
data_files:
- split: train
path: khm_Khmr/train-*
- config_name: kik_Latn
data_files:
- split: train
path: kik_Latn/train-*
- config_name: kin_Latn
data_files:
- split: train
path: kin_Latn/train-*
- config_name: kir_Cyrl
data_files:
- split: train
path: kir_Cyrl/train-*
- config_name: kmb_Latn
data_files:
- split: train
path: kmb_Latn/train-*
- config_name: kmr_Latn
data_files:
- split: train
path: kmr_Latn/train-*
- config_name: knc_Arab
data_files:
- split: train
path: knc_Arab/train-*
- config_name: knc_Latn
data_files:
- split: train
path: knc_Latn/train-*
- config_name: kon_Latn
data_files:
- split: train
path: kon_Latn/train-*
- config_name: kor_Hang
data_files:
- split: train
path: kor_Hang/train-*
- config_name: lao_Laoo
data_files:
- split: train
path: lao_Laoo/train-*
- config_name: lij_Latn
data_files:
- split: train
path: lij_Latn/train-*
- config_name: lim_Latn
data_files:
- split: train
path: lim_Latn/train-*
- config_name: lin_Latn
data_files:
- split: train
path: lin_Latn/train-*
- config_name: lit_Latn
data_files:
- split: train
path: lit_Latn/train-*
- config_name: lmo_Latn
data_files:
- split: train
path: lmo_Latn/train-*
- config_name: ltg_Latn
data_files:
- split: train
path: ltg_Latn/train-*
- config_name: ltz_Latn
data_files:
- split: train
path: ltz_Latn/train-*
- config_name: lua_Latn
data_files:
- split: train
path: lua_Latn/train-*
- config_name: lug_Latn
data_files:
- split: train
path: lug_Latn/train-*
- config_name: luo_Latn
data_files:
- split: train
path: luo_Latn/train-*
- config_name: lus_Latn
data_files:
- split: train
path: lus_Latn/train-*
- config_name: lvs_Latn
data_files:
- split: train
path: lvs_Latn/train-*
- config_name: mag_Deva
data_files:
- split: train
path: mag_Deva/train-*
- config_name: mai_Deva
data_files:
- split: train
path: mai_Deva/train-*
- config_name: mal_Mlym
data_files:
- split: train
path: mal_Mlym/train-*
- config_name: mar_Deva
data_files:
- split: train
path: mar_Deva/train-*
- config_name: min_Arab
data_files:
- split: train
path: min_Arab/train-*
- config_name: min_Latn
data_files:
- split: train
path: min_Latn/train-*
- config_name: mkd_Cyrl
data_files:
- split: train
path: mkd_Cyrl/train-*
- config_name: mlt_Latn
data_files:
- split: train
path: mlt_Latn/train-*
- config_name: mni_Beng
data_files:
- split: train
path: mni_Beng/train-*
- config_name: mos_Latn
data_files:
- split: train
path: mos_Latn/train-*
- config_name: mri_Latn
data_files:
- split: train
path: mri_Latn/train-*
- config_name: mya_Mymr
data_files:
- split: train
path: mya_Mymr/train-*
- config_name: nld_Latn
data_files:
- split: train
path: nld_Latn/train-*
- config_name: nno_Latn
data_files:
- split: train
path: nno_Latn/train-*
- config_name: nob_Latn
data_files:
- split: train
path: nob_Latn/train-*
- config_name: npi_Deva
data_files:
- split: train
path: npi_Deva/train-*
- config_name: nqo_Nkoo
data_files:
- split: train
path: nqo_Nkoo/train-*
- config_name: nso_Latn
data_files:
- split: train
path: nso_Latn/train-*
- config_name: nus_Latn
data_files:
- split: train
path: nus_Latn/train-*
- config_name: nya_Latn
data_files:
- split: train
path: nya_Latn/train-*
- config_name: oci_Latn
data_files:
- split: train
path: oci_Latn/train-*
- config_name: ory_Orya
data_files:
- split: train
path: ory_Orya/train-*
- config_name: pag_Latn
data_files:
- split: train
path: pag_Latn/train-*
- config_name: pan_Guru
data_files:
- split: train
path: pan_Guru/train-*
- config_name: pap_Latn
data_files:
- split: train
path: pap_Latn/train-*
- config_name: pbt_Arab
data_files:
- split: train
path: pbt_Arab/train-*
- config_name: pes_Arab
data_files:
- split: train
path: pes_Arab/train-*
- config_name: plt_Latn
data_files:
- split: train
path: plt_Latn/train-*
- config_name: pol_Latn
data_files:
- split: train
path: pol_Latn/train-*
- config_name: por_Latn
data_files:
- split: train
path: por_Latn/train-*
- config_name: prs_Arab
data_files:
- split: train
path: prs_Arab/train-*
- config_name: quy_Latn
data_files:
- split: train
path: quy_Latn/train-*
- config_name: ron_Latn
data_files:
- split: train
path: ron_Latn/train-*
- config_name: run_Latn
data_files:
- split: train
path: run_Latn/train-*
- config_name: rus_Cyrl
data_files:
- split: train
path: rus_Cyrl/train-*
- config_name: sag_Latn
data_files:
- split: train
path: sag_Latn/train-*
- config_name: san_Deva
data_files:
- split: train
path: san_Deva/train-*
- config_name: sat_Olck
data_files:
- split: train
path: sat_Olck/train-*
- config_name: scn_Latn
data_files:
- split: train
path: scn_Latn/train-*
- config_name: shn_Mymr
data_files:
- split: train
path: shn_Mymr/train-*
- config_name: sin_Sinh
data_files:
- split: train
path: sin_Sinh/train-*
- config_name: slk_Latn
data_files:
- split: train
path: slk_Latn/train-*
- config_name: slv_Latn
data_files:
- split: train
path: slv_Latn/train-*
- config_name: smo_Latn
data_files:
- split: train
path: smo_Latn/train-*
- config_name: sna_Latn
data_files:
- split: train
path: sna_Latn/train-*
- config_name: snd_Arab
data_files:
- split: train
path: snd_Arab/train-*
- config_name: som_Latn
data_files:
- split: train
path: som_Latn/train-*
- config_name: sot_Latn
data_files:
- split: train
path: sot_Latn/train-*
- config_name: spa_Latn
data_files:
- split: train
path: spa_Latn/train-*
- config_name: srd_Latn
data_files:
- split: train
path: srd_Latn/train-*
- config_name: srp_Cyrl
data_files:
- split: train
path: srp_Cyrl/train-*
- config_name: ssw_Latn
data_files:
- split: train
path: ssw_Latn/train-*
- config_name: sun_Latn
data_files:
- split: train
path: sun_Latn/train-*
- config_name: swe_Latn
data_files:
- split: train
path: swe_Latn/train-*
- config_name: swh_Latn
data_files:
- split: train
path: swh_Latn/train-*
- config_name: szl_Latn
data_files:
- split: train
path: szl_Latn/train-*
- config_name: tam_Taml
data_files:
- split: train
path: tam_Taml/train-*
- config_name: taq_Latn
data_files:
- split: train
path: taq_Latn/train-*
- config_name: taq_Tfng
data_files:
- split: train
path: taq_Tfng/train-*
- config_name: tat_Cyrl
data_files:
- split: train
path: tat_Cyrl/train-*
- config_name: tel_Telu
data_files:
- split: train
path: tel_Telu/train-*
- config_name: tgk_Cyrl
data_files:
- split: train
path: tgk_Cyrl/train-*
- config_name: tgl_Latn
data_files:
- split: train
path: tgl_Latn/train-*
- config_name: tha_Thai
data_files:
- split: train
path: tha_Thai/train-*
- config_name: tir_Ethi
data_files:
- split: train
path: tir_Ethi/train-*
- config_name: tpi_Latn
data_files:
- split: train
path: tpi_Latn/train-*
- config_name: tsn_Latn
data_files:
- split: train
path: tsn_Latn/train-*
- config_name: tso_Latn
data_files:
- split: train
path: tso_Latn/train-*
- config_name: tuk_Latn
data_files:
- split: train
path: tuk_Latn/train-*
- config_name: tum_Latn
data_files:
- split: train
path: tum_Latn/train-*
- config_name: tur_Latn
data_files:
- split: train
path: tur_Latn/train-*
- config_name: twi_Latn
data_files:
- split: train
path: twi_Latn/train-*
- config_name: tzm_Tfng
data_files:
- split: train
path: tzm_Tfng/train-*
- config_name: uig_Arab
data_files:
- split: train
path: uig_Arab/train-*
- config_name: ukr_Cyrl
data_files:
- split: train
path: ukr_Cyrl/train-*
- config_name: umb_Latn
data_files:
- split: train
path: umb_Latn/train-*
- config_name: urd_Arab
data_files:
- split: train
path: urd_Arab/train-*
- config_name: uzn_Latn
data_files:
- split: train
path: uzn_Latn/train-*
- config_name: vec_Latn
data_files:
- split: train
path: vec_Latn/train-*
- config_name: vie_Latn
data_files:
- split: train
path: vie_Latn/train-*
- config_name: war_Latn
data_files:
- split: train
path: war_Latn/train-*
- config_name: wol_Latn
data_files:
- split: train
path: wol_Latn/train-*
- config_name: xho_Latn
data_files:
- split: train
path: xho_Latn/train-*
- config_name: ydd_Hebr
data_files:
- split: train
path: ydd_Hebr/train-*
- config_name: yor_Latn
data_files:
- split: train
path: yor_Latn/train-*
- config_name: yue_Hant
data_files:
- split: train
path: yue_Hant/train-*
- config_name: zho_Hans
data_files:
- split: train
path: zho_Hans/train-*
- config_name: zho_Hant
data_files:
- split: train
path: zho_Hant/train-*
- config_name: zsm_Latn
data_files:
- split: train
path: zsm_Latn/train-*
---
This dataset contains text data in multiple languages and dialects, each corresponding to a configuration. Each configuration has the same data structure and is primarily used for text classification tasks.
提供机构:
mbzuai-ugrip-statement-tuning
原始信息汇总
数据集概述
本数据集包含多个配置,每个配置对应不同的语言或书写系统。每个配置包含以下特征:
- index: 索引,数据类型为 `int64`。
- index_id: 索引ID,数据类型为 `int64`。
- category: 类别,数据类型为 `string`。
- text: 文本内容,数据类型为 `string`。
- falsecat: 错误类别,数据类型为 `string`。
- statement: 声明,数据类型为 `string`。
- label: 标签,数据类型为 `string`。
每个配置进一步细分为训练集(train),具体信息如下:
- num_bytes: 训练集的数据大小,单位为字节。
- num_examples: 训练集中的样本数量。
- download_size: 数据集的下载大小,单位为字节。
- dataset_size: 数据集的总大小,单位为字节。
配置详情
以下是各配置的具体信息:
| 配置名称 | 训练集大小(字节) | 样本数量 | 下载大小(字节) | 数据集大小(字节) |
|---|---|---|---|---|
| ace_Arab | 964916 | 1960 | 194507 | 964916 |
| ace_Latn | 742224 | 1960 | 154059 | 742224 |
| acm_Arab | 987824 | 1960 | 200669 | 987824 |
| acq_Arab | 1000292 | 1960 | 205002 | 1000292 |
| aeb_Arab | 980668 | 1960 | 202337 | 980668 |
| afr_Latn | 729044 | 1960 | 155117 | 729044 |
| ajp_Arab | 946380 | 1960 | 192036 | 946380 |
| aka_Latn | 753768 | 1960 | 153502 | 753768 |
| als_Latn | 800376 | 1960 | 168218 | 800376 |
| amh_Ethi | 1063804 | 1960 | 216648 | 1063804 |
| apc_Arab | 945292 | 1960 | 189482 | 945292 |
| arb_Arab | 1007608 | 1960 | 206048 | 1007608 |
| arb_Latn | 769684 | 1960 | 169815 | 769684 |
| ars_Arab | 1009876 | 1960 | 206545 | 1009876 |
| ary_Arab | 995948 | 1960 | 201872 | 995948 |
| arz_Arab | 985120 | 1960 | 201412 | 985120 |
| asm_Beng | 1478700 | 1960 | 272813 | 1478700 |
| ast_Latn | 722376 | 1960 | 156383 | 722376 |
| awa_Deva | 1483160 | 1960 | 265379 | 1483160 |
| ayr_Latn | 711872 | 1960 | 143939 | 711872 |
| azb_Arab | 1013908 | 1960 | 204841 | 1013908 |
| azj_Latn | 830680 | 1960 | 170931 | 830680 |
| bak_Cyrl | 1141576 | 1960 | 225657 | 1141576 |
| bam_Latn | 722108 | 1960 | 148492 | 722108 |
| ban_Latn | 748132 | 1960 | 152506 | 748132 |
| bel_Cyrl | 1275276 | 1960 | 267238 | 1275276 |
| bem_Latn | 805612 | 1960 | 166544 | 805612 |
| ben_Beng | 1540552 | 1960 | 276301 | 1540552 |
| bho_Deva | 1456064 | 1960 | 263544 | 1456064 |
| bjn_Arab | 1031324 | 1960 | 209905 | 1031324 |
| bjn_Latn | 718164 | 1960 | 147358 | 718164 |
| bod_Tibt | 1876316 | 1960 | 298797 | 1876316 |
| bos_Latn | 716272 | 1960 | 155889 | 716272 |
| bug_Latn | 750672 | 1960 | 159625 | 750672 |
| bul_Cyrl | 1158412 | 1960 | 232514 | 1158412 |
| cat_Latn | 761056 | 1960 | 163703 | 761056 |
| ceb_Latn | 794960 | 1960 | 161564 | 794960 |
| ces_Latn | 732404 | 1960 | 163620 | 732404 |
| cjk_Latn | 717900 | 1960 | 152541 | 717900 |
| ckb_Arab | 1093264 | 1960 | 217177 | 1093264 |
| crh_Latn | 752432 | 1960 | 157707 | 752432 |
| cym_Latn | 723124 | 1960 | 154238 | 723124 |
| dan_Latn | 721672 | 1960 | 154006 | 721672 |
| deu_Latn | 781696 | 1960 | 167949 | 781696 |
| dik_Latn | 681072 | 1960 | 144297 | 681072 |
| dyu_Latn | 748736 | 1960 | 157046 | 748736 |
| dzo_Tibt | 2076244 | 1960 | 326939 | 2076244 |
| ell_Grek | 1316372 | 1960 | 276831 | 1316372 |
| eng_Latn | 687176 | 1960 | 145426 | 687176 |
| epo_Latn | 数据未提供 | 1960 | 数据未提供 | 数据未提供 |



