five

Davlan/sib200

收藏
Hugging Face2024-02-19 更新2024-03-04 收录
下载链接:
https://hf-mirror.com/datasets/Davlan/sib200
下载链接
链接失效反馈
官方服务:
资源简介:
--- annotations_creators: - found language_creators: - expert-generated language: - ace - acm - acq - aeb - af - ajp - ak - als - am - apc - ar - ars - ary - arz - as - ast - awa - ayr - azb - azj - ba - bm - ban - be - bem - bn - bho - bjn - bo - bs - bug - bg - ca - ceb - cs - cjk - ckb - crh - cy - da - de - dik - dyu - dz - el - en - eo - et - eu - ee - fo - fj - fi - fon - fr - fur - fuv - gaz - gd - ga - gl - gn - gu - ht - ha - he - hi - hne - hr - hu - hy - ig - ilo - id - is - it - jv - ja - kab - kac - kam - kn - ks - ka - kk - kbp - kea - khk - km - ki - rw - ky - kmb - kmr - knc - kg - ko - lo - lij - li - ln - lt - lmo - ltg - lb - lua - lg - luo - lus - lvs - mag - mai - ml - mar - min - mk - mt - mni - mos - mi - my - nl - nn - nb - npi - nqo - nso - nus - ny - oc - ory - pag - pa - pap - pbt - pes - plt - pl - pt - prs - quy - ro - rn - ru - sg - sa - sat - scn - shn - si - sk - sl - sm - sn - sd - so - st - es - sc - sr - ss - su - sv - swh - szl - ta - taq - tt - te - tg - tl - th - ti - tpi - tn - ts - tk - tum - tr - tw - tzm - ug - uk - umb - ur - uzn - vec - vi - war - wo - xh - ydd - yo - yue - zh - zsm - zu license: - cc-by-sa-4.0 multilinguality: - multilingual pretty_name: sib200 language_details: ace_Arab, ace_Latn, acm_Arab, acq_Arab, aeb_Arab, afr_Latn, ajp_Arab, aka_Latn, amh_Ethi, apc_Arab, arb_Arab, ars_Arab, ary_Arab, arz_Arab, asm_Beng, ast_Latn, awa_Deva, ayr_Latn, azb_Arab, azj_Latn, bak_Cyrl, bam_Latn, ban_Latn,bel_Cyrl, bem_Latn, ben_Beng, bho_Deva, bjn_Arab, bjn_Latn, bod_Tibt, bos_Latn, bug_Latn, bul_Cyrl, cat_Latn, ceb_Latn, ces_Latn, cjk_Latn, ckb_Arab, crh_Latn, cym_Latn, dan_Latn, deu_Latn, dik_Latn, dyu_Latn, dzo_Tibt, ell_Grek, eng_Latn, epo_Latn, est_Latn, eus_Latn, ewe_Latn, fao_Latn, pes_Arab, fij_Latn, fin_Latn, fon_Latn, fra_Latn, fur_Latn, fuv_Latn, gla_Latn, gle_Latn, glg_Latn, grn_Latn, guj_Gujr, hat_Latn, hau_Latn, heb_Hebr, hin_Deva, hne_Deva, hrv_Latn, hun_Latn, hye_Armn, ibo_Latn, ilo_Latn, ind_Latn, 
isl_Latn, ita_Latn, jav_Latn, jpn_Jpan, kab_Latn, kac_Latn, kam_Latn, kan_Knda, kas_Arab, kas_Deva, kat_Geor, knc_Arab, knc_Latn, kaz_Cyrl, kbp_Latn, kea_Latn, khm_Khmr, kik_Latn, kin_Latn, kir_Cyrl, kmb_Latn, kon_Latn, kor_Hang, kmr_Latn, lao_Laoo, lvs_Latn, lij_Latn, lim_Latn, lin_Latn, lit_Latn, lmo_Latn, ltg_Latn, ltz_Latn, lua_Latn, lug_Latn, luo_Latn, lus_Latn, mag_Deva, mai_Deva, mal_Mlym, mar_Deva, min_Latn, mkd_Cyrl, plt_Latn, mlt_Latn, mni_Beng, khk_Cyrl, mos_Latn, mri_Latn, zsm_Latn, mya_Mymr, nld_Latn, nno_Latn, nob_Latn, npi_Deva, nso_Latn, nus_Latn, nya_Latn, oci_Latn, gaz_Latn, ory_Orya, pag_Latn, pan_Guru, pap_Latn, pol_Latn, por_Latn, prs_Arab, pbt_Arab, quy_Latn, ron_Latn, run_Latn, rus_Cyrl, sag_Latn, san_Deva, sat_Beng, scn_Latn, shn_Mymr, sin_Sinh, slk_Latn, slv_Latn, smo_Latn, sna_Latn, snd_Arab, som_Latn, sot_Latn, spa_Latn, als_Latn, srd_Latn, srp_Cyrl, ssw_Latn, sun_Latn, swe_Latn, swh_Latn, szl_Latn, tam_Taml, tat_Cyrl, tel_Telu, tgk_Cyrl, tgl_Latn, tha_Thai, tir_Ethi, taq_Latn, taq_Tfng, tpi_Latn, tsn_Latn, tso_Latn, tuk_Latn, tum_Latn, tur_Latn, twi_Latn, tzm_Tfng, uig_Arab, ukr_Cyrl, umb_Latn, urd_Arab, uzn_Latn, vec_Latn, vie_Latn, war_Latn, wol_Latn, xho_Latn, ydd_Hebr, yor_Latn, yue_Hant, zho_Hans, zho_Hant, zul_Latn size_categories: - 1K<n<10K source_datasets: - original tags: - news-topic - sib-200 - sib200 task_categories: - text-classification task_ids: - topic-classification configs: - config_name: ace_Arab data_files: - split: train path: data/ace_Arab/train.tsv - split: validation path: data/ace_Arab/dev.tsv - split: test path: data/ace_Arab/test.tsv - config_name: ace_Latn data_files: - split: train path: data/ace_Latn/train.tsv - split: validation path: data/ace_Latn/dev.tsv - split: test path: data/ace_Latn/test.tsv - config_name: acm_Arab data_files: - split: train path: data/acm_Arab/train.tsv - split: validation path: data/acm_Arab/dev.tsv - split: test path: data/acm_Arab/test.tsv - config_name: acq_Arab data_files: - 
split: train path: data/acq_Arab/train.tsv - split: validation path: data/acq_Arab/dev.tsv - split: test path: data/acq_Arab/test.tsv - config_name: aeb_Arab data_files: - split: train path: data/aeb_Arab/train.tsv - split: validation path: data/aeb_Arab/dev.tsv - split: test path: data/aeb_Arab/test.tsv - config_name: afr_Latn data_files: - split: train path: data/afr_Latn/train.tsv - split: validation path: data/afr_Latn/dev.tsv - split: test path: data/afr_Latn/test.tsv - config_name: ajp_Arab data_files: - split: train path: data/ajp_Arab/train.tsv - split: validation path: data/ajp_Arab/dev.tsv - split: test path: data/ajp_Arab/test.tsv - config_name: aka_Latn data_files: - split: train path: data/aka_Latn/train.tsv - split: validation path: data/aka_Latn/dev.tsv - split: test path: data/aka_Latn/test.tsv - config_name: als_Latn data_files: - split: train path: data/als_Latn/train.tsv - split: validation path: data/als_Latn/dev.tsv - split: test path: data/als_Latn/test.tsv - config_name: amh_Ethi data_files: - split: train path: data/amh_Ethi/train.tsv - split: validation path: data/amh_Ethi/dev.tsv - split: test path: data/amh_Ethi/test.tsv - config_name: apc_Arab data_files: - split: train path: data/apc_Arab/train.tsv - split: validation path: data/apc_Arab/dev.tsv - split: test path: data/apc_Arab/test.tsv - config_name: arb_Arab data_files: - split: train path: data/arb_Arab/train.tsv - split: validation path: data/arb_Arab/dev.tsv - split: test path: data/arb_Arab/test.tsv - config_name: arb_Latn data_files: - split: train path: data/arb_Latn/train.tsv - split: validation path: data/arb_Latn/dev.tsv - split: test path: data/arb_Latn/test.tsv - config_name: ars_Arab data_files: - split: train path: data/ars_Arab/train.tsv - split: validation path: data/ars_Arab/dev.tsv - split: test path: data/ars_Arab/test.tsv - config_name: ary_Arab data_files: - split: train path: data/ary_Arab/train.tsv - split: validation path: data/ary_Arab/dev.tsv - split: test 
path: data/ary_Arab/test.tsv - config_name: arz_Arab data_files: - split: train path: data/arz_Arab/train.tsv - split: validation path: data/arz_Arab/dev.tsv - split: test path: data/arz_Arab/test.tsv - config_name: asm_Beng data_files: - split: train path: data/asm_Beng/train.tsv - split: validation path: data/asm_Beng/dev.tsv - split: test path: data/asm_Beng/test.tsv - config_name: ast_Latn data_files: - split: train path: data/ast_Latn/train.tsv - split: validation path: data/ast_Latn/dev.tsv - split: test path: data/ast_Latn/test.tsv - config_name: awa_Deva data_files: - split: train path: data/awa_Deva/train.tsv - split: validation path: data/awa_Deva/dev.tsv - split: test path: data/awa_Deva/test.tsv - config_name: ayr_Latn data_files: - split: train path: data/ayr_Latn/train.tsv - split: validation path: data/ayr_Latn/dev.tsv - split: test path: data/ayr_Latn/test.tsv - config_name: azb_Arab data_files: - split: train path: data/azb_Arab/train.tsv - split: validation path: data/azb_Arab/dev.tsv - split: test path: data/azb_Arab/test.tsv - config_name: azj_Latn data_files: - split: train path: data/azj_Latn/train.tsv - split: validation path: data/azj_Latn/dev.tsv - split: test path: data/azj_Latn/test.tsv - config_name: bak_Cyrl data_files: - split: train path: data/bak_Cyrl/train.tsv - split: validation path: data/bak_Cyrl/dev.tsv - split: test path: data/bak_Cyrl/test.tsv - config_name: bam_Latn data_files: - split: train path: data/bam_Latn/train.tsv - split: validation path: data/bam_Latn/dev.tsv - split: test path: data/bam_Latn/test.tsv - config_name: ban_Latn data_files: - split: train path: data/ban_Latn/train.tsv - split: validation path: data/ban_Latn/dev.tsv - split: test path: data/ban_Latn/test.tsv - config_name: bel_Cyrl data_files: - split: train path: data/bel_Cyrl/train.tsv - split: validation path: data/bel_Cyrl/dev.tsv - split: test path: data/bel_Cyrl/test.tsv - config_name: bem_Latn data_files: - split: train path: 
data/bem_Latn/train.tsv - split: validation path: data/bem_Latn/dev.tsv - split: test path: data/bem_Latn/test.tsv - config_name: ben_Beng data_files: - split: train path: data/ben_Beng/train.tsv - split: validation path: data/ben_Beng/dev.tsv - split: test path: data/ben_Beng/test.tsv - config_name: bho_Deva data_files: - split: train path: data/bho_Deva/train.tsv - split: validation path: data/bho_Deva/dev.tsv - split: test path: data/bho_Deva/test.tsv - config_name: bjn_Arab data_files: - split: train path: data/bjn_Arab/train.tsv - split: validation path: data/bjn_Arab/dev.tsv - split: test path: data/bjn_Arab/test.tsv - config_name: bjn_Latn data_files: - split: train path: data/bjn_Latn/train.tsv - split: validation path: data/bjn_Latn/dev.tsv - split: test path: data/bjn_Latn/test.tsv - config_name: bod_Tibt data_files: - split: train path: data/bod_Tibt/train.tsv - split: validation path: data/bod_Tibt/dev.tsv - split: test path: data/bod_Tibt/test.tsv - config_name: bos_Latn data_files: - split: train path: data/bos_Latn/train.tsv - split: validation path: data/bos_Latn/dev.tsv - split: test path: data/bos_Latn/test.tsv - config_name: bug_Latn data_files: - split: train path: data/bug_Latn/train.tsv - split: validation path: data/bug_Latn/dev.tsv - split: test path: data/bug_Latn/test.tsv - config_name: bul_Cyrl data_files: - split: train path: data/bul_Cyrl/train.tsv - split: validation path: data/bul_Cyrl/dev.tsv - split: test path: data/bul_Cyrl/test.tsv - config_name: cat_Latn data_files: - split: train path: data/cat_Latn/train.tsv - split: validation path: data/cat_Latn/dev.tsv - split: test path: data/cat_Latn/test.tsv - config_name: ceb_Latn data_files: - split: train path: data/ceb_Latn/train.tsv - split: validation path: data/ceb_Latn/dev.tsv - split: test path: data/ceb_Latn/test.tsv - config_name: ces_Latn data_files: - split: train path: data/ces_Latn/train.tsv - split: validation path: data/ces_Latn/dev.tsv - split: test path: 
data/ces_Latn/test.tsv - config_name: cjk_Latn data_files: - split: train path: data/cjk_Latn/train.tsv - split: validation path: data/cjk_Latn/dev.tsv - split: test path: data/cjk_Latn/test.tsv - config_name: ckb_Arab data_files: - split: train path: data/ckb_Arab/train.tsv - split: validation path: data/ckb_Arab/dev.tsv - split: test path: data/ckb_Arab/test.tsv - config_name: crh_Latn data_files: - split: train path: data/crh_Latn/train.tsv - split: validation path: data/crh_Latn/dev.tsv - split: test path: data/crh_Latn/test.tsv - config_name: cym_Latn data_files: - split: train path: data/cym_Latn/train.tsv - split: validation path: data/cym_Latn/dev.tsv - split: test path: data/cym_Latn/test.tsv - config_name: dan_Latn data_files: - split: train path: data/dan_Latn/train.tsv - split: validation path: data/dan_Latn/dev.tsv - split: test path: data/dan_Latn/test.tsv - config_name: deu_Latn data_files: - split: train path: data/deu_Latn/train.tsv - split: validation path: data/deu_Latn/dev.tsv - split: test path: data/deu_Latn/test.tsv - config_name: dik_Latn data_files: - split: train path: data/dik_Latn/train.tsv - split: validation path: data/dik_Latn/dev.tsv - split: test path: data/dik_Latn/test.tsv - config_name: dyu_Latn data_files: - split: train path: data/dyu_Latn/train.tsv - split: validation path: data/dyu_Latn/dev.tsv - split: test path: data/dyu_Latn/test.tsv - config_name: dzo_Tibt data_files: - split: train path: data/dzo_Tibt/train.tsv - split: validation path: data/dzo_Tibt/dev.tsv - split: test path: data/dzo_Tibt/test.tsv - config_name: ell_Grek data_files: - split: train path: data/ell_Grek/train.tsv - split: validation path: data/ell_Grek/dev.tsv - split: test path: data/ell_Grek/test.tsv - config_name: eng_Latn data_files: - split: train path: data/eng_Latn/train.tsv - split: validation path: data/eng_Latn/dev.tsv - split: test path: data/eng_Latn/test.tsv - config_name: epo_Latn data_files: - split: train path: data/epo_Latn/train.tsv - 
split: validation path: data/epo_Latn/dev.tsv - split: test path: data/epo_Latn/test.tsv - config_name: est_Latn data_files: - split: train path: data/est_Latn/train.tsv - split: validation path: data/est_Latn/dev.tsv - split: test path: data/est_Latn/test.tsv - config_name: eus_Latn data_files: - split: train path: data/eus_Latn/train.tsv - split: validation path: data/eus_Latn/dev.tsv - split: test path: data/eus_Latn/test.tsv - config_name: ewe_Latn data_files: - split: train path: data/ewe_Latn/train.tsv - split: validation path: data/ewe_Latn/dev.tsv - split: test path: data/ewe_Latn/test.tsv - config_name: fao_Latn data_files: - split: train path: data/fao_Latn/train.tsv - split: validation path: data/fao_Latn/dev.tsv - split: test path: data/fao_Latn/test.tsv - config_name: fij_Latn data_files: - split: train path: data/fij_Latn/train.tsv - split: validation path: data/fij_Latn/dev.tsv - split: test path: data/fij_Latn/test.tsv - config_name: fin_Latn data_files: - split: train path: data/fin_Latn/train.tsv - split: validation path: data/fin_Latn/dev.tsv - split: test path: data/fin_Latn/test.tsv - config_name: fon_Latn data_files: - split: train path: data/fon_Latn/train.tsv - split: validation path: data/fon_Latn/dev.tsv - split: test path: data/fon_Latn/test.tsv - config_name: fra_Latn data_files: - split: train path: data/fra_Latn/train.tsv - split: validation path: data/fra_Latn/dev.tsv - split: test path: data/fra_Latn/test.tsv - config_name: fur_Latn data_files: - split: train path: data/fur_Latn/train.tsv - split: validation path: data/fur_Latn/dev.tsv - split: test path: data/fur_Latn/test.tsv - config_name: fuv_Latn data_files: - split: train path: data/fuv_Latn/train.tsv - split: validation path: data/fuv_Latn/dev.tsv - split: test path: data/fuv_Latn/test.tsv - config_name: gaz_Latn data_files: - split: train path: data/gaz_Latn/train.tsv - split: validation path: data/gaz_Latn/dev.tsv - split: test path: data/gaz_Latn/test.tsv - config_name: 
gla_Latn data_files: - split: train path: data/gla_Latn/train.tsv - split: validation path: data/gla_Latn/dev.tsv - split: test path: data/gla_Latn/test.tsv - config_name: gle_Latn data_files: - split: train path: data/gle_Latn/train.tsv - split: validation path: data/gle_Latn/dev.tsv - split: test path: data/gle_Latn/test.tsv - config_name: glg_Latn data_files: - split: train path: data/glg_Latn/train.tsv - split: validation path: data/glg_Latn/dev.tsv - split: test path: data/glg_Latn/test.tsv - config_name: grn_Latn data_files: - split: train path: data/grn_Latn/train.tsv - split: validation path: data/grn_Latn/dev.tsv - split: test path: data/grn_Latn/test.tsv - config_name: guj_Gujr data_files: - split: train path: data/guj_Gujr/train.tsv - split: validation path: data/guj_Gujr/dev.tsv - split: test path: data/guj_Gujr/test.tsv - config_name: hat_Latn data_files: - split: train path: data/hat_Latn/train.tsv - split: validation path: data/hat_Latn/dev.tsv - split: test path: data/hat_Latn/test.tsv - config_name: hau_Latn data_files: - split: train path: data/hau_Latn/train.tsv - split: validation path: data/hau_Latn/dev.tsv - split: test path: data/hau_Latn/test.tsv - config_name: heb_Hebr data_files: - split: train path: data/heb_Hebr/train.tsv - split: validation path: data/heb_Hebr/dev.tsv - split: test path: data/heb_Hebr/test.tsv - config_name: hin_Deva data_files: - split: train path: data/hin_Deva/train.tsv - split: validation path: data/hin_Deva/dev.tsv - split: test path: data/hin_Deva/test.tsv - config_name: hne_Deva data_files: - split: train path: data/hne_Deva/train.tsv - split: validation path: data/hne_Deva/dev.tsv - split: test path: data/hne_Deva/test.tsv - config_name: hrv_Latn data_files: - split: train path: data/hrv_Latn/train.tsv - split: validation path: data/hrv_Latn/dev.tsv - split: test path: data/hrv_Latn/test.tsv - config_name: hun_Latn data_files: - split: train path: data/hun_Latn/train.tsv - split: validation path: 
data/hun_Latn/dev.tsv - split: test path: data/hun_Latn/test.tsv - config_name: hye_Armn data_files: - split: train path: data/hye_Armn/train.tsv - split: validation path: data/hye_Armn/dev.tsv - split: test path: data/hye_Armn/test.tsv - config_name: ibo_Latn data_files: - split: train path: data/ibo_Latn/train.tsv - split: validation path: data/ibo_Latn/dev.tsv - split: test path: data/ibo_Latn/test.tsv - config_name: ilo_Latn data_files: - split: train path: data/ilo_Latn/train.tsv - split: validation path: data/ilo_Latn/dev.tsv - split: test path: data/ilo_Latn/test.tsv - config_name: ind_Latn data_files: - split: train path: data/ind_Latn/train.tsv - split: validation path: data/ind_Latn/dev.tsv - split: test path: data/ind_Latn/test.tsv - config_name: isl_Latn data_files: - split: train path: data/isl_Latn/train.tsv - split: validation path: data/isl_Latn/dev.tsv - split: test path: data/isl_Latn/test.tsv - config_name: ita_Latn data_files: - split: train path: data/ita_Latn/train.tsv - split: validation path: data/ita_Latn/dev.tsv - split: test path: data/ita_Latn/test.tsv - config_name: jav_Latn data_files: - split: train path: data/jav_Latn/train.tsv - split: validation path: data/jav_Latn/dev.tsv - split: test path: data/jav_Latn/test.tsv - config_name: jpn_Jpan data_files: - split: train path: data/jpn_Jpan/train.tsv - split: validation path: data/jpn_Jpan/dev.tsv - split: test path: data/jpn_Jpan/test.tsv - config_name: kab_Latn data_files: - split: train path: data/kab_Latn/train.tsv - split: validation path: data/kab_Latn/dev.tsv - split: test path: data/kab_Latn/test.tsv - config_name: kac_Latn data_files: - split: train path: data/kac_Latn/train.tsv - split: validation path: data/kac_Latn/dev.tsv - split: test path: data/kac_Latn/test.tsv - config_name: kam_Latn data_files: - split: train path: data/kam_Latn/train.tsv - split: validation path: data/kam_Latn/dev.tsv - split: test path: data/kam_Latn/test.tsv - config_name: kan_Knda data_files: - 
split: train path: data/kan_Knda/train.tsv - split: validation path: data/kan_Knda/dev.tsv - split: test path: data/kan_Knda/test.tsv - config_name: kas_Arab data_files: - split: train path: data/kas_Arab/train.tsv - split: validation path: data/kas_Arab/dev.tsv - split: test path: data/kas_Arab/test.tsv - config_name: kas_Deva data_files: - split: train path: data/kas_Deva/train.tsv - split: validation path: data/kas_Deva/dev.tsv - split: test path: data/kas_Deva/test.tsv - config_name: kat_Geor data_files: - split: train path: data/kat_Geor/train.tsv - split: validation path: data/kat_Geor/dev.tsv - split: test path: data/kat_Geor/test.tsv - config_name: kaz_Cyrl data_files: - split: train path: data/kaz_Cyrl/train.tsv - split: validation path: data/kaz_Cyrl/dev.tsv - split: test path: data/kaz_Cyrl/test.tsv - config_name: kbp_Latn data_files: - split: train path: data/kbp_Latn/train.tsv - split: validation path: data/kbp_Latn/dev.tsv - split: test path: data/kbp_Latn/test.tsv - config_name: kea_Latn data_files: - split: train path: data/kea_Latn/train.tsv - split: validation path: data/kea_Latn/dev.tsv - split: test path: data/kea_Latn/test.tsv - config_name: khk_Cyrl data_files: - split: train path: data/khk_Cyrl/train.tsv - split: validation path: data/khk_Cyrl/dev.tsv - split: test path: data/khk_Cyrl/test.tsv - config_name: khm_Khmr data_files: - split: train path: data/khm_Khmr/train.tsv - split: validation path: data/khm_Khmr/dev.tsv - split: test path: data/khm_Khmr/test.tsv - config_name: kik_Latn data_files: - split: train path: data/kik_Latn/train.tsv - split: validation path: data/kik_Latn/dev.tsv - split: test path: data/kik_Latn/test.tsv - config_name: kin_Latn data_files: - split: train path: data/kin_Latn/train.tsv - split: validation path: data/kin_Latn/dev.tsv - split: test path: data/kin_Latn/test.tsv - config_name: kir_Cyrl data_files: - split: train path: data/kir_Cyrl/train.tsv - split: validation path: data/kir_Cyrl/dev.tsv - split: test 
path: data/kir_Cyrl/test.tsv - config_name: kmb_Latn data_files: - split: train path: data/kmb_Latn/train.tsv - split: validation path: data/kmb_Latn/dev.tsv - split: test path: data/kmb_Latn/test.tsv - config_name: kmr_Latn data_files: - split: train path: data/kmr_Latn/train.tsv - split: validation path: data/kmr_Latn/dev.tsv - split: test path: data/kmr_Latn/test.tsv - config_name: knc_Arab data_files: - split: train path: data/knc_Arab/train.tsv - split: validation path: data/knc_Arab/dev.tsv - split: test path: data/knc_Arab/test.tsv - config_name: knc_Latn data_files: - split: train path: data/knc_Latn/train.tsv - split: validation path: data/knc_Latn/dev.tsv - split: test path: data/knc_Latn/test.tsv - config_name: kon_Latn data_files: - split: train path: data/kon_Latn/train.tsv - split: validation path: data/kon_Latn/dev.tsv - split: test path: data/kon_Latn/test.tsv - config_name: kor_Hang data_files: - split: train path: data/kor_Hang/train.tsv - split: validation path: data/kor_Hang/dev.tsv - split: test path: data/kor_Hang/test.tsv - config_name: lao_Laoo data_files: - split: train path: data/lao_Laoo/train.tsv - split: validation path: data/lao_Laoo/dev.tsv - split: test path: data/lao_Laoo/test.tsv - config_name: lij_Latn data_files: - split: train path: data/lij_Latn/train.tsv - split: validation path: data/lij_Latn/dev.tsv - split: test path: data/lij_Latn/test.tsv - config_name: lim_Latn data_files: - split: train path: data/lim_Latn/train.tsv - split: validation path: data/lim_Latn/dev.tsv - split: test path: data/lim_Latn/test.tsv - config_name: lin_Latn data_files: - split: train path: data/lin_Latn/train.tsv - split: validation path: data/lin_Latn/dev.tsv - split: test path: data/lin_Latn/test.tsv - config_name: lit_Latn data_files: - split: train path: data/lit_Latn/train.tsv - split: validation path: data/lit_Latn/dev.tsv - split: test path: data/lit_Latn/test.tsv - config_name: lmo_Latn data_files: - split: train path: 
data/lmo_Latn/train.tsv - split: validation path: data/lmo_Latn/dev.tsv - split: test path: data/lmo_Latn/test.tsv - config_name: ltg_Latn data_files: - split: train path: data/ltg_Latn/train.tsv - split: validation path: data/ltg_Latn/dev.tsv - split: test path: data/ltg_Latn/test.tsv - config_name: ltz_Latn data_files: - split: train path: data/ltz_Latn/train.tsv - split: validation path: data/ltz_Latn/dev.tsv - split: test path: data/ltz_Latn/test.tsv - config_name: lua_Latn data_files: - split: train path: data/lua_Latn/train.tsv - split: validation path: data/lua_Latn/dev.tsv - split: test path: data/lua_Latn/test.tsv - config_name: lug_Latn data_files: - split: train path: data/lug_Latn/train.tsv - split: validation path: data/lug_Latn/dev.tsv - split: test path: data/lug_Latn/test.tsv - config_name: luo_Latn data_files: - split: train path: data/luo_Latn/train.tsv - split: validation path: data/luo_Latn/dev.tsv - split: test path: data/luo_Latn/test.tsv - config_name: lus_Latn data_files: - split: train path: data/lus_Latn/train.tsv - split: validation path: data/lus_Latn/dev.tsv - split: test path: data/lus_Latn/test.tsv - config_name: lvs_Latn data_files: - split: train path: data/lvs_Latn/train.tsv - split: validation path: data/lvs_Latn/dev.tsv - split: test path: data/lvs_Latn/test.tsv - config_name: mag_Deva data_files: - split: train path: data/mag_Deva/train.tsv - split: validation path: data/mag_Deva/dev.tsv - split: test path: data/mag_Deva/test.tsv - config_name: mai_Deva data_files: - split: train path: data/mai_Deva/train.tsv - split: validation path: data/mai_Deva/dev.tsv - split: test path: data/mai_Deva/test.tsv - config_name: mal_Mlym data_files: - split: train path: data/mal_Mlym/train.tsv - split: validation path: data/mal_Mlym/dev.tsv - split: test path: data/mal_Mlym/test.tsv - config_name: mar_Deva data_files: - split: train path: data/mar_Deva/train.tsv - split: validation path: data/mar_Deva/dev.tsv - split: test path: 
data/mar_Deva/test.tsv - config_name: min_Arab data_files: - split: train path: data/min_Arab/train.tsv - split: validation path: data/min_Arab/dev.tsv - split: test path: data/min_Arab/test.tsv - config_name: min_Latn data_files: - split: train path: data/min_Latn/train.tsv - split: validation path: data/min_Latn/dev.tsv - split: test path: data/min_Latn/test.tsv - config_name: mkd_Cyrl data_files: - split: train path: data/mkd_Cyrl/train.tsv - split: validation path: data/mkd_Cyrl/dev.tsv - split: test path: data/mkd_Cyrl/test.tsv - config_name: mlt_Latn data_files: - split: train path: data/mlt_Latn/train.tsv - split: validation path: data/mlt_Latn/dev.tsv - split: test path: data/mlt_Latn/test.tsv - config_name: mni_Beng data_files: - split: train path: data/mni_Beng/train.tsv - split: validation path: data/mni_Beng/dev.tsv - split: test path: data/mni_Beng/test.tsv - config_name: mos_Latn data_files: - split: train path: data/mos_Latn/train.tsv - split: validation path: data/mos_Latn/dev.tsv - split: test path: data/mos_Latn/test.tsv - config_name: mri_Latn data_files: - split: train path: data/mri_Latn/train.tsv - split: validation path: data/mri_Latn/dev.tsv - split: test path: data/mri_Latn/test.tsv - config_name: mya_Mymr data_files: - split: train path: data/mya_Mymr/train.tsv - split: validation path: data/mya_Mymr/dev.tsv - split: test path: data/mya_Mymr/test.tsv - config_name: nld_Latn data_files: - split: train path: data/nld_Latn/train.tsv - split: validation path: data/nld_Latn/dev.tsv - split: test path: data/nld_Latn/test.tsv - config_name: nno_Latn data_files: - split: train path: data/nno_Latn/train.tsv - split: validation path: data/nno_Latn/dev.tsv - split: test path: data/nno_Latn/test.tsv - config_name: nob_Latn data_files: - split: train path: data/nob_Latn/train.tsv - split: validation path: data/nob_Latn/dev.tsv - split: test path: data/nob_Latn/test.tsv - config_name: npi_Deva data_files: - split: train path: data/npi_Deva/train.tsv - 
split: validation path: data/npi_Deva/dev.tsv - split: test path: data/npi_Deva/test.tsv - config_name: nqo_Nkoo data_files: - split: train path: data/nqo_Nkoo/train.tsv - split: validation path: data/nqo_Nkoo/dev.tsv - split: test path: data/nqo_Nkoo/test.tsv - config_name: nqo_Nkoo.zip data_files: - split: train path: data/nqo_Nkoo.zip/train.tsv - split: validation path: data/nqo_Nkoo.zip/dev.tsv - split: test path: data/nqo_Nkoo.zip/test.tsv - config_name: nso_Latn data_files: - split: train path: data/nso_Latn/train.tsv - split: validation path: data/nso_Latn/dev.tsv - split: test path: data/nso_Latn/test.tsv - config_name: nus_Latn data_files: - split: train path: data/nus_Latn/train.tsv - split: validation path: data/nus_Latn/dev.tsv - split: test path: data/nus_Latn/test.tsv - config_name: nya_Latn data_files: - split: train path: data/nya_Latn/train.tsv - split: validation path: data/nya_Latn/dev.tsv - split: test path: data/nya_Latn/test.tsv - config_name: oci_Latn data_files: - split: train path: data/oci_Latn/train.tsv - split: validation path: data/oci_Latn/dev.tsv - split: test path: data/oci_Latn/test.tsv - config_name: ory_Orya data_files: - split: train path: data/ory_Orya/train.tsv - split: validation path: data/ory_Orya/dev.tsv - split: test path: data/ory_Orya/test.tsv - config_name: pag_Latn data_files: - split: train path: data/pag_Latn/train.tsv - split: validation path: data/pag_Latn/dev.tsv - split: test path: data/pag_Latn/test.tsv - config_name: pan_Guru data_files: - split: train path: data/pan_Guru/train.tsv - split: validation path: data/pan_Guru/dev.tsv - split: test path: data/pan_Guru/test.tsv - config_name: pap_Latn data_files: - split: train path: data/pap_Latn/train.tsv - split: validation path: data/pap_Latn/dev.tsv - split: test path: data/pap_Latn/test.tsv - config_name: pbt_Arab data_files: - split: train path: data/pbt_Arab/train.tsv - split: validation path: data/pbt_Arab/dev.tsv - split: test path: data/pbt_Arab/test.tsv - 
config_name: pes_Arab data_files: - split: train path: data/pes_Arab/train.tsv - split: validation path: data/pes_Arab/dev.tsv - split: test path: data/pes_Arab/test.tsv - config_name: plt_Latn data_files: - split: train path: data/plt_Latn/train.tsv - split: validation path: data/plt_Latn/dev.tsv - split: test path: data/plt_Latn/test.tsv - config_name: pol_Latn data_files: - split: train path: data/pol_Latn/train.tsv - split: validation path: data/pol_Latn/dev.tsv - split: test path: data/pol_Latn/test.tsv - config_name: por_Latn data_files: - split: train path: data/por_Latn/train.tsv - split: validation path: data/por_Latn/dev.tsv - split: test path: data/por_Latn/test.tsv - config_name: prs_Arab data_files: - split: train path: data/prs_Arab/train.tsv - split: validation path: data/prs_Arab/dev.tsv - split: test path: data/prs_Arab/test.tsv - config_name: quy_Latn data_files: - split: train path: data/quy_Latn/train.tsv - split: validation path: data/quy_Latn/dev.tsv - split: test path: data/quy_Latn/test.tsv - config_name: ron_Latn data_files: - split: train path: data/ron_Latn/train.tsv - split: validation path: data/ron_Latn/dev.tsv - split: test path: data/ron_Latn/test.tsv - config_name: run_Latn data_files: - split: train path: data/run_Latn/train.tsv - split: validation path: data/run_Latn/dev.tsv - split: test path: data/run_Latn/test.tsv - config_name: rus_Cyrl data_files: - split: train path: data/rus_Cyrl/train.tsv - split: validation path: data/rus_Cyrl/dev.tsv - split: test path: data/rus_Cyrl/test.tsv - config_name: sag_Latn data_files: - split: train path: data/sag_Latn/train.tsv - split: validation path: data/sag_Latn/dev.tsv - split: test path: data/sag_Latn/test.tsv - config_name: san_Deva data_files: - split: train path: data/san_Deva/train.tsv - split: validation path: data/san_Deva/dev.tsv - split: test path: data/san_Deva/test.tsv - config_name: sat_Olck data_files: - split: train path: data/sat_Olck/train.tsv - split: validation path: 
data/sat_Olck/dev.tsv - split: test path: data/sat_Olck/test.tsv - config_name: scn_Latn data_files: - split: train path: data/scn_Latn/train.tsv - split: validation path: data/scn_Latn/dev.tsv - split: test path: data/scn_Latn/test.tsv - config_name: shn_Mymr data_files: - split: train path: data/shn_Mymr/train.tsv - split: validation path: data/shn_Mymr/dev.tsv - split: test path: data/shn_Mymr/test.tsv - config_name: sin_Sinh data_files: - split: train path: data/sin_Sinh/train.tsv - split: validation path: data/sin_Sinh/dev.tsv - split: test path: data/sin_Sinh/test.tsv - config_name: slk_Latn data_files: - split: train path: data/slk_Latn/train.tsv - split: validation path: data/slk_Latn/dev.tsv - split: test path: data/slk_Latn/test.tsv - config_name: slv_Latn data_files: - split: train path: data/slv_Latn/train.tsv - split: validation path: data/slv_Latn/dev.tsv - split: test path: data/slv_Latn/test.tsv - config_name: smo_Latn data_files: - split: train path: data/smo_Latn/train.tsv - split: validation path: data/smo_Latn/dev.tsv - split: test path: data/smo_Latn/test.tsv - config_name: sna_Latn data_files: - split: train path: data/sna_Latn/train.tsv - split: validation path: data/sna_Latn/dev.tsv - split: test path: data/sna_Latn/test.tsv - config_name: snd_Arab data_files: - split: train path: data/snd_Arab/train.tsv - split: validation path: data/snd_Arab/dev.tsv - split: test path: data/snd_Arab/test.tsv - config_name: som_Latn data_files: - split: train path: data/som_Latn/train.tsv - split: validation path: data/som_Latn/dev.tsv - split: test path: data/som_Latn/test.tsv - config_name: sot_Latn data_files: - split: train path: data/sot_Latn/train.tsv - split: validation path: data/sot_Latn/dev.tsv - split: test path: data/sot_Latn/test.tsv - config_name: spa_Latn data_files: - split: train path: data/spa_Latn/train.tsv - split: validation path: data/spa_Latn/dev.tsv - split: test path: data/spa_Latn/test.tsv - config_name: srd_Latn data_files: - 
split: train path: data/srd_Latn/train.tsv - split: validation path: data/srd_Latn/dev.tsv - split: test path: data/srd_Latn/test.tsv - config_name: srp_Cyrl data_files: - split: train path: data/srp_Cyrl/train.tsv - split: validation path: data/srp_Cyrl/dev.tsv - split: test path: data/srp_Cyrl/test.tsv - config_name: ssw_Latn data_files: - split: train path: data/ssw_Latn/train.tsv - split: validation path: data/ssw_Latn/dev.tsv - split: test path: data/ssw_Latn/test.tsv - config_name: sun_Latn data_files: - split: train path: data/sun_Latn/train.tsv - split: validation path: data/sun_Latn/dev.tsv - split: test path: data/sun_Latn/test.tsv - config_name: swe_Latn data_files: - split: train path: data/swe_Latn/train.tsv - split: validation path: data/swe_Latn/dev.tsv - split: test path: data/swe_Latn/test.tsv - config_name: swh_Latn data_files: - split: train path: data/swh_Latn/train.tsv - split: validation path: data/swh_Latn/dev.tsv - split: test path: data/swh_Latn/test.tsv - config_name: szl_Latn data_files: - split: train path: data/szl_Latn/train.tsv - split: validation path: data/szl_Latn/dev.tsv - split: test path: data/szl_Latn/test.tsv - config_name: tam_Taml data_files: - split: train path: data/tam_Taml/train.tsv - split: validation path: data/tam_Taml/dev.tsv - split: test path: data/tam_Taml/test.tsv - config_name: taq_Latn data_files: - split: train path: data/taq_Latn/train.tsv - split: validation path: data/taq_Latn/dev.tsv - split: test path: data/taq_Latn/test.tsv - config_name: taq_Tfng data_files: - split: train path: data/taq_Tfng/train.tsv - split: validation path: data/taq_Tfng/dev.tsv - split: test path: data/taq_Tfng/test.tsv - config_name: tat_Cyrl data_files: - split: train path: data/tat_Cyrl/train.tsv - split: validation path: data/tat_Cyrl/dev.tsv - split: test path: data/tat_Cyrl/test.tsv - config_name: tel_Telu data_files: - split: train path: data/tel_Telu/train.tsv - split: validation path: data/tel_Telu/dev.tsv - split: test 
path: data/tel_Telu/test.tsv - config_name: tgk_Cyrl data_files: - split: train path: data/tgk_Cyrl/train.tsv - split: validation path: data/tgk_Cyrl/dev.tsv - split: test path: data/tgk_Cyrl/test.tsv - config_name: tgl_Latn data_files: - split: train path: data/tgl_Latn/train.tsv - split: validation path: data/tgl_Latn/dev.tsv - split: test path: data/tgl_Latn/test.tsv - config_name: tha_Thai data_files: - split: train path: data/tha_Thai/train.tsv - split: validation path: data/tha_Thai/dev.tsv - split: test path: data/tha_Thai/test.tsv - config_name: tir_Ethi data_files: - split: train path: data/tir_Ethi/train.tsv - split: validation path: data/tir_Ethi/dev.tsv - split: test path: data/tir_Ethi/test.tsv - config_name: tpi_Latn data_files: - split: train path: data/tpi_Latn/train.tsv - split: validation path: data/tpi_Latn/dev.tsv - split: test path: data/tpi_Latn/test.tsv - config_name: tsn_Latn data_files: - split: train path: data/tsn_Latn/train.tsv - split: validation path: data/tsn_Latn/dev.tsv - split: test path: data/tsn_Latn/test.tsv - config_name: tso_Latn data_files: - split: train path: data/tso_Latn/train.tsv - split: validation path: data/tso_Latn/dev.tsv - split: test path: data/tso_Latn/test.tsv - config_name: tuk_Latn data_files: - split: train path: data/tuk_Latn/train.tsv - split: validation path: data/tuk_Latn/dev.tsv - split: test path: data/tuk_Latn/test.tsv - config_name: tum_Latn data_files: - split: train path: data/tum_Latn/train.tsv - split: validation path: data/tum_Latn/dev.tsv - split: test path: data/tum_Latn/test.tsv - config_name: tur_Latn data_files: - split: train path: data/tur_Latn/train.tsv - split: validation path: data/tur_Latn/dev.tsv - split: test path: data/tur_Latn/test.tsv - config_name: twi_Latn data_files: - split: train path: data/twi_Latn/train.tsv - split: validation path: data/twi_Latn/dev.tsv - split: test path: data/twi_Latn/test.tsv - config_name: tzm_Tfng data_files: - split: train path: 
data/tzm_Tfng/train.tsv - split: validation path: data/tzm_Tfng/dev.tsv - split: test path: data/tzm_Tfng/test.tsv - config_name: uig_Arab data_files: - split: train path: data/uig_Arab/train.tsv - split: validation path: data/uig_Arab/dev.tsv - split: test path: data/uig_Arab/test.tsv - config_name: ukr_Cyrl data_files: - split: train path: data/ukr_Cyrl/train.tsv - split: validation path: data/ukr_Cyrl/dev.tsv - split: test path: data/ukr_Cyrl/test.tsv - config_name: umb_Latn data_files: - split: train path: data/umb_Latn/train.tsv - split: validation path: data/umb_Latn/dev.tsv - split: test path: data/umb_Latn/test.tsv - config_name: urd_Arab data_files: - split: train path: data/urd_Arab/train.tsv - split: validation path: data/urd_Arab/dev.tsv - split: test path: data/urd_Arab/test.tsv - config_name: uzn_Latn data_files: - split: train path: data/uzn_Latn/train.tsv - split: validation path: data/uzn_Latn/dev.tsv - split: test path: data/uzn_Latn/test.tsv - config_name: vec_Latn data_files: - split: train path: data/vec_Latn/train.tsv - split: validation path: data/vec_Latn/dev.tsv - split: test path: data/vec_Latn/test.tsv - config_name: vie_Latn data_files: - split: train path: data/vie_Latn/train.tsv - split: validation path: data/vie_Latn/dev.tsv - split: test path: data/vie_Latn/test.tsv - config_name: war_Latn data_files: - split: train path: data/war_Latn/train.tsv - split: validation path: data/war_Latn/dev.tsv - split: test path: data/war_Latn/test.tsv - config_name: wol_Latn data_files: - split: train path: data/wol_Latn/train.tsv - split: validation path: data/wol_Latn/dev.tsv - split: test path: data/wol_Latn/test.tsv - config_name: xho_Latn data_files: - split: train path: data/xho_Latn/train.tsv - split: validation path: data/xho_Latn/dev.tsv - split: test path: data/xho_Latn/test.tsv - config_name: ydd_Hebr data_files: - split: train path: data/ydd_Hebr/train.tsv - split: validation path: data/ydd_Hebr/dev.tsv - split: test path: 
data/ydd_Hebr/test.tsv - config_name: yor_Latn data_files: - split: train path: data/yor_Latn/train.tsv - split: validation path: data/yor_Latn/dev.tsv - split: test path: data/yor_Latn/test.tsv - config_name: yue_Hant data_files: - split: train path: data/yue_Hant/train.tsv - split: validation path: data/yue_Hant/dev.tsv - split: test path: data/yue_Hant/test.tsv - config_name: zho_Hans data_files: - split: train path: data/zho_Hans/train.tsv - split: validation path: data/zho_Hans/dev.tsv - split: test path: data/zho_Hans/test.tsv - config_name: zho_Hant data_files: - split: train path: data/zho_Hant/train.tsv - split: validation path: data/zho_Hant/dev.tsv - split: test path: data/zho_Hant/test.tsv - config_name: zsm_Latn data_files: - split: train path: data/zsm_Latn/train.tsv - split: validation path: data/zsm_Latn/dev.tsv - split: test path: data/zsm_Latn/test.tsv - config_name: zul_Latn data_files: - split: train path: data/zul_Latn/train.tsv - split: validation path: data/zul_Latn/dev.tsv - split: test path: data/zul_Latn/test.tsv --- # Dataset Card for SIB-200 ## Table of Contents - [Table of Contents](#table-of-contents) - [Dataset Description](#dataset-description) - [Dataset Summary](#dataset-summary) - [Supported Tasks and Leaderboards](#supported-tasks-and-leaderboards) - [Languages](#languages) - [Dataset Structure](#dataset-structure) - [Data Instances](#data-instances) - [Data Fields](#data-fields) - [Data Splits](#data-splits) - [Dataset Creation](#dataset-creation) - [Curation Rationale](#curation-rationale) - [Source Data](#source-data) - [Annotations](#annotations) - [Personal and Sensitive Information](#personal-and-sensitive-information) - [Considerations for Using the Data](#considerations-for-using-the-data) - [Social Impact of Dataset](#social-impact-of-dataset) - [Discussion of Biases](#discussion-of-biases) - [Other Known Limitations](#other-known-limitations) - [Additional Information](#additional-information) - [Dataset 
Curators](#dataset-curators) - [Licensing Information](#licensing-information) - [Citation Information](#citation-information) - [Contributions](#contributions) ## Dataset Description - **Homepage:** [homepage](https://github.com/dadelani/sib-200) - **Repository:** [github](https://github.com/dadelani/sib-200) - **Paper:** [paper](https://arxiv.org/abs/2309.07445) - **Point of Contact:** d.adelani@ucl.ac.uk ### Dataset Summary SIB-200 is the largest publicly available topic classification dataset based on Flores-200 covering 205 languages and dialects. The train/validation/test sets are available for all the 205 languages. ### Supported Tasks and Leaderboards - `topic classification`: categorize Wikipedia sentences into topics, e.g. science/technology, sports, or politics. ### Languages There are 205 languages available: ## Dataset Structure ### Data Instances The examples look like this for English: ``` from datasets import load_dataset data = load_dataset('Davlan/sib200', 'eng_Latn') # Please, specify the language code # A data point example is below: { 'label': 0, 'index_id': 1523, 'text': 'Mutation adds new genetic variation, and selection removes it from the pool of expressed variation.' } ``` ### Data Fields - `label`: topic id - `index_id`: sentence id in flores-200 - `text`: text The topics correspond to this list: ``` "science/technology", "travel", "politics", "sports", "health", "entertainment", "geography" ``` ### Data Splits For all languages, there are three splits. The original splits were named `train`, `dev` and `test` and they correspond to the `train`, `validation` and `test` splits. The splits have the following sizes: | Language | train | validation | test | |-----------------|------:|-----------:|-----:| | English | 701 | 99 | 204 | ## Dataset Creation ### Curation Rationale The dataset was created to provide new resources for 205 languages, many of which are under-served in natural language processing. 
[More Information Needed] ### Source Data The source of the data is from the news domain; details can be found in the [paper](https://arxiv.org/abs/2309.07445). #### Initial Data Collection and Normalization The articles were word-tokenized; information on the exact pre-processing pipeline is unavailable. #### Who are the source language producers? The source language was produced by journalists and writers employed by the news agency and newspaper mentioned above. ### Annotations #### Annotation process Details can be found in the [paper](https://arxiv.org/abs/2309.07445). #### Who are the annotators? Annotators were recruited from [Masakhane](https://www.masakhane.io/). ### Personal and Sensitive Information The data is sourced from newspaper sources and only contains mentions of public figures or individuals. ## Considerations for Using the Data ### Social Impact of Dataset [More Information Needed] ### Discussion of Biases [More Information Needed] ### Other Known Limitations Users should keep in mind that the dataset only contains news text, which might limit the applicability of the developed systems to other domains. ## Additional Information ### Dataset Curators ### Licensing Information The data is licensed under CC BY-SA 4.0. ### Citation Information Please cite the dataset using the following [BibTeX](http://www.bibtex.org/) entry: ``` @misc{adelani2023sib200, title={SIB-200: A Simple, Inclusive, and Big Evaluation Dataset for Topic Classification in 200+ Languages and Dialects}, author={David Ifeoluwa Adelani and Hannah Liu and Xiaoyu Shen and Nikita Vassilyev and Jesujoba O. Alabi and Yanke Mao and Haonan Gao and Annie En-Shiun Lee}, year={2023}, eprint={2309.07445}, archivePrefix={arXiv}, primaryClass={cs.CL} } ``` ### Contributions Thanks to [@dadelani](https://github.com/dadelani) for adding this dataset.
提供机构:
Davlan
原始信息汇总

数据集概述

数据集基本信息

  • 名称: sib200
  • 语言多样性: 多语言
  • 语言数量: 超过200种语言
  • 语言类型: 包括多种书写系统,如阿拉伯文、拉丁文、西里尔文等
  • 数据集大小: 1K<n<10K
  • 数据来源: 原始数据
  • 许可证: CC-BY-SA-4.0

数据集内容

  • 任务类型: 文本分类
  • 具体任务: 主题分类
  • 标签类型: 新闻主题

数据文件配置

  • 配置示例:
    • 语言: ace_Arab
      • 训练集: data/ace_Arab/train.tsv
      • 验证集: data/ace_Arab/dev.tsv
      • 测试集: data/ace_Arab/test.tsv
    • 语言: ace_Latn
      • 训练集: data/ace_Latn/train.tsv
      • 验证集: data/ace_Latn/dev.tsv
      • 测试集: data/ace_Latn/test.tsv
    • 语言: acm_Arab
      • 训练集: data/acm_Arab/train.tsv
      • 验证集: data/acm_Arab/dev.tsv
      • 测试集: data/acm_Arab/test.tsv
    • 语言: acq_Arab
      • 训练集: data/acq_Arab/train.tsv
      • 验证集: data/acq_Arab/dev.tsv
      • 测试集: data/acq_Arab/test.tsv
    • 语言: aeb_Arab
      • 训练集: data/aeb_Arab/train.tsv
      • 验证集: data/aeb_Arab/dev.tsv
      • 测试集: data/aeb_Arab/test.tsv
    • 语言: afr_Latn
      • 训练集: data/afr_Latn/train.tsv
      • 验证集: data/afr_Latn/dev.tsv
      • 测试集: data/afr_Latn/test.tsv
    • 语言: ajp_Arab
      • 训练集: data/ajp_Arab/train.tsv
      • 验证集: data/ajp_Arab/dev.tsv
      • 测试集: data/ajp_Arab/test.tsv
    • 语言: aka_Latn
      • 训练集: data/aka_Latn/train.tsv
      • 验证集: data/aka_Latn/dev.tsv
      • 测试集: data/aka_Latn/test.tsv
    • 语言: als_Latn
      • 训练集: data/als_Latn/train.tsv
      • 验证集: data/als_Latn/dev.tsv
      • 测试集: data/als_Latn/test.tsv
    • 语言: amh_Ethi
      • 训练集: data/amh_Ethi/train.tsv
      • 验证集: data/amh_Ethi/dev.tsv
      • 测试集: data/amh_Ethi/test.tsv
    • 语言: apc_Arab
      • 训练集: data/apc_Arab/train.tsv
      • 验证集: data/apc_Arab/dev.tsv
      • 测试集: data/apc_Arab/test.tsv
    • 语言: arb_Arab
      • 训练集: data/arb_Arab/train.tsv
      • 验证集: data/arb_Arab/dev.tsv
      • 测试集: data/arb_Arab/test.tsv
    • 语言: arb_Latn
      • 训练集: data/arb_Latn/train.tsv
      • 验证集: data/arb_Latn/dev.tsv
      • 测试集: data/arb_Latn/test.tsv
    • 语言: ars_Arab
      • 训练集: data/ars_Arab/train.tsv
      • 验证集: data/ars_Arab/dev.tsv
      • 测试集: data/ars_Arab/test.tsv
    • 语言: ary_Arab
      • 训练集: data/ary_Arab/train.tsv
      • 验证集: data/ary_Arab/dev.tsv
      • 测试集: data/ary_Arab/test.tsv
    • 语言: arz_Arab
      • 训练集: data/arz_Arab/train.tsv
      • 验证集: data/arz_Arab/dev.tsv
      • 测试集: data/arz_Arab/test.tsv
    • 语言: asm_Beng
      • 训练集: data/asm_Beng/train.tsv
      • 验证集: data/asm_Beng/dev.tsv
      • 测试集: data/asm_Beng/test.tsv
    • 语言: ast_Latn
      • 训练集: data/ast_Latn/train.tsv
      • 验证集: data/ast_Latn/dev.tsv
      • 测试集: data/ast_Latn/test.tsv
    • 语言: awa_Deva
      • 训练集: data/awa_Deva/train.tsv
      • 验证集: data/awa_Deva/dev.tsv
      • 测试集: data/awa_Deva/test.tsv
    • 语言: ayr_Latn
      • 训练集: data/ayr_Latn/train.tsv
      • 验证集: data/ayr_Latn/dev.tsv
      • 测试集: data/ayr_Latn/test.tsv
    • 语言: azb_Arab
      • 训练集: data/azb_Arab/train.tsv
      • 验证集: data/azb_Arab/dev.tsv
      • 测试集: data/azb_Arab/test.tsv
    • 语言: azj_Latn
      • 训练集: data/azj_Latn/train.tsv
      • 验证集: data/azj_Latn/dev.tsv
      • 测试集: data/azj_Latn/test.tsv
    • 语言: bak_Cyrl
      • 训练集: data/bak_Cyrl/train.tsv
      • 验证集: data/bak_Cyrl/dev.tsv
      • 测试集: data/bak_Cyrl/test.tsv
    • 语言: bam_Latn
      • 训练集: data/bam_Latn/train.tsv
      • 验证集: data/bam_Latn/dev.tsv
      • 测试集: data/bam_Latn/test.tsv
    • 语言: ban_Latn
      • 训练集: data/ban_Latn/train.tsv
      • 验证集: data/ban_Latn/dev.tsv
      • 测试集: data/ban_Latn/test.tsv
    • 语言: bel_Cyrl
      • 训练集: data/bel_Cyrl/train.tsv
      • 验证集: data/bel_Cyrl/dev.tsv
      • 测试集: data/bel_Cyrl/test.tsv
    • 语言: bem_Latn
      • 训练集: data/bem_Latn/train.tsv
      • 验证集: data/bem_Latn/dev.tsv
      • 测试集: data/bem_Latn/test.tsv
    • 语言: ben_Beng
      • 训练集: data/ben_Beng/train.tsv
      • 验证集: data/ben_Beng/dev.tsv
      • 测试集: data/ben_Beng/test.tsv
    • 语言: bho_Deva
      • 训练集: data/bho_Deva/train.tsv
      • 验证集: data/bho_Deva/dev.tsv
      • 测试集: data/bho_Deva/test.tsv
    • 语言: bjn_Arab
      • 训练集: data/bjn_Arab/train.tsv
      • 验证集: data/bjn_Arab/dev.tsv
      • 测试集: data/bjn_Arab/test.tsv
    • 语言: bjn_Latn
      • 训练集: data/bjn_Latn/train.tsv
      • 验证集: data/bjn_Latn/dev.tsv
      • 测试集: data/bjn_Latn/test.tsv
    • 语言: bod_Tibt
      • 训练集: data/bod_Tibt/train.tsv
      • 验证集: data/bod_Tibt/dev.tsv
      • 测试集: data/bod_Tibt/test.tsv
    • 语言: bos_Latn
      • 训练集: data/bos_Latn/train.tsv
      • 验证集: data/bos_Latn/dev.tsv
      • 测试集: data/bos_Latn/test.tsv
    • 语言: bug_Latn
      • 训练集: data/bug_Latn/train.tsv
      • 验证集: data/bug_Latn/dev.tsv
      • 测试集: data/bug_Latn/test.tsv
    • 语言: bul_Cyrl
      • 训练集: data/bul_Cyrl/train.tsv
      • 验证集: data/bul_Cyrl/dev.tsv
      • 测试集: data/bul_Cyrl/test.tsv
    • 语言: cat_Latn
      • 训练集: data/cat_Latn/train.tsv
      • 验证集: data/cat_Latn/dev.tsv
      • 测试集: data/cat_Latn/test.tsv
    • 语言: ceb_Latn
      • 训练集: data/ceb_Latn/train.tsv
      • 验证集: data/ceb_Latn/dev.tsv
      • 测试集: data/ceb_Latn/test.tsv
    • 语言: ces_Latn
      • 训练集: data/ces_Latn/train.tsv
      • 验证集: data/ces_Latn/dev.tsv
      • 测试集: data/ces_Latn/test.tsv
    • 语言: cjk_Latn
      • 训练集: data/cjk_Latn/train.tsv
      • 验证集: data/cjk_Latn/dev.tsv
      • 测试集: data/cjk_Latn/test.tsv
    • 语言: ckb_Arab
      • 训练集: data/ckb_Arab/train.tsv
      • 验证集: data/ckb_Arab/dev.tsv
      • 测试集: data/ckb_Arab/test.tsv
    • 语言: crh_Latn
      • 训练集: data/crh_Latn/train.tsv
      • 验证集: data/crh_Latn/dev.tsv
      • 测试集: data/crh_Latn/test.tsv
    • 语言: cym_Latn
      • 训练集: data/cym_Latn/train.tsv
      • 验证集: data/cym_Latn/dev.tsv
      • 测试集: data/cym_Latn/test.tsv
    • 语言: dan_Latn
      • 训练集: data/dan_Latn/train.tsv
      • 验证集: data/dan_Latn/dev.tsv
      • 测试集: data/dan_Latn/test.tsv
    • 语言: deu_Latn
      • 训练集: data/deu_Latn/train.tsv
      • 验证集: data/deu_Latn/dev.tsv
      • 测试集: data/deu_Latn/test.tsv
    • 语言: dik_Latn
      • 训练集: data/dik_Latn/train.tsv
      • 验证集: data/dik_Latn/dev.tsv
      • 测试集: data/dik_Latn/test.tsv
    • 语言: dyu_Latn
      • 训练集: data/dyu_Latn/train.tsv
      • 验证集: data/dyu_Latn/dev.tsv
      • 测试集: data/dyu_Latn/test.tsv
    • 语言: dzo_Tibt
      • 训练集: data/dzo_Tibt/train.tsv
      • 验证集: data/dzo_Tibt/dev.tsv
      • 测试集: data/dzo_Tibt/test.tsv
    • 语言: ell_Grek
      • 训练集: data/ell_Grek/train.tsv
      • 验证集: data/ell_Grek/dev.tsv
      • 测试集: data/ell_Grek/test.tsv
    • 语言: eng_Latn
      • 训练集: data/eng_Latn/train.tsv
      • 验证集: data/eng_Latn/dev.tsv
      • 测试集: data/eng_Latn/test.tsv
    • 语言: epo_Latn
      • 训练集: data/epo_Latn/train.tsv
      • 验证集: data/epo_Latn/dev.tsv
      • 测试集: data/epo_Latn/test.tsv
    • 语言: est_Latn
      • 训练集: data/est_Latn/train.tsv
      • 验证集: data/est_Latn/dev.tsv
      • 测试集: data/est_Latn/test.tsv
    • 语言: eus_Latn
      • 训练集: data/eus_Latn/train.tsv
      • 验证集: data/eus_Latn/dev.tsv
      • 测试集: data/eus_Latn/test.tsv
    • 语言: ewe_Latn
      • 训练集: data/ewe_Latn/train.tsv
      • 验证集: data/ewe_Latn/dev.tsv
      • 测试集: data/ewe_Latn/test.tsv
    • 语言: fao_Latn
      • 训练集: data/fao_Latn/train.tsv
      • 验证集: data/fao_Latn/dev.tsv
      • 测试集: data/fao_Latn/test.tsv
    • 语言: fij_Latn
      • 训练集: data/fij_Latn/train.tsv
      • 验证集: data/fij_Latn/dev.tsv
      • 测试集: data/fij_Latn/test.tsv
    • 语言: fin_Latn
      • 训练集: data/fin_Latn/train.tsv
      • 验证集: data/fin_Latn/dev.tsv
      • 测试集: data/fin_Latn/test.tsv
    • 语言: fon_Latn
      • 训练集: data/fon_Latn/train.tsv
      • 验证集: data/fon_Latn/dev.tsv
      • 测试集: data/fon_Latn/test.tsv
    • 语言: fra_Latn
      • 训练集: data/fra_Latn/train.tsv
      • 验证集: data/fra_Latn/dev.tsv
      • 测试集: data/fra_Latn/test.tsv
    • 语言: fur_Latn
      • 训练集: data/fur_Latn/train.tsv
      • 验证集: data/fur_Latn/dev.tsv
      • 测试集: data/fur_Latn/test.tsv
    • 语言: fuv_Latn
      • 训练集: data/fuv_Latn/train.tsv
      • 验证集: data/fuv_Latn/dev.tsv
      • 测试集: data/fuv_Latn/test.tsv
    • 语言: gaz_Latn
      • 训练集: data/gaz_Latn/train.tsv
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作