mbzuai-ugrip-statement-tuning/massive
收藏 | 来源:Hugging Face | 2024-06-06 更新 | 2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/mbzuai-ugrip-statement-tuning/massive
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
dataset_info:
- config_name: af
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1109678
num_examples: 11514
- name: validation
num_bytes: 194946
num_examples: 2033
download_size: 452635
dataset_size: 1304624
- config_name: am
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1394162
num_examples: 11514
- name: validation
num_bytes: 246557
num_examples: 2033
download_size: 572604
dataset_size: 1640719
- config_name: ar
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1234663
num_examples: 11514
- name: validation
num_bytes: 218735
num_examples: 2033
download_size: 497064
dataset_size: 1453398
- config_name: az
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1168998
num_examples: 11514
- name: validation
num_bytes: 205086
num_examples: 2033
download_size: 466948
dataset_size: 1374084
- config_name: bn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1745554
num_examples: 11514
- name: validation
num_bytes: 306045
num_examples: 2033
download_size: 619285
dataset_size: 2051599
- config_name: ca
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1137106
num_examples: 11514
- name: validation
num_bytes: 200287
num_examples: 2033
download_size: 459837
dataset_size: 1337393
- config_name: cy
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1126063
num_examples: 11514
- name: validation
num_bytes: 198193
num_examples: 2033
download_size: 463573
dataset_size: 1324256
- config_name: da
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1086468
num_examples: 11514
- name: validation
num_bytes: 191371
num_examples: 2033
download_size: 439422
dataset_size: 1277839
- config_name: de
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1136675
num_examples: 11514
- name: validation
num_bytes: 200820
num_examples: 2033
download_size: 471920
dataset_size: 1337495
- config_name: el
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1541312
num_examples: 11514
- name: validation
num_bytes: 270881
num_examples: 2033
download_size: 617657
dataset_size: 1812193
- config_name: en
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1080913
num_examples: 11514
- name: validation
num_bytes: 190813
num_examples: 2033
download_size: 432014
dataset_size: 1271726
- config_name: es
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1143140
num_examples: 11514
- name: validation
num_bytes: 201112
num_examples: 2033
download_size: 468903
dataset_size: 1344252
- config_name: fa
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1377230
num_examples: 11514
- name: validation
num_bytes: 241262
num_examples: 2033
download_size: 531007
dataset_size: 1618492
- config_name: fi
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1124055
num_examples: 11514
- name: validation
num_bytes: 197892
num_examples: 2033
download_size: 459957
dataset_size: 1321947
- config_name: fr
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1180442
num_examples: 11514
- name: validation
num_bytes: 208054
num_examples: 2033
download_size: 484562
dataset_size: 1388496
- config_name: he
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1276756
num_examples: 11514
- name: validation
num_bytes: 223898
num_examples: 2033
download_size: 501370
dataset_size: 1500654
- config_name: hi
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1744363
num_examples: 11514
- name: validation
num_bytes: 304622
num_examples: 2033
download_size: 630394
dataset_size: 2048985
- config_name: hu
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1142165
num_examples: 11514
- name: validation
num_bytes: 200096
num_examples: 2033
download_size: 481633
dataset_size: 1342261
- config_name: hy
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1474892
num_examples: 11514
- name: validation
num_bytes: 257249
num_examples: 2033
download_size: 575589
dataset_size: 1732141
- config_name: id
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1116139
num_examples: 11514
- name: validation
num_bytes: 196204
num_examples: 2033
download_size: 436382
dataset_size: 1312343
- config_name: is
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1155592
num_examples: 11514
- name: validation
num_bytes: 202818
num_examples: 2033
download_size: 474230
dataset_size: 1358410
- config_name: it
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1122823
num_examples: 11514
- name: validation
num_bytes: 197339
num_examples: 2033
download_size: 451912
dataset_size: 1320162
- config_name: ja
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1197285
num_examples: 11514
- name: validation
num_bytes: 211534
num_examples: 2033
download_size: 479002
dataset_size: 1408819
- config_name: jv
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1080906
num_examples: 11514
- name: validation
num_bytes: 189180
num_examples: 2033
download_size: 435160
dataset_size: 1270086
- config_name: ka
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1683984
num_examples: 11514
- name: validation
num_bytes: 294672
num_examples: 2033
download_size: 578561
dataset_size: 1978656
- config_name: km
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1718683
num_examples: 11514
- name: validation
num_bytes: 304631
num_examples: 2033
download_size: 609001
dataset_size: 2023314
- config_name: kn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1906568
num_examples: 11514
- name: validation
num_bytes: 333532
num_examples: 2033
download_size: 667729
dataset_size: 2240100
- config_name: ko
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1123574
num_examples: 11514
- name: validation
num_bytes: 198034
num_examples: 2033
download_size: 454881
dataset_size: 1321608
- config_name: lv
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1138666
num_examples: 11514
- name: validation
num_bytes: 200013
num_examples: 2033
download_size: 471652
dataset_size: 1338679
- config_name: ml
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2073281
num_examples: 11514
- name: validation
num_bytes: 363546
num_examples: 2033
download_size: 710476
dataset_size: 2436827
- config_name: mn
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1497928
num_examples: 11514
- name: validation
num_bytes: 264903
num_examples: 2033
download_size: 580653
dataset_size: 1762831
- config_name: ms
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1137681
num_examples: 11514
- name: validation
num_bytes: 199651
num_examples: 2033
download_size: 445132
dataset_size: 1337332
- config_name: my
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2017787
num_examples: 11514
- name: validation
num_bytes: 351475
num_examples: 2033
download_size: 702009
dataset_size: 2369262
- config_name: nb
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1089702
num_examples: 11514
- name: validation
num_bytes: 191895
num_examples: 2033
download_size: 442418
dataset_size: 1281597
- config_name: nl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1135536
num_examples: 11514
- name: validation
num_bytes: 199906
num_examples: 2033
download_size: 464776
dataset_size: 1335442
- config_name: pl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1120033
num_examples: 11514
- name: validation
num_bytes: 197122
num_examples: 2033
download_size: 462681
dataset_size: 1317155
- config_name: pt
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1134461
num_examples: 11514
- name: validation
num_bytes: 199018
num_examples: 2033
download_size: 462950
dataset_size: 1333479
- config_name: ro
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1118842
num_examples: 11514
- name: validation
num_bytes: 196492
num_examples: 2033
download_size: 471338
dataset_size: 1315334
- config_name: ru
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1463069
num_examples: 11514
- name: validation
num_bytes: 257460
num_examples: 2033
download_size: 592187
dataset_size: 1720529
- config_name: sl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1098532
num_examples: 11514
- name: validation
num_bytes: 193098
num_examples: 2033
download_size: 454072
dataset_size: 1291630
- config_name: sq
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1150187
num_examples: 11514
- name: validation
num_bytes: 202371
num_examples: 2033
download_size: 464347
dataset_size: 1352558
- config_name: sv
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1102727
num_examples: 11514
- name: validation
num_bytes: 194944
num_examples: 2033
download_size: 443326
dataset_size: 1297671
- config_name: sw
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1119426
num_examples: 11514
- name: validation
num_bytes: 197525
num_examples: 2033
download_size: 445321
dataset_size: 1316951
- config_name: ta
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 2049351
num_examples: 11514
- name: validation
num_bytes: 360128
num_examples: 2033
download_size: 695231
dataset_size: 2409479
- config_name: te
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1889870
num_examples: 11514
- name: validation
num_bytes: 330871
num_examples: 2033
download_size: 686919
dataset_size: 2220741
- config_name: th
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1693456
num_examples: 11514
- name: validation
num_bytes: 299044
num_examples: 2033
download_size: 623773
dataset_size: 1992500
- config_name: tl
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1194061
num_examples: 11514
- name: validation
num_bytes: 208325
num_examples: 2033
download_size: 475408
dataset_size: 1402386
- config_name: tr
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1113859
num_examples: 11514
- name: validation
num_bytes: 195302
num_examples: 2033
download_size: 456796
dataset_size: 1309161
- config_name: ur
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1404468
num_examples: 11514
- name: validation
num_bytes: 246502
num_examples: 2033
download_size: 554250
dataset_size: 1650970
- config_name: vi
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1257367
num_examples: 11514
- name: validation
num_bytes: 221431
num_examples: 2033
download_size: 501700
dataset_size: 1478798
- config_name: zh
features:
- name: is_true
dtype: int64
- name: statement
dtype: string
splits:
- name: train
num_bytes: 1003514
num_examples: 11514
- name: validation
num_bytes: 176309
num_examples: 2033
download_size: 470045
dataset_size: 1179823
configs:
- config_name: af
data_files:
- split: train
path: af/train-*
- split: validation
path: af/validation-*
- config_name: am
data_files:
- split: train
path: am/train-*
- split: validation
path: am/validation-*
- config_name: ar
data_files:
- split: train
path: ar/train-*
- split: validation
path: ar/validation-*
- config_name: az
data_files:
- split: train
path: az/train-*
- split: validation
path: az/validation-*
- config_name: bn
data_files:
- split: train
path: bn/train-*
- split: validation
path: bn/validation-*
- config_name: ca
data_files:
- split: train
path: ca/train-*
- split: validation
path: ca/validation-*
- config_name: cy
data_files:
- split: train
path: cy/train-*
- split: validation
path: cy/validation-*
- config_name: da
data_files:
- split: train
path: da/train-*
- split: validation
path: da/validation-*
- config_name: de
data_files:
- split: train
path: de/train-*
- split: validation
path: de/validation-*
- config_name: el
data_files:
- split: train
path: el/train-*
- split: validation
path: el/validation-*
- config_name: en
data_files:
- split: train
path: en/train-*
- split: validation
path: en/validation-*
- config_name: es
data_files:
- split: train
path: es/train-*
- split: validation
path: es/validation-*
- config_name: fa
data_files:
- split: train
path: fa/train-*
- split: validation
path: fa/validation-*
- config_name: fi
data_files:
- split: train
path: fi/train-*
- split: validation
path: fi/validation-*
- config_name: fr
data_files:
- split: train
path: fr/train-*
- split: validation
path: fr/validation-*
- config_name: he
data_files:
- split: train
path: he/train-*
- split: validation
path: he/validation-*
- config_name: hi
data_files:
- split: train
path: hi/train-*
- split: validation
path: hi/validation-*
- config_name: hu
data_files:
- split: train
path: hu/train-*
- split: validation
path: hu/validation-*
- config_name: hy
data_files:
- split: train
path: hy/train-*
- split: validation
path: hy/validation-*
- config_name: id
data_files:
- split: train
path: id/train-*
- split: validation
path: id/validation-*
- config_name: is
data_files:
- split: train
path: is/train-*
- split: validation
path: is/validation-*
- config_name: it
data_files:
- split: train
path: it/train-*
- split: validation
path: it/validation-*
- config_name: ja
data_files:
- split: train
path: ja/train-*
- split: validation
path: ja/validation-*
- config_name: jv
data_files:
- split: train
path: jv/train-*
- split: validation
path: jv/validation-*
- config_name: ka
data_files:
- split: train
path: ka/train-*
- split: validation
path: ka/validation-*
- config_name: km
data_files:
- split: train
path: km/train-*
- split: validation
path: km/validation-*
- config_name: kn
data_files:
- split: train
path: kn/train-*
- split: validation
path: kn/validation-*
- config_name: ko
data_files:
- split: train
path: ko/train-*
- split: validation
path: ko/validation-*
- config_name: lv
data_files:
- split: train
path: lv/train-*
- split: validation
path: lv/validation-*
- config_name: ml
data_files:
- split: train
path: ml/train-*
- split: validation
path: ml/validation-*
- config_name: mn
data_files:
- split: train
path: mn/train-*
- split: validation
path: mn/validation-*
- config_name: ms
data_files:
- split: train
path: ms/train-*
- split: validation
path: ms/validation-*
- config_name: my
data_files:
- split: train
path: my/train-*
- split: validation
path: my/validation-*
- config_name: nb
data_files:
- split: train
path: nb/train-*
- split: validation
path: nb/validation-*
- config_name: nl
data_files:
- split: train
path: nl/train-*
- split: validation
path: nl/validation-*
- config_name: pl
data_files:
- split: train
path: pl/train-*
- split: validation
path: pl/validation-*
- config_name: pt
data_files:
- split: train
path: pt/train-*
- split: validation
path: pt/validation-*
- config_name: ro
data_files:
- split: train
path: ro/train-*
- split: validation
path: ro/validation-*
- config_name: ru
data_files:
- split: train
path: ru/train-*
- split: validation
path: ru/validation-*
- config_name: sl
data_files:
- split: train
path: sl/train-*
- split: validation
path: sl/validation-*
- config_name: sq
data_files:
- split: train
path: sq/train-*
- split: validation
path: sq/validation-*
- config_name: sv
data_files:
- split: train
path: sv/train-*
- split: validation
path: sv/validation-*
- config_name: sw
data_files:
- split: train
path: sw/train-*
- split: validation
path: sw/validation-*
- config_name: ta
data_files:
- split: train
path: ta/train-*
- split: validation
path: ta/validation-*
- config_name: te
data_files:
- split: train
path: te/train-*
- split: validation
path: te/validation-*
- config_name: th
data_files:
- split: train
path: th/train-*
- split: validation
path: th/validation-*
- config_name: tl
data_files:
- split: train
path: tl/train-*
- split: validation
path: tl/validation-*
- config_name: tr
data_files:
- split: train
path: tr/train-*
- split: validation
path: tr/validation-*
- config_name: ur
data_files:
- split: train
path: ur/train-*
- split: validation
path: ur/validation-*
- config_name: vi
data_files:
- split: train
path: vi/train-*
- split: validation
path: vi/validation-*
- config_name: zh
data_files:
- split: train
path: zh/train-*
- split: validation
path: zh/validation-*
---
提供机构:
mbzuai-ugrip-statement-tuning
原始信息汇总
## 数据集概述

### 数据集配置信息

| 配置名称 | 特征 |
|---|---|
| af | - is_true: int64<br>- statement: string |
| am | - is_true: int64<br>- statement: string |
| ar | - is_true: int64<br>- statement: string |
| az | - is_true: int64<br>- statement: string |
| bn | - is_true: int64<br>- statement: string |
| ca | - is_true: int64<br>- statement: string |
| cy | - is_true: int64<br>- statement: string |
| da | - is_true: int64<br>- statement: string |
| de | - is_true: int64<br>- statement: string |
| el | - is_true: int64<br>- statement: string |
| en | - is_true: int64<br>- statement: string |
| es | - is_true: int64<br>- statement: string |
| fa | - is_true: int64<br>- statement: string |
| fi | - is_true: int64<br>- statement: string |
| fr | - is_true: int64<br>- statement: string |
| he | - is_true: int64<br>- statement: string |
| hi | - is_true: int64<br>- statement: string |
| hu | - is_true: int64<br>- statement: string |
| hy | - is_true: int64<br>- statement: string |
| id | - is_true: int64<br>- statement: string |
| is | - is_true: int64<br>- statement: string |
| it | - is_true: int64<br>- statement: string |
| ja | - is_true: int64<br>- statement: string |
| jv | - is_true: int64<br>- statement: string |
| ka | - is_true: int64<br>- statement: string |
| km | - is_true: int64<br>- statement: string |
| kn | - is_true: int64<br>- statement: string |
| ko | - is_true: int64<br>- statement: string |
| lv | - is_true: int64<br>- statement: string |
| ml | - is_true: int64<br>- statement: string |
| mn | - is_true: int64<br>- statement: string |
| ms | - is_true: int64<br>- statement: string |
| my | - is_true: int64<br>- statement: string |
| nb | - is_true: int64<br>- statement: string |
| nl | - is_true: int64<br>- statement: string |
| pl | - is_true: int64<br>- statement: string |
| pt | - is_true: int64<br>- statement: string |
| ro | - is_true: int64<br>- statement: string |
| ru | - is_true: int64<br>- statement: string |
| sl | - is_true: int64<br>- statement: string |
| sq | - is_true: int64<br>- statement: string |
| sv | - is_true: int64<br>- statement: string |
| sw | - is_true: int64<br>- statement: string |
| ta | - is_true: int64<br>- statement: string |
| te | - is_true: int64<br>- statement: string |
| th | - is_true: int64<br>- statement: string |
| tl | - is_true: int64<br>- statement: string |
| tr | - is_true: int64<br>- statement: string |
| ur | - is_true: int64<br>- statement: string |
| vi | - is_true: int64<br>- statement: string |
| zh | - is_true: int64<br>- statement: string |

### 数据集大小信息

以下各列大小的单位均为字节(bytes);数据集大小 = 训练集大小 + 验证集大小。

| 配置名称 | 下载大小(字节) | 数据集大小(字节) | 训练集大小(字节) | 验证集大小(字节) |
|---|---|---|---|---|
| af | 452635 | 1304624 | 1109678 | 194946 |
| am | 572604 | 1640719 | 1394162 | 246557 |
| ar | 497064 | 1453398 | 1234663 | 218735 |
| az | 466948 | 1374084 | 1168998 | 205086 |
| bn | 619285 | 2051599 | 1745554 | 306045 |
| ca | 459837 | 1337393 | 1137106 | 200287 |
| cy | 463573 | 1324256 | 1126063 | 198193 |
| da | 439422 | 1277839 | 1086468 | 191371 |
| de | 471920 | 1337495 | 1136675 | 200820 |
| el | 617657 | 1812193 | 1541312 | 270881 |
| en | 432014 | 1271726 | 1080913 | 190813 |
| es | 468903 | 1344252 | 1143140 | 201112 |
| fa | 531007 | 1618492 | 1377230 | 241262 |
| fi | 459957 | 1321947 | 1124055 | 197892 |
| fr | 484562 | 1388496 | 1180442 | 208054 |
| he | 501370 | 1500654 | 1276756 | 223898 |
| hi | 630394 | 2048985 | 1744363 | 304622 |
| hu | 481633 | 1342261 | 1142165 | 200096 |
| hy | 575589 | 1732141 | 1474892 | 257249 |
| id | 436382 | 1312343 | 1116139 | 196204 |
| is | 474230 | 1358410 | 1155592 | 202818 |
| it | 451912 | 1320162 | 1122823 | 197339 |
| ja | 479002 | 1408819 | 1197285 | 211534 |
| jv | 435160 | 1270086 | 1080906 | 189180 |
| ka | 578561 | 1978656 | 1683984 | 294672 |
| km | 609001 | 2023314 | 1718683 | 304631 |
| kn | 667729 | 2240100 | 1906568 | 333532 |
| ko | 454881 | 1321608 | 1123574 | 198034 |
| lv | 471652 | 1338679 | 1138666 | 200013 |
| ml | 710476 | 2436827 | 2073281 | 363546 |
| mn | 580653 | 1762831 | 1497928 | 264903 |
| ms | 445132 | 1337332 | 1137681 | 199651 |
| my | 702009 | 2369262 | 2017787 | 351475 |
| nb | 442418 | 1281597 | 1089702 | 191895 |
| nl | 464776 | 1335442 | 1135536 | 199906 |
| pl | 462681 | 1317155 | 1120033 | 197122 |
| pt | 462950 | 1333479 | 1134461 | 199018 |
| ro | 471338 | 1315334 | 1118842 | 196492 |
| ru | 592187 | 1720529 | 1463069 | 257460 |
| sl | 454072 | 1291630 | 1098532 | 193098 |
| sq | 464347 | 1352558 | 1150187 | 202371 |
| sv | 443326 | 1297671 | 1102727 | 194944 |
| sw | 445321 | 1316951 | 1119426 | 197525 |
| ta | 695231 | 2409479 | 2049351 | 360128 |
| te | 686919 | 2220741 | 1889870 | 330871 |
| th | 623773 | 1992500 | 1693456 | 299044 |
| tl | 475408 | 1402386 | 1194061 | 208325 |
| tr | 456796 | 1309161 | 1113859 | 195302 |
| ur | 554250 | 1650970 | 1404468 | 246502 |
| vi | 501700 | 1478798 | 1257367 | 221431 |
| zh | 470045 | 1179823 | 1003514 | 176309 |

### 数据集分割信息

| 配置名称 | 训练集路径 | 验证集路径 |
|---|---|---|
| af | af/train-* | af/validation-* |
| am | am/train-* | am/validation-* |
| ar | ar/train-* | ar/validation-* |
| az | az/train-* | az/validation-* |
| bn | bn/train-* | bn/validation-* |
| ca | ca/train-* | ca/validation-* |
| cy | cy/train-* | cy/validation-* |
| da | da/train-* | da/validation-* |
| de | de/train-* | de/validation-* |
| el | el/train-* | el/validation-* |
| en | en/train-* | en/validation-* |
| es | es/train-* | es/validation-* |
| fa | fa/train-* | fa/validation-* |
| fi | fi/train-* | fi/validation-* |
| fr | fr/train-* | fr/validation-* |
| he | he/train-* | he/validation-* |
| hi | hi/train-* | hi/validation-* |
| hu | hu/train-* | hu/validation-* |
| hy | hy/train-* | hy/validation-* |
| id | id/train-* | id/validation-* |
| is | is/train-* | is/validation-* |
| it | it/train-* | it/validation-* |
| ja | ja/train-* | ja/validation-* |
| jv | jv/train-* | jv/validation-* |
| ka | ka/train-* | ka/validation-* |
| km | km/train-* | km/validation-* |
| kn | kn/train-* | kn/validation-* |
| ko | ko/train-* | ko/validation-* |
| lv | lv/train-* | lv/validation-* |
| ml | ml/train-* | ml/validation-* |
| mn | mn/train-* | mn/validation-* |
| ms | ms/train-* | ms/validation-* |
| my | my/train-* | my/validation-* |
| nb | nb/train-* | nb/validation-* |
| nl | nl/train-* | nl/validation-* |
| pl | pl/train-* | pl/validation-* |
| pt | pt/train-* | pt/validation-* |
| ro | ro/train-* | ro/validation-* |
| ru | ru/train-* | ru/validation-* |
| sl | sl/train-* | sl/validation-* |
| sq | sq/train-* | sq/validation-* |
| sv | sv/train-* | sv/validation-* |
| sw | sw/train-* | sw/validation-* |
| ta | ta/train-* | ta/validation-* |
| te | te/train-* | te/validation-* |
| th | th/train-* | th/validation-* |
| tl | tl/train-* | tl/validation-* |
| tr | tr/train-* | tr/validation-* |
| ur | ur/train-* | ur/validation-* |
| vi | vi/train-* | vi/validation-* |
| zh | zh/train-* | zh/validation-* |



