five

alamios/OpenSubtitles-v2018

收藏
Hugging Face2025-12-30 更新2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/alamios/OpenSubtitles-v2018
下载链接
链接失效反馈
官方服务:
资源简介:
--- license: unknown task_categories: - translation pretty_name: OpenSubtitles-v2018 dataset_info: - config_name: af-ar splits: - name: train num_bytes: 1624857 num_examples: 12336 download_size: 915187 dataset_size: 1624857 - config_name: af-bg splits: - name: train num_bytes: 2676693 num_examples: 18190 download_size: 1476200 dataset_size: 2676693 - config_name: af-bn splits: - name: train num_bytes: 213800 num_examples: 1185 download_size: 110366 dataset_size: 213800 - config_name: af-bs splits: - name: train num_bytes: 528288 num_examples: 4301 download_size: 333802 dataset_size: 528288 - config_name: af-cs splits: - name: train num_bytes: 2086126 num_examples: 17930 download_size: 1281548 dataset_size: 2086126 - config_name: af-da splits: - name: train num_bytes: 1498402 num_examples: 12525 download_size: 889248 dataset_size: 1498402 - config_name: af-de splits: - name: train num_bytes: 2344839 num_examples: 19279 download_size: 1400572 dataset_size: 2344839 - config_name: af-el splits: - name: train num_bytes: 4304695 num_examples: 29832 download_size: 2353786 dataset_size: 4304695 - config_name: af-en splits: - name: train num_bytes: 5129965 num_examples: 44703 download_size: 3057666 dataset_size: 5129965 - config_name: af-es splits: - name: train num_bytes: 4080356 num_examples: 34306 download_size: 2452179 dataset_size: 4080356 - config_name: af-et splits: - name: train num_bytes: 701802 num_examples: 6336 download_size: 432127 dataset_size: 701802 - config_name: af-fa splits: - name: train num_bytes: 441214 num_examples: 3065 download_size: 251341 dataset_size: 441214 - config_name: af-fi splits: - name: train num_bytes: 1189954 num_examples: 10250 download_size: 717619 dataset_size: 1189954 - config_name: af-fr splits: - name: train num_bytes: 2228040 num_examples: 18563 download_size: 1323878 dataset_size: 2228040 - config_name: af-he splits: - name: train num_bytes: 1852213 num_examples: 14534 download_size: 1036476 dataset_size: 1852213 - config_name: 
af-hi splits: - name: train num_bytes: 162416 num_examples: 1146 download_size: 84533 dataset_size: 162416 - config_name: af-hr splits: - name: train num_bytes: 1734617 num_examples: 14796 download_size: 1056803 dataset_size: 1734617 - config_name: af-hu splits: - name: train num_bytes: 1905522 num_examples: 16317 download_size: 1170404 dataset_size: 1905522 - config_name: af-id splits: - name: train num_bytes: 510009 num_examples: 4555 download_size: 300481 dataset_size: 510009 - config_name: af-it splits: - name: train num_bytes: 1814935 num_examples: 15006 download_size: 1104117 dataset_size: 1814935 - config_name: af-ja splits: - name: train num_bytes: 228949 num_examples: 1984 download_size: 139600 dataset_size: 228949 - config_name: af-lt splits: - name: train num_bytes: 70520 num_examples: 651 download_size: 47969 dataset_size: 70520 - config_name: af-lv splits: - name: train num_bytes: 115757 num_examples: 1084 download_size: 76282 dataset_size: 115757 - config_name: af-mk splits: - name: train num_bytes: 547612 num_examples: 3821 download_size: 304271 dataset_size: 547612 - config_name: af-ml splits: - name: train num_bytes: 401309 num_examples: 2184 download_size: 197735 dataset_size: 401309 - config_name: af-ms splits: - name: train num_bytes: 126424 num_examples: 1308 download_size: 73962 dataset_size: 126424 - config_name: af-nl splits: - name: train num_bytes: 2627143 num_examples: 22606 download_size: 1556449 dataset_size: 2627143 - config_name: af-no splits: - name: train num_bytes: 551517 num_examples: 5130 download_size: 338168 dataset_size: 551517 - config_name: af-pl splits: - name: train num_bytes: 1963735 num_examples: 16326 download_size: 1195507 dataset_size: 1963735 - config_name: af-pt splits: - name: train num_bytes: 2662227 num_examples: 22683 download_size: 1590504 dataset_size: 2662227 - config_name: af-ro splits: - name: train num_bytes: 3816150 num_examples: 32189 download_size: 2297432 dataset_size: 3816150 - config_name: af-ru 
splits: - name: train num_bytes: 2213299 num_examples: 15405 download_size: 1235679 dataset_size: 2213299 - config_name: af-si splits: - name: train num_bytes: 169031 num_examples: 977 download_size: 88372 dataset_size: 169031 - config_name: af-sk splits: - name: train num_bytes: 596140 num_examples: 4713 download_size: 368093 dataset_size: 596140 - config_name: af-sl splits: - name: train num_bytes: 1262930 num_examples: 11073 download_size: 772741 dataset_size: 1262930 - config_name: af-sq splits: - name: train num_bytes: 120404 num_examples: 1078 download_size: 77871 dataset_size: 120404 - config_name: af-sr splits: - name: train num_bytes: 1964264 num_examples: 17396 download_size: 1198323 dataset_size: 1964264 - config_name: af-sv splits: - name: train num_bytes: 860918 num_examples: 7704 download_size: 520719 dataset_size: 860918 - config_name: af-ta splits: - name: train num_bytes: 202773 num_examples: 1125 download_size: 97111 dataset_size: 202773 - config_name: af-th splits: - name: train num_bytes: 334232 num_examples: 2081 download_size: 168423 dataset_size: 334232 - config_name: af-tr splits: - name: train num_bytes: 2884265 num_examples: 24218 download_size: 1722691 dataset_size: 2884265 - config_name: af-uk splits: - name: train num_bytes: 327689 num_examples: 2488 download_size: 191782 dataset_size: 327689 - config_name: af-vi splits: - name: train num_bytes: 406431 num_examples: 3304 download_size: 235099 dataset_size: 406431 - config_name: ar-af splits: - name: train num_bytes: 1624857 num_examples: 12336 download_size: 915187 dataset_size: 1624857 - config_name: ar-bg splits: - name: train num_bytes: 3725123240 num_examples: 23076891 download_size: 1882687247 dataset_size: 3725123240 - config_name: ar-bn splits: - name: train num_bytes: 56803253 num_examples: 331701 download_size: 26783584 dataset_size: 56803253 - config_name: ar-bs splits: - name: train num_bytes: 1207123625 num_examples: 8898484 download_size: 669990581 dataset_size: 1207123625 
- config_name: ar-ca splits: - name: train num_bytes: 48245693 num_examples: 349709 download_size: 26493396 dataset_size: 48245693 - config_name: ar-cs splits: - name: train num_bytes: 3324328869 num_examples: 24068513 download_size: 1838129654 dataset_size: 3324328869 - config_name: ar-da splits: - name: train num_bytes: 1481372752 num_examples: 10762663 download_size: 802170547 dataset_size: 1481372752 - config_name: ar-de splits: - name: train num_bytes: 1765920759 num_examples: 12439023 download_size: 958466999 dataset_size: 1765920759 - config_name: ar-el splits: - name: train num_bytes: 3745963243 num_examples: 22468462 download_size: 1898849733 dataset_size: 3745963243 - config_name: ar-en splits: - name: train num_bytes: 4041284109 num_examples: 29823188 download_size: 2168666470 dataset_size: 4041284109 - config_name: ar-es splits: - name: train num_bytes: 3713331324 num_examples: 26641247 download_size: 2012418908 dataset_size: 3713331324 - config_name: ar-et splits: - name: train num_bytes: 1316758719 num_examples: 9692040 download_size: 725414200 dataset_size: 1316758719 - config_name: ar-eu splits: - name: train num_bytes: 76878827 num_examples: 578303 download_size: 42521482 dataset_size: 76878827 - config_name: ar-fa splits: - name: train num_bytes: 829722018 num_examples: 5493576 download_size: 423414559 dataset_size: 829722018 - config_name: ar-fi splits: - name: train num_bytes: 2403140444 num_examples: 17120182 download_size: 1307692614 dataset_size: 2403140444 - config_name: ar-fr splits: - name: train num_bytes: 2851130938 num_examples: 20181740 download_size: 1539009600 dataset_size: 2851130938 - config_name: ar-gl splits: - name: train num_bytes: 10666132 num_examples: 77718 download_size: 5995937 dataset_size: 10666132 - config_name: ar-he splits: - name: train num_bytes: 3152249989 num_examples: 20577019 download_size: 1606988089 dataset_size: 3152249989 - config_name: ar-hi splits: - name: train num_bytes: 12445990 num_examples: 70935 
download_size: 5829567 dataset_size: 12445990 - config_name: ar-hr splits: - name: train num_bytes: 2724982376 num_examples: 20034003 download_size: 1500575586 dataset_size: 2724982376 - config_name: ar-hu splits: - name: train num_bytes: 3301465865 num_examples: 23767831 download_size: 1817080056 dataset_size: 3301465865 - config_name: ar-hy splits: - name: train num_bytes: 419022 num_examples: 2308 download_size: 231120 dataset_size: 419022 - config_name: ar-id splits: - name: train num_bytes: 950577208 num_examples: 6950290 download_size: 503674983 dataset_size: 950577208 - config_name: ar-is splits: - name: train num_bytes: 152493701 num_examples: 1105868 download_size: 84397158 dataset_size: 152493701 - config_name: ar-it splits: - name: train num_bytes: 2827223716 num_examples: 20022861 download_size: 1542861122 dataset_size: 2827223716 - config_name: ar-ja splits: - name: train num_bytes: 252130055 num_examples: 1834940 download_size: 134194769 dataset_size: 252130055 - config_name: ar-ka splits: - name: train num_bytes: 27849923 num_examples: 161654 download_size: 12980173 dataset_size: 27849923 - config_name: ar-kk splits: - name: train num_bytes: 165315 num_examples: 1279 download_size: 92500 dataset_size: 165315 - config_name: ar-ko splits: - name: train num_bytes: 171750308 num_examples: 1249195 download_size: 94461318 dataset_size: 171750308 - config_name: ar-lt splits: - name: train num_bytes: 155538454 num_examples: 1177564 download_size: 87885663 dataset_size: 155538454 - config_name: ar-lv splits: - name: train num_bytes: 58183218 num_examples: 433544 download_size: 32858968 dataset_size: 58183218 - config_name: ar-mk splits: - name: train num_bytes: 425995305 num_examples: 2699946 download_size: 216218190 dataset_size: 425995305 - config_name: ar-ml splits: - name: train num_bytes: 63250771 num_examples: 323386 download_size: 28207730 dataset_size: 63250771 - config_name: ar-ms splits: - name: train num_bytes: 206583889 num_examples: 1542856 
download_size: 110085370 dataset_size: 206583889 - config_name: ar-nl splits: - name: train num_bytes: 2997566468 num_examples: 21221483 download_size: 1618626545 dataset_size: 2997566468 - config_name: ar-no splits: - name: train num_bytes: 814068826 num_examples: 5954781 download_size: 442560415 dataset_size: 814068826 - config_name: ar-pl splits: - name: train num_bytes: 3359197556 num_examples: 24043342 download_size: 1845147561 dataset_size: 3359197556 - config_name: ar-pt splits: - name: train num_bytes: 2829576795 num_examples: 20343173 download_size: 1533941738 dataset_size: 2829576795 - config_name: ar-ro splits: - name: train num_bytes: 3623932117 num_examples: 26173933 download_size: 1969204810 dataset_size: 3623932117 - config_name: ar-ru splits: - name: train num_bytes: 2433884060 num_examples: 14885701 download_size: 1248164093 dataset_size: 2433884060 - config_name: ar-si splits: - name: train num_bytes: 85928225 num_examples: 483959 download_size: 40260579 dataset_size: 85928225 - config_name: ar-sk splits: - name: train num_bytes: 808043498 num_examples: 5914026 download_size: 449390568 dataset_size: 808043498 - config_name: ar-sl splits: - name: train num_bytes: 1956994640 num_examples: 14469640 download_size: 1079805655 dataset_size: 1956994640 - config_name: ar-sq splits: - name: train num_bytes: 209503571 num_examples: 1548085 download_size: 115066482 dataset_size: 209503571 - config_name: ar-sr splits: - name: train num_bytes: 2919647936 num_examples: 21116415 download_size: 1600128717 dataset_size: 2919647936 - config_name: ar-sv splits: - name: train num_bytes: 1710374910 num_examples: 12276924 download_size: 922290818 dataset_size: 1710374910 - config_name: ar-ta splits: - name: train num_bytes: 4642330 num_examples: 24676 download_size: 2057283 dataset_size: 4642330 - config_name: ar-te splits: - name: train num_bytes: 3452430 num_examples: 19326 download_size: 1618921 dataset_size: 3452430 - config_name: ar-th splits: - name: train 
num_bytes: 535419296 num_examples: 2947486 download_size: 249322971 dataset_size: 535419296 - config_name: ar-tl splits: - name: train num_bytes: 1013684 num_examples: 7578 download_size: 561191 dataset_size: 1013684 - config_name: ar-tr splits: - name: train num_bytes: 3690474501 num_examples: 26528738 download_size: 1994176882 dataset_size: 3690474501 - config_name: ar-uk splits: - name: train num_bytes: 93216778 num_examples: 591338 download_size: 48444982 dataset_size: 93216778 - config_name: ar-ur splits: - name: train num_bytes: 3875112 num_examples: 25650 download_size: 1997751 dataset_size: 3875112 - config_name: ar-vi splits: - name: train num_bytes: 421228815 num_examples: 2875003 download_size: 221286242 dataset_size: 421228815 - config_name: bg-af splits: - name: train num_bytes: 2676693 num_examples: 18190 download_size: 1476200 dataset_size: 2676693 - config_name: bg-ar splits: - name: train num_bytes: 3725123240 num_examples: 23076891 download_size: 1882687247 dataset_size: 3725123240 - config_name: bg-bn splits: - name: train num_bytes: 53387661 num_examples: 295869 download_size: 24801362 dataset_size: 53387661 - config_name: bg-bs splits: - name: train num_bytes: 1466390998 num_examples: 10563401 download_size: 791853387 dataset_size: 1466390998 - config_name: bg-ca splits: - name: train num_bytes: 52009557 num_examples: 367893 download_size: 27751974 dataset_size: 52009557 - config_name: bg-cs splits: - name: train num_bytes: 4240111613 num_examples: 29956616 download_size: 2280526705 dataset_size: 4240111613 - config_name: bg-da splits: - name: train num_bytes: 1678455917 num_examples: 11922628 download_size: 885990188 dataset_size: 1678455917 - config_name: bg-de splits: - name: train num_bytes: 2184879109 num_examples: 15078722 download_size: 1156134883 dataset_size: 2184879109 - config_name: bg-el splits: - name: train num_bytes: 4701299588 num_examples: 27674913 download_size: 2332443318 dataset_size: 4701299588 - config_name: bg-en splits: 
- name: train num_bytes: 5605918826 num_examples: 40204338 download_size: 2930379054 dataset_size: 5605918826 - config_name: bg-es splits: - name: train num_bytes: 5010318451 num_examples: 34980373 download_size: 2646356888 dataset_size: 5010318451 - config_name: bg-et splits: - name: train num_bytes: 1490926699 num_examples: 10793546 download_size: 799813964 dataset_size: 1490926699 - config_name: bg-eu splits: - name: train num_bytes: 82797615 num_examples: 605100 download_size: 44721568 dataset_size: 82797615 - config_name: bg-fa splits: - name: train num_bytes: 790775090 num_examples: 4858857 download_size: 396217345 dataset_size: 790775090 - config_name: bg-fi splits: - name: train num_bytes: 2986401978 num_examples: 20723534 download_size: 1583985961 dataset_size: 2986401978 - config_name: bg-fr splits: - name: train num_bytes: 3685021850 num_examples: 25448304 download_size: 1938105262 dataset_size: 3685021850 - config_name: bg-gl splits: - name: train num_bytes: 9782016 num_examples: 69047 download_size: 5322244 dataset_size: 9782016 - config_name: bg-he splits: - name: train num_bytes: 3603224502 num_examples: 22887401 download_size: 1795565813 dataset_size: 3603224502 - config_name: bg-hi splits: - name: train num_bytes: 10832360 num_examples: 58902 download_size: 4997481 dataset_size: 10832360 - config_name: bg-hr splits: - name: train num_bytes: 3445341125 num_examples: 24640509 download_size: 1846791655 dataset_size: 3445341125 - config_name: bg-hu splits: - name: train num_bytes: 4258070135 num_examples: 29923273 download_size: 2280298751 dataset_size: 4258070135 - config_name: bg-hy splits: - name: train num_bytes: 189659 num_examples: 1119 download_size: 101518 dataset_size: 189659 - config_name: bg-id splits: - name: train num_bytes: 985401445 num_examples: 7017758 download_size: 509187812 dataset_size: 985401445 - config_name: bg-is splits: - name: train num_bytes: 185320318 num_examples: 1356280 download_size: 99892656 dataset_size: 185320318 - 
config_name: bg-it splits: - name: train num_bytes: 3352642463 num_examples: 23136590 download_size: 1781578123 dataset_size: 3352642463 - config_name: bg-ja splits: - name: train num_bytes: 242458219 num_examples: 1652166 download_size: 126453806 dataset_size: 242458219 - config_name: bg-ka splits: - name: train num_bytes: 32999054 num_examples: 193270 download_size: 14988462 dataset_size: 32999054 - config_name: bg-kk splits: - name: train num_bytes: 465678 num_examples: 3395 download_size: 242222 dataset_size: 465678 - config_name: bg-ko splits: - name: train num_bytes: 155582107 num_examples: 1050636 download_size: 83837542 dataset_size: 155582107 - config_name: bg-lt splits: - name: train num_bytes: 166414335 num_examples: 1247963 download_size: 91754661 dataset_size: 166414335 - config_name: bg-lv splits: - name: train num_bytes: 60581955 num_examples: 454286 download_size: 33299414 dataset_size: 60581955 - config_name: bg-mk splits: - name: train num_bytes: 478221322 num_examples: 3017933 download_size: 237086711 dataset_size: 478221322 - config_name: bg-ml splits: - name: train num_bytes: 56787827 num_examples: 271301 download_size: 24989585 dataset_size: 56787827 - config_name: bg-ms splits: - name: train num_bytes: 203822838 num_examples: 1497927 download_size: 106058627 dataset_size: 203822838 - config_name: bg-nl splits: - name: train num_bytes: 3816167220 num_examples: 26311194 download_size: 2006975358 dataset_size: 3816167220 - config_name: bg-no splits: - name: train num_bytes: 919120538 num_examples: 6615774 download_size: 487168253 dataset_size: 919120538 - config_name: bg-pl splits: - name: train num_bytes: 4401912719 num_examples: 30589171 download_size: 2353745454 dataset_size: 4401912719 - config_name: bg-pt splits: - name: train num_bytes: 3636375619 num_examples: 25536097 download_size: 1916970379 dataset_size: 3636375619 - config_name: bg-ro splits: - name: train num_bytes: 4903954167 num_examples: 34484838 download_size: 2594839742 
dataset_size: 4903954167 - config_name: bg-ru splits: - name: train num_bytes: 2879531980 num_examples: 17340821 download_size: 1442195418 dataset_size: 2879531980 - config_name: bg-si splits: - name: train num_bytes: 86234943 num_examples: 460504 download_size: 39669021 dataset_size: 86234943 - config_name: bg-sk splits: - name: train num_bytes: 955986971 num_examples: 6861749 download_size: 517649170 dataset_size: 955986971 - config_name: bg-sl splits: - name: train num_bytes: 2288297404 num_examples: 16597568 download_size: 1229293152 dataset_size: 2288297404 - config_name: bg-sq splits: - name: train num_bytes: 227359878 num_examples: 1653774 download_size: 121756041 dataset_size: 227359878 - config_name: bg-sr splits: - name: train num_bytes: 4005554245 num_examples: 28306263 download_size: 2137616652 dataset_size: 4005554245 - config_name: bg-sv splits: - name: train num_bytes: 1952701548 num_examples: 13699784 download_size: 1027999551 dataset_size: 1952701548 - config_name: bg-ta splits: - name: train num_bytes: 4192067 num_examples: 20905 download_size: 1832897 dataset_size: 4192067 - config_name: bg-te splits: - name: train num_bytes: 3270138 num_examples: 17143 download_size: 1519653 dataset_size: 3270138 - config_name: bg-th splits: - name: train num_bytes: 536535872 num_examples: 2727606 download_size: 244997814 dataset_size: 536535872 - config_name: bg-tl splits: - name: train num_bytes: 993947 num_examples: 6685 download_size: 544467 dataset_size: 993947 - config_name: bg-tr splits: - name: train num_bytes: 4424357819 num_examples: 31075655 download_size: 2331361333 dataset_size: 4424357819 - config_name: bg-uk splits: - name: train num_bytes: 105467853 num_examples: 657579 download_size: 53539970 dataset_size: 105467853 - config_name: bg-ur splits: - name: train num_bytes: 3564972 num_examples: 21660 download_size: 1815192 dataset_size: 3564972 - config_name: bg-vi splits: - name: train num_bytes: 429035515 num_examples: 2855128 download_size: 
220304596 dataset_size: 429035515 - config_name: bn-af splits: - name: train num_bytes: 213800 num_examples: 1185 download_size: 110366 dataset_size: 213800 - config_name: bn-ar splits: - name: train num_bytes: 56803253 num_examples: 331701 download_size: 26783584 dataset_size: 56803253 - config_name: bn-bg splits: - name: train num_bytes: 53387661 num_examples: 295869 download_size: 24801362 dataset_size: 53387661 - config_name: bn-bs splits: - name: train num_bytes: 25072603 num_examples: 158164 download_size: 12408012 dataset_size: 25072603 - config_name: bn-ca splits: - name: train num_bytes: 2258636 num_examples: 13220 download_size: 1117772 dataset_size: 2258636 - config_name: bn-cs splits: - name: train num_bytes: 48606134 num_examples: 311485 download_size: 24145438 dataset_size: 48606134 - config_name: bn-da splits: - name: train num_bytes: 42524591 num_examples: 269290 download_size: 20580643 dataset_size: 42524591 - config_name: bn-de splits: - name: train num_bytes: 44064388 num_examples: 277674 download_size: 21493343 dataset_size: 44064388 - config_name: bn-el splits: - name: train num_bytes: 58398223 num_examples: 318118 download_size: 27191946 dataset_size: 58398223 - config_name: bn-en splits: - name: train num_bytes: 61557067 num_examples: 413602 download_size: 29677990 dataset_size: 61557067 - config_name: bn-es splits: - name: train num_bytes: 52286838 num_examples: 333733 download_size: 25516391 dataset_size: 52286838 - config_name: bn-et splits: - name: train num_bytes: 39690488 num_examples: 252675 download_size: 19556097 dataset_size: 39690488 - config_name: bn-eu splits: - name: train num_bytes: 6687774 num_examples: 42420 download_size: 3242825 dataset_size: 6687774 - config_name: bn-fa splits: - name: train num_bytes: 42725655 num_examples: 249761 download_size: 19922037 dataset_size: 42725655 - config_name: bn-fi splits: - name: train num_bytes: 44551341 num_examples: 279351 download_size: 21742104 dataset_size: 44551341 - config_name: 
bn-fr splits: - name: train num_bytes: 44187141 num_examples: 276210 download_size: 21530347 dataset_size: 44187141 - config_name: bn-gl splits: - name: train num_bytes: 1153952 num_examples: 7062 download_size: 568647 dataset_size: 1153952 - config_name: bn-he splits: - name: train num_bytes: 51619098 num_examples: 302547 download_size: 24060846 dataset_size: 51619098 - config_name: bn-hi splits: - name: train num_bytes: 2517417 num_examples: 12713 download_size: 1086684 dataset_size: 2517417 - config_name: bn-hr splits: - name: train num_bytes: 47244970 num_examples: 303620 download_size: 23322427 dataset_size: 47244970 - config_name: bn-hu splits: - name: train num_bytes: 49308557 num_examples: 317418 download_size: 24418557 dataset_size: 49308557 - config_name: bn-id splits: - name: train num_bytes: 49238140 num_examples: 318214 download_size: 23402049 dataset_size: 49238140 - config_name: bn-is splits: - name: train num_bytes: 6371171 num_examples: 38272 download_size: 3130910 dataset_size: 6371171 - config_name: bn-it splits: - name: train num_bytes: 41447909 num_examples: 261904 download_size: 20332175 dataset_size: 41447909 - config_name: bn-ja splits: - name: train num_bytes: 20957102 num_examples: 129721 download_size: 10095911 dataset_size: 20957102 - config_name: bn-ka splits: - name: train num_bytes: 1660197 num_examples: 8681 download_size: 726062 dataset_size: 1660197 - config_name: bn-ko splits: - name: train num_bytes: 12309598 num_examples: 78656 download_size: 6094055 dataset_size: 12309598 - config_name: bn-lt splits: - name: train num_bytes: 15141512 num_examples: 96242 download_size: 7551474 dataset_size: 15141512 - config_name: bn-lv splits: - name: train num_bytes: 6292016 num_examples: 41206 download_size: 3115498 dataset_size: 6292016 - config_name: bn-mk splits: - name: train num_bytes: 29823897 num_examples: 164359 download_size: 13832814 dataset_size: 29823897 - config_name: bn-ml splits: - name: train num_bytes: 14630579 num_examples: 
68197 download_size: 6092327 dataset_size: 14630579 - config_name: bn-ms splits: - name: train num_bytes: 29748935 num_examples: 191371 download_size: 14143297 dataset_size: 29748935 - config_name: bn-nl splits: - name: train num_bytes: 45017780 num_examples: 279962 download_size: 21852207 dataset_size: 45017780 - config_name: bn-no splits: - name: train num_bytes: 36380592 num_examples: 230949 download_size: 17643199 dataset_size: 36380592 - config_name: bn-pl splits: - name: train num_bytes: 48871457 num_examples: 308384 download_size: 24116776 dataset_size: 48871457 - config_name: bn-pt splits: - name: train num_bytes: 47796750 num_examples: 303297 download_size: 23348691 dataset_size: 47796750 - config_name: bn-ro splits: - name: train num_bytes: 51044103 num_examples: 324680 download_size: 25005560 dataset_size: 51044103 - config_name: bn-ru splits: - name: train num_bytes: 46991914 num_examples: 264225 download_size: 22207315 dataset_size: 46991914 - config_name: bn-si splits: - name: train num_bytes: 9399788 num_examples: 47488 download_size: 4113239 dataset_size: 9399788 - config_name: bn-sk splits: - name: train num_bytes: 19230434 num_examples: 123258 download_size: 9605086 dataset_size: 19230434 - config_name: bn-sl splits: - name: train num_bytes: 43206384 num_examples: 278003 download_size: 21297300 dataset_size: 43206384 - config_name: bn-sq splits: - name: train num_bytes: 16728900 num_examples: 103831 download_size: 8197369 dataset_size: 16728900 - config_name: bn-sr splits: - name: train num_bytes: 47351885 num_examples: 288856 download_size: 22999739 dataset_size: 47351885 - config_name: bn-sv splits: - name: train num_bytes: 44280487 num_examples: 275980 download_size: 21415654 dataset_size: 44280487 - config_name: bn-ta splits: - name: train num_bytes: 623419 num_examples: 3195 download_size: 261936 dataset_size: 623419 - config_name: bn-th splits: - name: train num_bytes: 23714054 num_examples: 117126 download_size: 10402379 dataset_size: 
23714054 - config_name: bn-tl splits: - name: train num_bytes: 296962 num_examples: 2051 download_size: 150590 dataset_size: 296962 - config_name: bn-tr splits: - name: train num_bytes: 51290410 num_examples: 327016 download_size: 25048848 dataset_size: 51290410 - config_name: bn-uk splits: - name: train num_bytes: 5443253 num_examples: 30784 download_size: 2621900 dataset_size: 5443253 - config_name: bn-ur splits: - name: train num_bytes: 1158849 num_examples: 6656 download_size: 545013 dataset_size: 1158849 - config_name: bn-vi splits: - name: train num_bytes: 36639339 num_examples: 219569 download_size: 17409638 dataset_size: 36639339 - config_name: bs-af splits: - name: train num_bytes: 528288 num_examples: 4301 download_size: 333802 dataset_size: 528288 - config_name: bs-ar splits: - name: train num_bytes: 1207123625 num_examples: 8898484 download_size: 669990581 dataset_size: 1207123625 - config_name: bs-bg splits: - name: train num_bytes: 1466390998 num_examples: 10563401 download_size: 791853387 dataset_size: 1466390998 - config_name: bs-bn splits: - name: train num_bytes: 25072603 num_examples: 158164 download_size: 12408012 dataset_size: 25072603 - config_name: bs-ca splits: - name: train num_bytes: 19174628 num_examples: 162614 download_size: 11430105 dataset_size: 19174628 - config_name: bs-cs splits: - name: train num_bytes: 1291430899 num_examples: 11106509 download_size: 775331761 dataset_size: 1291430899 - config_name: bs-da splits: - name: train num_bytes: 597312536 num_examples: 5170495 download_size: 351408375 dataset_size: 597312536 - config_name: bs-de splits: - name: train num_bytes: 701872288 num_examples: 5848316 download_size: 411665602 dataset_size: 701872288 - config_name: bs-el splits: - name: train num_bytes: 1541378374 num_examples: 10694560 download_size: 832206205 dataset_size: 1541378374 - config_name: bs-en splits: - name: train num_bytes: 1611328744 num_examples: 14041160 download_size: 933742422 dataset_size: 1611328744 - 
config_name: bs-es splits: - name: train num_bytes: 1473667845 num_examples: 12511518 download_size: 864928228 dataset_size: 1473667845 - config_name: bs-et splits: - name: train num_bytes: 542954187 num_examples: 4758151 download_size: 323950933 dataset_size: 542954187 - config_name: bs-eu splits: - name: train num_bytes: 29512428 num_examples: 260046 download_size: 17703002 dataset_size: 29512428 - config_name: bs-fa splits: - name: train num_bytes: 316129784 num_examples: 2282321 download_size: 172449573 dataset_size: 316129784 - config_name: bs-fi splits: - name: train num_bytes: 931756182 num_examples: 7924786 download_size: 552621966 dataset_size: 931756182 - config_name: bs-fr splits: - name: train num_bytes: 1088530465 num_examples: 9151041 download_size: 637723975 dataset_size: 1088530465 - config_name: bs-gl splits: - name: train num_bytes: 4593381 num_examples: 38761 download_size: 2797117 dataset_size: 4593381 - config_name: bs-he splits: - name: train num_bytes: 1128616959 num_examples: 8603247 download_size: 617122802 dataset_size: 1128616959 - config_name: bs-hi splits: - name: train num_bytes: 4365187 num_examples: 27317 download_size: 2177685 dataset_size: 4365187 - config_name: bs-hr splits: - name: train num_bytes: 1140384114 num_examples: 10476787 download_size: 672084885 dataset_size: 1140384114 - config_name: bs-hu splits: - name: train num_bytes: 1204441141 num_examples: 10315616 download_size: 720105363 dataset_size: 1204441141 - config_name: bs-hy splits: - name: train num_bytes: 7962 num_examples: 71 download_size: 8990 dataset_size: 7962 - config_name: bs-id splits: - name: train num_bytes: 354311060 num_examples: 3038174 download_size: 202408931 dataset_size: 354311060 - config_name: bs-is splits: - name: train num_bytes: 79823224 num_examples: 692517 download_size: 47747110 dataset_size: 79823224 - config_name: bs-it splits: - name: train num_bytes: 945468774 num_examples: 7934886 download_size: 559322827 dataset_size: 945468774 - 
config_name: bs-ja splits: - name: train num_bytes: 87191652 num_examples: 717200 download_size: 50556829 dataset_size: 87191652 - config_name: bs-ka splits: - name: train num_bytes: 14689224 num_examples: 94897 download_size: 7219698 dataset_size: 14689224 - config_name: bs-kk splits: - name: train num_bytes: 142991 num_examples: 1019 download_size: 83562 dataset_size: 142991 - config_name: bs-ko splits: - name: train num_bytes: 47717701 num_examples: 384696 download_size: 28489370 dataset_size: 47717701 - config_name: bs-lt splits: - name: train num_bytes: 79067945 num_examples: 704369 download_size: 48407755 dataset_size: 79067945 - config_name: bs-lv splits: - name: train num_bytes: 29849186 num_examples: 265709 download_size: 18261146 dataset_size: 29849186 - config_name: bs-mk splits: - name: train num_bytes: 229038925 num_examples: 1698831 download_size: 123374903 dataset_size: 229038925 - config_name: bs-ml splits: - name: train num_bytes: 27367058 num_examples: 145776 download_size: 12706118 dataset_size: 27367058 - config_name: bs-ms splits: - name: train num_bytes: 82700580 num_examples: 724398 download_size: 47616087 dataset_size: 82700580 - config_name: bs-nl splits: - name: train num_bytes: 1154797180 num_examples: 9753423 download_size: 677689849 dataset_size: 1154797180 - config_name: bs-no splits: - name: train num_bytes: 328673210 num_examples: 2885057 download_size: 194207989 dataset_size: 328673210 - config_name: bs-pl splits: - name: train num_bytes: 1275648323 num_examples: 10823330 download_size: 762743643 dataset_size: 1275648323 - config_name: bs-pt splits: - name: train num_bytes: 1110290608 num_examples: 9503027 download_size: 652143395 dataset_size: 1110290608 - config_name: bs-ro splits: - name: train num_bytes: 1420770979 num_examples: 12154625 download_size: 835243980 dataset_size: 1420770979 - config_name: bs-ru splits: - name: train num_bytes: 846376890 num_examples: 5997006 download_size: 462746912 dataset_size: 846376890 - 
config_name: bs-si splits: - name: train num_bytes: 36193901 num_examples: 219197 download_size: 17777566 dataset_size: 36193901 - config_name: bs-sk splits: - name: train num_bytes: 296138066 num_examples: 2581382 download_size: 178448740 dataset_size: 296138066 - config_name: bs-sl splits: - name: train num_bytes: 774023484 num_examples: 6947809 download_size: 463618907 dataset_size: 774023484 - config_name: bs-sq splits: - name: train num_bytes: 113752846 num_examples: 994688 download_size: 67461324 dataset_size: 113752846 - config_name: bs-sr splits: - name: train num_bytes: 1240461792 num_examples: 11207177 download_size: 725112274 dataset_size: 1240461792 - config_name: bs-sv splits: - name: train num_bytes: 658528949 num_examples: 5637922 download_size: 386326813 dataset_size: 658528949 - config_name: bs-ta splits: - name: train num_bytes: 1980224 num_examples: 11433 download_size: 922998 dataset_size: 1980224 - config_name: bs-te splits: - name: train num_bytes: 1834877 num_examples: 11262 download_size: 906589 dataset_size: 1834877 - config_name: bs-th splits: - name: train num_bytes: 202140440 num_examples: 1167505 download_size: 99087333 dataset_size: 202140440 - config_name: bs-tl splits: - name: train num_bytes: 226647 num_examples: 1648 download_size: 138980 dataset_size: 226647 - config_name: bs-tr splits: - name: train num_bytes: 1333752741 num_examples: 11354396 download_size: 780913167 dataset_size: 1333752741 - config_name: bs-uk splits: - name: train num_bytes: 35389877 num_examples: 257809 download_size: 19644518 dataset_size: 35389877 - config_name: bs-ur splits: - name: train num_bytes: 1385143 num_examples: 9693 download_size: 764586 dataset_size: 1385143 - config_name: bs-vi splits: - name: train num_bytes: 182410345 num_examples: 1449367 download_size: 102878289 dataset_size: 182410345 - config_name: ca-ar splits: - name: train num_bytes: 48245693 num_examples: 349709 download_size: 26493396 dataset_size: 48245693 - config_name: ca-bg 
splits: - name: train num_bytes: 52009557 num_examples: 367893 download_size: 27751974 dataset_size: 52009557 - config_name: ca-bn splits: - name: train num_bytes: 2258636 num_examples: 13220 download_size: 1117772 dataset_size: 2258636 - config_name: ca-bs splits: - name: train num_bytes: 19174628 num_examples: 162614 download_size: 11430105 dataset_size: 19174628 - config_name: ca-cs splits: - name: train num_bytes: 50199312 num_examples: 427869 download_size: 29580156 dataset_size: 50199312 - config_name: ca-da splits: - name: train num_bytes: 31613797 num_examples: 264759 download_size: 18189080 dataset_size: 31613797 - config_name: ca-de splits: - name: train num_bytes: 37124044 num_examples: 303329 download_size: 21335354 dataset_size: 37124044 - config_name: ca-el splits: - name: train num_bytes: 58295438 num_examples: 397260 download_size: 31008078 dataset_size: 58295438 - config_name: ca-en splits: - name: train num_bytes: 55691267 num_examples: 482598 download_size: 31806471 dataset_size: 55691267 - config_name: ca-es splits: - name: train num_bytes: 56415705 num_examples: 484580 download_size: 32373131 dataset_size: 56415705 - config_name: ca-et splits: - name: train num_bytes: 29206377 num_examples: 248298 download_size: 17158114 dataset_size: 29206377 - config_name: ca-eu splits: - name: train num_bytes: 3466851 num_examples: 29114 download_size: 2035378 dataset_size: 3466851 - config_name: ca-fa splits: - name: train num_bytes: 22213142 num_examples: 156841 download_size: 11914008 dataset_size: 22213142 - config_name: ca-fi splits: - name: train num_bytes: 38747384 num_examples: 323534 download_size: 22555790 dataset_size: 38747384 - config_name: ca-fr splits: - name: train num_bytes: 47423771 num_examples: 392159 download_size: 27287084 dataset_size: 47423771 - config_name: ca-gl splits: - name: train num_bytes: 1960267 num_examples: 16379 download_size: 1157012 dataset_size: 1960267 - config_name: ca-he splits: - name: train num_bytes: 48887291 
num_examples: 369068 download_size: 26369119 dataset_size: 48887291 - config_name: ca-hi splits: - name: train num_bytes: 834266 num_examples: 5430 download_size: 398082 dataset_size: 834266 - config_name: ca-hr splits: - name: train num_bytes: 42721456 num_examples: 368058 download_size: 25193360 dataset_size: 42721456 - config_name: ca-hu splits: - name: train num_bytes: 49130221 num_examples: 416729 download_size: 28833679 dataset_size: 49130221 - config_name: ca-id splits: - name: train num_bytes: 26965121 num_examples: 223678 download_size: 15050721 dataset_size: 26965121 - config_name: ca-is splits: - name: train num_bytes: 2713362 num_examples: 22386 download_size: 1638036 dataset_size: 2713362 - config_name: ca-it splits: - name: train num_bytes: 47175349 num_examples: 391293 download_size: 27407000 dataset_size: 47175349 - config_name: ca-ja splits: - name: train num_bytes: 14732331 num_examples: 116270 download_size: 8364225 dataset_size: 14732331 - config_name: ca-ka splits: - name: train num_bytes: 992947 num_examples: 5858 download_size: 473398 dataset_size: 992947 - config_name: ca-ko splits: - name: train num_bytes: 5006944 num_examples: 39433 download_size: 2979399 dataset_size: 5006944 - config_name: ca-lt splits: - name: train num_bytes: 4272009 num_examples: 36118 download_size: 2590152 dataset_size: 4272009 - config_name: ca-lv splits: - name: train num_bytes: 1387340 num_examples: 12919 download_size: 843739 dataset_size: 1387340 - config_name: ca-mk splits: - name: train num_bytes: 17656794 num_examples: 126653 download_size: 9308586 dataset_size: 17656794 - config_name: ca-ml splits: - name: train num_bytes: 1185797 num_examples: 6293 download_size: 553556 dataset_size: 1185797 - config_name: ca-ms splits: - name: train num_bytes: 5489853 num_examples: 46781 download_size: 3047263 dataset_size: 5489853 - config_name: ca-nl splits: - name: train num_bytes: 46043243 num_examples: 382529 download_size: 26512627 dataset_size: 46043243 - 
config_name: ca-no splits: - name: train num_bytes: 23195107 num_examples: 196221 download_size: 13382938 dataset_size: 23195107 - config_name: ca-pl splits: - name: train num_bytes: 48529888 num_examples: 407987 download_size: 28524973 dataset_size: 48529888 - config_name: ca-pt splits: - name: train num_bytes: 45705264 num_examples: 384142 download_size: 26394305 dataset_size: 45705264 - config_name: ca-ro splits: - name: train num_bytes: 50207827 num_examples: 423896 download_size: 29090640 dataset_size: 50207827 - config_name: ca-ru splits: - name: train num_bytes: 46515636 num_examples: 333735 download_size: 24970370 dataset_size: 46515636 - config_name: ca-si splits: - name: train num_bytes: 1346430 num_examples: 8488 download_size: 669721 dataset_size: 1346430 - config_name: ca-sk splits: - name: train num_bytes: 11843684 num_examples: 103505 download_size: 7086512 dataset_size: 11843684 - config_name: ca-sl splits: - name: train num_bytes: 33857371 num_examples: 290837 download_size: 20012268 dataset_size: 33857371 - config_name: ca-sq splits: - name: train num_bytes: 4616422 num_examples: 38801 download_size: 2711281 dataset_size: 4616422 - config_name: ca-sr splits: - name: train num_bytes: 44348997 num_examples: 372630 download_size: 25902854 dataset_size: 44348997 - config_name: ca-sv splits: - name: train num_bytes: 30067859 num_examples: 249711 download_size: 17325163 dataset_size: 30067859 - config_name: ca-th splits: - name: train num_bytes: 19332023 num_examples: 109978 download_size: 9376721 dataset_size: 19332023 - config_name: ca-tr splits: - name: train num_bytes: 51233240 num_examples: 430642 download_size: 29547526 dataset_size: 51233240 - config_name: ca-uk splits: - name: train num_bytes: 6157907 num_examples: 43827 download_size: 3329240 dataset_size: 6157907 - config_name: ca-vi splits: - name: train num_bytes: 18251372 num_examples: 139798 download_size: 10118589 dataset_size: 18251372 - config_name: cs-af splits: - name: train 
num_bytes: 2086126 num_examples: 17930 download_size: 1281548 dataset_size: 2086126 - config_name: cs-ar splits: - name: train num_bytes: 3324328869 num_examples: 24068513 download_size: 1838129654 dataset_size: 3324328869 - config_name: cs-bg splits: - name: train num_bytes: 4240111613 num_examples: 29956616 download_size: 2280526705 dataset_size: 4240111613 - config_name: cs-bn splits: - name: train num_bytes: 48606134 num_examples: 311485 download_size: 24145438 dataset_size: 48606134 - config_name: cs-bs splits: - name: train num_bytes: 1291430899 num_examples: 11106509 download_size: 775331761 dataset_size: 1291430899 - config_name: cs-ca splits: - name: train num_bytes: 50199312 num_examples: 427869 download_size: 29580156 dataset_size: 50199312 - config_name: cs-da splits: - name: train num_bytes: 1513500270 num_examples: 12857723 download_size: 886980068 dataset_size: 1513500270 - config_name: cs-de splits: - name: train num_bytes: 1997700123 num_examples: 16380096 download_size: 1165115957 dataset_size: 1997700123 - config_name: cs-el splits: - name: train num_bytes: 4209413789 num_examples: 28822339 download_size: 2263665785 dataset_size: 4209413789 - config_name: cs-en splits: - name: train num_bytes: 4958917143 num_examples: 42346436 download_size: 2859318039 dataset_size: 4958917143 - config_name: cs-es splits: - name: train num_bytes: 4511770874 num_examples: 37430752 download_size: 2632584310 dataset_size: 4511770874 - config_name: cs-et splits: - name: train num_bytes: 1317410029 num_examples: 11354455 download_size: 782933276 dataset_size: 1317410029 - config_name: cs-eu splits: - name: train num_bytes: 78086817 num_examples: 686533 download_size: 46731570 dataset_size: 78086817 - config_name: cs-fa splits: - name: train num_bytes: 734097243 num_examples: 5280170 download_size: 400473315 dataset_size: 734097243 - config_name: cs-fi splits: - name: train num_bytes: 2581423264 num_examples: 21447959 download_size: 1522761753 dataset_size: 2581423264 
- config_name: cs-fr splits: - name: train num_bytes: 3342266409 num_examples: 27447126 download_size: 1943360811 dataset_size: 3342266409 - config_name: cs-gl splits: - name: train num_bytes: 11309443 num_examples: 96875 download_size: 6858740 dataset_size: 11309443 - config_name: cs-he splits: - name: train num_bytes: 3280075533 num_examples: 24503253 download_size: 1786583742 dataset_size: 3280075533 - config_name: cs-hi splits: - name: train num_bytes: 9679955 num_examples: 61895 download_size: 4770873 dataset_size: 9679955 - config_name: cs-hr splits: - name: train num_bytes: 3024289366 num_examples: 25833568 download_size: 1800612714 dataset_size: 3024289366 - config_name: cs-hu splits: - name: train num_bytes: 3880458592 num_examples: 32559149 download_size: 2298745342 dataset_size: 3880458592 - config_name: cs-hy splits: - name: train num_bytes: 524997 num_examples: 3263 download_size: 308355 dataset_size: 524997 - config_name: cs-id splits: - name: train num_bytes: 855326519 num_examples: 7261871 download_size: 487112887 dataset_size: 855326519 - config_name: cs-is splits: - name: train num_bytes: 163474316 num_examples: 1433036 download_size: 97115756 dataset_size: 163474316 - config_name: cs-it splits: - name: train num_bytes: 3238436006 num_examples: 26628326 download_size: 1900423982 dataset_size: 3238436006 - config_name: cs-ja splits: - name: train num_bytes: 220884618 num_examples: 1793821 download_size: 127193197 dataset_size: 220884618 - config_name: cs-ka splits: - name: train num_bytes: 25681863 num_examples: 163352 download_size: 12657473 dataset_size: 25681863 - config_name: cs-kk splits: - name: train num_bytes: 143586 num_examples: 1264 download_size: 85825 dataset_size: 143586 - config_name: cs-ko splits: - name: train num_bytes: 148137487 num_examples: 1186980 download_size: 88268017 dataset_size: 148137487 - config_name: cs-lt splits: - name: train num_bytes: 145265360 num_examples: 1307483 download_size: 88496159 dataset_size: 145265360 
- config_name: cs-lv splits: - name: train num_bytes: 53887385 num_examples: 482427 download_size: 32754813 dataset_size: 53887385 - config_name: cs-mk splits: - name: train num_bytes: 405932340 num_examples: 2962846 download_size: 219285568 dataset_size: 405932340 - config_name: cs-ml splits: - name: train num_bytes: 54499808 num_examples: 294981 download_size: 25363536 dataset_size: 54499808 - config_name: cs-ms splits: - name: train num_bytes: 180186908 num_examples: 1576665 download_size: 103225596 dataset_size: 180186908 - config_name: cs-nl splits: - name: train num_bytes: 3323074342 num_examples: 27395678 download_size: 1938315831 dataset_size: 3323074342 - config_name: cs-no splits: - name: train num_bytes: 825472787 num_examples: 7135203 download_size: 485832305 dataset_size: 825472787 - config_name: cs-pl splits: - name: train num_bytes: 3975557611 num_examples: 33029487 download_size: 2357243437 dataset_size: 3975557611 - config_name: cs-pt splits: - name: train num_bytes: 3030385756 num_examples: 25436263 download_size: 1768515174 dataset_size: 3030385756 - config_name: cs-ro splits: - name: train num_bytes: 4120752955 num_examples: 34478606 download_size: 2410165855 dataset_size: 4120752955 - config_name: cs-ru splits: - name: train num_bytes: 2775775666 num_examples: 19402678 download_size: 1508345822 dataset_size: 2775775666 - config_name: cs-si splits: - name: train num_bytes: 74604851 num_examples: 454257 download_size: 36670813 dataset_size: 74604851 - config_name: cs-sk splits: - name: train num_bytes: 809217151 num_examples: 7060065 download_size: 482587712 dataset_size: 809217151 - config_name: cs-sl splits: - name: train num_bytes: 1908319335 num_examples: 16605949 download_size: 1140671701 dataset_size: 1908319335 - config_name: cs-sq splits: - name: train num_bytes: 192672998 num_examples: 1668326 download_size: 113932942 dataset_size: 192672998 - config_name: cs-sr splits: - name: train num_bytes: 3357068079 num_examples: 28272036 
download_size: 1988355456 dataset_size: 3357068079 - config_name: cs-sv splits: - name: train num_bytes: 1772898290 num_examples: 14880023 download_size: 1035154954 dataset_size: 1772898290 - config_name: cs-ta splits: - name: train num_bytes: 3432879 num_examples: 20340 download_size: 1589205 dataset_size: 3432879 - config_name: cs-te splits: - name: train num_bytes: 2963070 num_examples: 18718 download_size: 1450631 dataset_size: 2963070 - config_name: cs-th splits: - name: train num_bytes: 490630442 num_examples: 2846060 download_size: 240534009 dataset_size: 490630442 - config_name: cs-tl splits: - name: train num_bytes: 873459 num_examples: 7439 download_size: 526885 dataset_size: 873459 - config_name: cs-tr splits: - name: train num_bytes: 3905507788 num_examples: 32632447 download_size: 2272977883 dataset_size: 3905507788 - config_name: cs-uk splits: - name: train num_bytes: 100339267 num_examples: 730804 download_size: 55410782 dataset_size: 100339267 - config_name: cs-ur splits: - name: train num_bytes: 3228588 num_examples: 23035 download_size: 1788810 dataset_size: 3228588 - config_name: cs-vi splits: - name: train num_bytes: 385622743 num_examples: 3040137 download_size: 216864357 dataset_size: 385622743 - config_name: da-af splits: - name: train num_bytes: 1498402 num_examples: 12525 download_size: 889248 dataset_size: 1498402 - config_name: da-ar splits: - name: train num_bytes: 1481372752 num_examples: 10762663 download_size: 802170547 dataset_size: 1481372752 - config_name: da-bg splits: - name: train num_bytes: 1678455917 num_examples: 11922628 download_size: 885990188 dataset_size: 1678455917 - config_name: da-bn splits: - name: train num_bytes: 42524591 num_examples: 269290 download_size: 20580643 dataset_size: 42524591 - config_name: da-bs splits: - name: train num_bytes: 597312536 num_examples: 5170495 download_size: 351408375 dataset_size: 597312536 - config_name: da-ca splits: - name: train num_bytes: 31613797 num_examples: 264759 
download_size: 18189080 dataset_size: 31613797 - config_name: da-cs splits: - name: train num_bytes: 1513500270 num_examples: 12857723 download_size: 886980068 dataset_size: 1513500270 - config_name: da-de splits: - name: train num_bytes: 999508741 num_examples: 8321258 download_size: 569978195 dataset_size: 999508741 - config_name: da-el splits: - name: train num_bytes: 1774096541 num_examples: 12175439 download_size: 936200935 dataset_size: 1774096541 - config_name: da-en splits: - name: train num_bytes: 1684384739 num_examples: 14474569 download_size: 954048526 dataset_size: 1684384739 - config_name: da-es splits: - name: train num_bytes: 1596813159 num_examples: 13468576 download_size: 915454273 dataset_size: 1596813159 - config_name: da-et splits: - name: train num_bytes: 819966895 num_examples: 7096294 download_size: 476413763 dataset_size: 819966895 - config_name: da-eu splits: - name: train num_bytes: 48374689 num_examples: 428242 download_size: 28235592 dataset_size: 48374689 - config_name: da-fa splits: - name: train num_bytes: 520103141 num_examples: 3723055 download_size: 276464918 dataset_size: 520103141 - config_name: da-fi splits: - name: train num_bytes: 1435435794 num_examples: 12434485 download_size: 822270589 dataset_size: 1435435794 - config_name: da-fr splits: - name: train num_bytes: 1311015663 num_examples: 10961829 download_size: 747814349 dataset_size: 1311015663 - config_name: da-gl splits: - name: train num_bytes: 5081892 num_examples: 42876 download_size: 3005975 dataset_size: 5081892 - config_name: da-he splits: - name: train num_bytes: 1460114748 num_examples: 10992441 download_size: 779437750 dataset_size: 1460114748 - config_name: da-hi splits: - name: train num_bytes: 8250390 num_examples: 51829 download_size: 3984917 dataset_size: 8250390 - config_name: da-hr splits: - name: train num_bytes: 1315370139 num_examples: 11400846 download_size: 768447855 dataset_size: 1315370139 - config_name: da-hu splits: - name: train num_bytes: 
1410868629 num_examples: 11972539 download_size: 823174488 dataset_size: 1410868629 - config_name: da-id splits: - name: train num_bytes: 573521735 num_examples: 4844433 download_size: 319235326 dataset_size: 573521735 - config_name: da-is splits: - name: train num_bytes: 142283151 num_examples: 1232620 download_size: 82440528 dataset_size: 142283151 - config_name: da-it splits: - name: train num_bytes: 1179129267 num_examples: 9834003 download_size: 679224508 dataset_size: 1179129267 - config_name: da-ja splits: - name: train num_bytes: 168347839 num_examples: 1381611 download_size: 94322518 dataset_size: 168347839 - config_name: da-ka splits: - name: train num_bytes: 21512391 num_examples: 135036 download_size: 10282056 dataset_size: 21512391 - config_name: da-kk splits: - name: train num_bytes: 422750 num_examples: 3447 download_size: 232114 dataset_size: 422750 - config_name: da-ko splits: - name: train num_bytes: 87164739 num_examples: 705508 download_size: 50687872 dataset_size: 87164739 - config_name: da-lt splits: - name: train num_bytes: 123680581 num_examples: 1094095 download_size: 73808231 dataset_size: 123680581 - config_name: da-lv splits: - name: train num_bytes: 45116352 num_examples: 400915 download_size: 26817997 dataset_size: 45116352 - config_name: da-mk splits: - name: train num_bytes: 323793502 num_examples: 2329327 download_size: 170531418 dataset_size: 323793502 - config_name: da-ml splits: - name: train num_bytes: 37961114 num_examples: 201499 download_size: 17321019 dataset_size: 37961114 - config_name: da-ms splits: - name: train num_bytes: 144576253 num_examples: 1247971 download_size: 80934783 dataset_size: 144576253 - config_name: da-nl splits: - name: train num_bytes: 1437483648 num_examples: 12208535 download_size: 818731226 dataset_size: 1437483648 - config_name: da-no splits: - name: train num_bytes: 742282904 num_examples: 6690423 download_size: 419948754 dataset_size: 742282904 - config_name: da-pl splits: - name: train 
num_bytes: 1443008450 num_examples: 12197406 download_size: 840550547 dataset_size: 1443008450 - config_name: da-pt splits: - name: train num_bytes: 1406981386 num_examples: 11989009 download_size: 804462230 dataset_size: 1406981386 - config_name: da-ro splits: - name: train num_bytes: 1554969828 num_examples: 13120495 download_size: 894137585 dataset_size: 1554969828 - config_name: da-ru splits: - name: train num_bytes: 1082634005 num_examples: 7543012 download_size: 578802280 dataset_size: 1082634005 - config_name: da-si splits: - name: train num_bytes: 59345052 num_examples: 358142 download_size: 28476367 dataset_size: 59345052 - config_name: da-sk splits: - name: train num_bytes: 404552954 num_examples: 3464136 download_size: 237985927 dataset_size: 404552954 - config_name: da-sl splits: - name: train num_bytes: 1062503824 num_examples: 9368478 download_size: 621255310 dataset_size: 1062503824 - config_name: da-sq splits: - name: train num_bytes: 154650977 num_examples: 1328813 download_size: 89388178 dataset_size: 154650977 - config_name: da-sr splits: - name: train num_bytes: 1348248860 num_examples: 11444039 download_size: 781595266 dataset_size: 1348248860 - config_name: da-sv splits: - name: train num_bytes: 1146106544 num_examples: 10048583 download_size: 648566400 dataset_size: 1146106544 - config_name: da-ta splits: - name: train num_bytes: 3180684 num_examples: 18369 download_size: 1445542 dataset_size: 3180684 - config_name: da-te splits: - name: train num_bytes: 2847730 num_examples: 17496 download_size: 1373002 dataset_size: 2847730 - config_name: da-th splits: - name: train num_bytes: 310445820 num_examples: 1794942 download_size: 148396298 dataset_size: 310445820 - config_name: da-tl splits: - name: train num_bytes: 328178 num_examples: 3059 download_size: 194393 dataset_size: 328178 - config_name: da-tr splits: - name: train num_bytes: 1531776455 num_examples: 12888418 download_size: 875526783 dataset_size: 1531776455 - config_name: da-uk splits: 
- name: train num_bytes: 54998950 num_examples: 396040 download_size: 29855367 dataset_size: 54998950 - config_name: da-ur splits: - name: train num_bytes: 2628484 num_examples: 18276 download_size: 1416881 dataset_size: 2628484 - config_name: da-vi splits: - name: train num_bytes: 284588156 num_examples: 2223616 download_size: 156155759 dataset_size: 284588156 - config_name: de-af splits: - name: train num_bytes: 2344839 num_examples: 19279 download_size: 1400572 dataset_size: 2344839 - config_name: de-ar splits: - name: train num_bytes: 1765920759 num_examples: 12439023 download_size: 958466999 dataset_size: 1765920759 - config_name: de-bg splits: - name: train num_bytes: 2184879109 num_examples: 15078722 download_size: 1156134883 dataset_size: 2184879109 - config_name: de-bn splits: - name: train num_bytes: 44064388 num_examples: 277674 download_size: 21493343 dataset_size: 44064388 - config_name: de-bs splits: - name: train num_bytes: 701872288 num_examples: 5848316 download_size: 411665602 dataset_size: 701872288 - config_name: de-ca splits: - name: train num_bytes: 37124044 num_examples: 303329 download_size: 21335354 dataset_size: 37124044 - config_name: de-cs splits: - name: train num_bytes: 1997700123 num_examples: 16380096 download_size: 1165115957 dataset_size: 1997700123 - config_name: de-da splits: - name: train num_bytes: 999508741 num_examples: 8321258 download_size: 569978195 dataset_size: 999508741 - config_name: de-el splits: - name: train num_bytes: 2370542861 num_examples: 15883581 download_size: 1252883216 dataset_size: 2370542861 - config_name: de-en splits: - name: train num_bytes: 2697913029 num_examples: 22512639 download_size: 1523253452 dataset_size: 2697913029 - config_name: de-es splits: - name: train num_bytes: 2327844818 num_examples: 18951214 download_size: 1331205765 dataset_size: 2327844818 - config_name: de-et splits: - name: train num_bytes: 827570421 num_examples: 6907155 download_size: 480808673 dataset_size: 827570421 - 
config_name: de-eu splits: - name: train num_bytes: 61697455 num_examples: 534929 download_size: 35978773 dataset_size: 61697455 - config_name: de-fa splits: - name: train num_bytes: 527197307 num_examples: 3672356 download_size: 281666934 dataset_size: 527197307 - config_name: de-fi splits: - name: train num_bytes: 1619305587 num_examples: 13212027 download_size: 928756254 dataset_size: 1619305587 - config_name: de-fr splits: - name: train num_bytes: 1977954169 num_examples: 15994801 download_size: 1124495566 dataset_size: 1977954169 - config_name: de-gl splits: - name: train num_bytes: 9238905 num_examples: 77160 download_size: 5447997 dataset_size: 9238905 - config_name: de-he splits: - name: train num_bytes: 1745743632 num_examples: 12751222 download_size: 932940603 dataset_size: 1745743632 - config_name: de-hi splits: - name: train num_bytes: 9081927 num_examples: 56047 download_size: 4407106 dataset_size: 9081927 - config_name: de-hr splits: - name: train num_bytes: 1655626799 num_examples: 13798486 download_size: 963557732 dataset_size: 1655626799 - config_name: de-hu splits: - name: train num_bytes: 1929800058 num_examples: 15764968 download_size: 1121269163 dataset_size: 1929800058 - config_name: de-hy splits: - name: train num_bytes: 365023 num_examples: 2057 download_size: 215829 dataset_size: 365023 - config_name: de-id splits: - name: train num_bytes: 572278171 num_examples: 4668250 download_size: 319120129 dataset_size: 572278171 - config_name: de-is splits: - name: train num_bytes: 124447746 num_examples: 1063594 download_size: 72329044 dataset_size: 124447746 - config_name: de-it splits: - name: train num_bytes: 1675546221 num_examples: 13566188 download_size: 961354594 dataset_size: 1675546221 - config_name: de-ja splits: - name: train num_bytes: 184347104 num_examples: 1447086 download_size: 103584890 dataset_size: 184347104 - config_name: de-ka splits: - name: train num_bytes: 19636234 num_examples: 123121 download_size: 9475825 dataset_size: 
19636234 - config_name: de-kk splits: - name: train num_bytes: 469048 num_examples: 3715 download_size: 257788 dataset_size: 469048 - config_name: de-ko splits: - name: train num_bytes: 96371270 num_examples: 750914 download_size: 56195729 dataset_size: 96371270 - config_name: de-lt splits: - name: train num_bytes: 111711755 num_examples: 975756 download_size: 66594609 dataset_size: 111711755 - config_name: de-lv splits: - name: train num_bytes: 45899031 num_examples: 401817 download_size: 27191631 dataset_size: 45899031 - config_name: de-mk splits: - name: train num_bytes: 297927753 num_examples: 2097352 download_size: 157682179 dataset_size: 297927753 - config_name: de-ml splits: - name: train num_bytes: 45593227 num_examples: 244269 download_size: 20973806 dataset_size: 45593227 - config_name: de-ms splits: - name: train num_bytes: 141273355 num_examples: 1194238 download_size: 79321363 dataset_size: 141273355 - config_name: de-nl splits: - name: train num_bytes: 1925454169 num_examples: 15665535 download_size: 1095338277 dataset_size: 1925454169 - config_name: de-no splits: - name: train num_bytes: 636738826 num_examples: 5383006 download_size: 364233509 dataset_size: 636738826 - config_name: de-pl splits: - name: train num_bytes: 1983979187 num_examples: 16076178 download_size: 1151054584 dataset_size: 1983979187 - config_name: de-pt splits: - name: train num_bytes: 1795134904 num_examples: 14737184 download_size: 1023639421 dataset_size: 1795134904 - config_name: de-ro splits: - name: train num_bytes: 2162906086 num_examples: 17591098 download_size: 1240337422 dataset_size: 2162906086 - config_name: de-ru splits: - name: train num_bytes: 1491789685 num_examples: 10191086 download_size: 798589305 dataset_size: 1491789685 - config_name: de-si splits: - name: train num_bytes: 55220250 num_examples: 324862 download_size: 26729654 dataset_size: 55220250 - config_name: de-sk splits: - name: train num_bytes: 456501829 num_examples: 3764288 download_size: 268030078 
dataset_size: 456501829 - config_name: de-sl splits: - name: train num_bytes: 1161043365 num_examples: 9827875 download_size: 678336472 dataset_size: 1161043365 - config_name: de-sq splits: - name: train num_bytes: 129442137 num_examples: 1073832 download_size: 75337053 dataset_size: 129442137 - config_name: de-sr splits: - name: train num_bytes: 1805114230 num_examples: 14843597 download_size: 1047418207 dataset_size: 1805114230 - config_name: de-sv splits: - name: train num_bytes: 1171363604 num_examples: 9657407 download_size: 667069408 dataset_size: 1171363604 - config_name: de-ta splits: - name: train num_bytes: 3009325 num_examples: 17376 download_size: 1374679 dataset_size: 3009325 - config_name: de-te splits: - name: train num_bytes: 2004450 num_examples: 12811 download_size: 972183 dataset_size: 2004450 - config_name: de-th splits: - name: train num_bytes: 296889985 num_examples: 1670586 download_size: 143164534 dataset_size: 296889985 - config_name: de-tl splits: - name: train num_bytes: 1550113 num_examples: 12913 download_size: 854519 dataset_size: 1550113 - config_name: de-tr splits: - name: train num_bytes: 1780813074 num_examples: 17205364 download_size: 1204247178 dataset_size: 1780813074 - config_name: de-uk splits: - name: train num_bytes: 64032415 num_examples: 454610 download_size: 34715937 dataset_size: 64032415 - config_name: de-ur splits: - name: train num_bytes: 2715028 num_examples: 18678 download_size: 1472368 dataset_size: 2715028 - config_name: de-vi splits: - name: train num_bytes: 299917788 num_examples: 2284697 download_size: 165073287 dataset_size: 299917788 - config_name: el-af splits: - name: train num_bytes: 4304695 num_examples: 29832 download_size: 2353786 dataset_size: 4304695 - config_name: el-ar splits: - name: train num_bytes: 3745963243 num_examples: 22468462 download_size: 1898849733 dataset_size: 3745963243 - config_name: el-bg splits: - name: train num_bytes: 4701299588 num_examples: 27674913 download_size: 2332443318 
dataset_size: 4701299588 - config_name: el-bn splits: - name: train num_bytes: 58398223 num_examples: 318118 download_size: 27191946 dataset_size: 58398223 - config_name: el-bs splits: - name: train num_bytes: 1541378374 num_examples: 10694560 download_size: 832206205 dataset_size: 1541378374 - config_name: el-ca splits: - name: train num_bytes: 58295438 num_examples: 397260 download_size: 31008078 dataset_size: 58295438 - config_name: el-cs splits: - name: train num_bytes: 4209413789 num_examples: 28822339 download_size: 2263665785 dataset_size: 4209413789 - config_name: el-da splits: - name: train num_bytes: 1774096541 num_examples: 12175439 download_size: 936200935 dataset_size: 1774096541 - config_name: el-de splits: - name: train num_bytes: 2370542861 num_examples: 15883581 download_size: 1252883216 dataset_size: 2370542861 - config_name: el-en splits: - name: train num_bytes: 5828346305 num_examples: 40492942 download_size: 3047056130 dataset_size: 5828346305 - config_name: el-es splits: - name: train num_bytes: 5282611464 num_examples: 35745989 download_size: 2793349576 dataset_size: 5282611464 - config_name: el-et splits: - name: train num_bytes: 1479690639 num_examples: 10344538 download_size: 793686483 dataset_size: 1479690639 - config_name: el-eu splits: - name: train num_bytes: 94941038 num_examples: 673934 download_size: 51057816 dataset_size: 94941038 - config_name: el-fa splits: - name: train num_bytes: 861395572 num_examples: 5135640 download_size: 432199144 dataset_size: 861395572 - config_name: el-fi splits: - name: train num_bytes: 3058356549 num_examples: 20522413 download_size: 1622412788 dataset_size: 3058356549 - config_name: el-fr splits: - name: train num_bytes: 4087191945 num_examples: 27288201 download_size: 2150547430 dataset_size: 4087191945 - config_name: el-gl splits: - name: train num_bytes: 15637046 num_examples: 105495 download_size: 8532242 dataset_size: 15637046 - config_name: el-he splits: - name: train num_bytes: 3566328893 
num_examples: 22042563 download_size: 1782039798 dataset_size: 3566328893 - config_name: el-hi splits: - name: train num_bytes: 11434968 num_examples: 62014 download_size: 5291694 dataset_size: 11434968 - config_name: el-hr splits: - name: train num_bytes: 3546784465 num_examples: 24498188 download_size: 1901454018 dataset_size: 3546784465 - config_name: el-hu splits: - name: train num_bytes: 3593206690 num_examples: 28253647 download_size: 2222302392 dataset_size: 3593206690 - config_name: el-hy splits: - name: train num_bytes: 735920 num_examples: 3235 download_size: 350099 dataset_size: 735920 - config_name: el-id splits: - name: train num_bytes: 1207943843 num_examples: 7095656 download_size: 535432550 dataset_size: 1207943843 - config_name: el-is splits: - name: train num_bytes: 229896228 num_examples: 1393628 download_size: 105702340 dataset_size: 229896228 - config_name: el-it splits: - name: train num_bytes: 4006127637 num_examples: 23064600 download_size: 1832561012 dataset_size: 4006127637 - config_name: el-ja splits: - name: train num_bytes: 309569068 num_examples: 1740082 download_size: 139182373 dataset_size: 309569068 - config_name: el-ka splits: - name: train num_bytes: 35346282 num_examples: 167388 download_size: 14414629 dataset_size: 35346282 - config_name: el-kk splits: - name: train num_bytes: 383466 num_examples: 2333 download_size: 173224 dataset_size: 383466 - config_name: el-ko splits: - name: train num_bytes: 201457405 num_examples: 1129226 download_size: 93368158 dataset_size: 201457405 - config_name: el-lt splits: - name: train num_bytes: 207497198 num_examples: 1277881 download_size: 97033589 dataset_size: 207497198 - config_name: el-lv splits: - name: train num_bytes: 75055620 num_examples: 458340 download_size: 35113730 dataset_size: 75055620 - config_name: el-mk splits: - name: train num_bytes: 562546442 num_examples: 2962520 download_size: 244467122 dataset_size: 562546442 - config_name: el-ml splits: - name: train num_bytes: 
72077898 num_examples: 302851 download_size: 28584756 dataset_size: 72077898 - config_name: el-ms splits: - name: train num_bytes: 256485741 num_examples: 1547627 download_size: 113766025 dataset_size: 256485741 - config_name: el-nl splits: - name: train num_bytes: 4553680280 num_examples: 26138141 download_size: 2065117639 dataset_size: 4553680280 - config_name: el-no splits: - name: train num_bytes: 1155041215 num_examples: 6850704 download_size: 524232028 dataset_size: 1155041215 - config_name: el-pl splits: - name: train num_bytes: 4321466672 num_examples: 29058119 download_size: 2312266659 dataset_size: 4321466672 - config_name: el-pt splits: - name: train num_bytes: 3629792970 num_examples: 24845250 download_size: 1914885055 dataset_size: 3629792970 - config_name: el-ro splits: - name: train num_bytes: 5743725087 num_examples: 33491396 download_size: 2615079473 dataset_size: 5743725087 - config_name: el-ru splits: - name: train num_bytes: 3356430222 num_examples: 17008094 download_size: 1481156139 dataset_size: 3356430222 - config_name: el-si splits: - name: train num_bytes: 101008167 num_examples: 466440 download_size: 41444085 dataset_size: 101008167 - config_name: el-sk splits: - name: train num_bytes: 1119152328 num_examples: 6600893 download_size: 519574339 dataset_size: 1119152328 - config_name: el-sl splits: - name: train num_bytes: 2641161518 num_examples: 15731333 download_size: 1214813209 dataset_size: 2641161518 - config_name: el-sq splits: - name: train num_bytes: 281729405 num_examples: 1689873 download_size: 128591842 dataset_size: 281729405 - config_name: el-sr splits: - name: train num_bytes: 4773576405 num_examples: 27873774 download_size: 2191070533 dataset_size: 4773576405 - config_name: el-sv splits: - name: train num_bytes: 2369270401 num_examples: 13735943 download_size: 1072737721 dataset_size: 2369270401 - config_name: el-ta splits: - name: train num_bytes: 4592198 num_examples: 20440 download_size: 1801465 dataset_size: 4592198 - 
config_name: el-te splits: - name: train num_bytes: 3883126 num_examples: 18096 download_size: 1603179 dataset_size: 3883126 - config_name: el-th splits: - name: train num_bytes: 573575076 num_examples: 2505706 download_size: 235809496 dataset_size: 573575076 - config_name: el-tl splits: - name: train num_bytes: 1711021 num_examples: 10131 download_size: 772593 dataset_size: 1711021 - config_name: el-tr splits: - name: train num_bytes: 5329964195 num_examples: 31032881 download_size: 2416706054 dataset_size: 5329964195 - config_name: el-uk splits: - name: train num_bytes: 125784250 num_examples: 658201 download_size: 55910575 dataset_size: 125784250 - config_name: el-ur splits: - name: train num_bytes: 3816110 num_examples: 23072 download_size: 1945516 dataset_size: 3816110 - config_name: el-vi splits: - name: train num_bytes: 458881731 num_examples: 2966137 download_size: 235713513 dataset_size: 458881731 - config_name: en-af splits: - name: train num_bytes: 5129965 num_examples: 44703 download_size: 3057666 dataset_size: 5129965 - config_name: en-ar splits: - name: train num_bytes: 4041284109 num_examples: 29823188 download_size: 2168666470 dataset_size: 4041284109 - config_name: en-bg splits: - name: train num_bytes: 5605918826 num_examples: 40204338 download_size: 2930379054 dataset_size: 5605918826 - config_name: en-bn splits: - name: train num_bytes: 61557067 num_examples: 413602 download_size: 29677990 dataset_size: 61557067 - config_name: en-bs splits: - name: train num_bytes: 1611328744 num_examples: 14041160 download_size: 933742422 dataset_size: 1611328744 - config_name: en-ca splits: - name: train num_bytes: 55691267 num_examples: 482598 download_size: 31806471 dataset_size: 55691267 - config_name: en-cs splits: - name: train num_bytes: 4958917143 num_examples: 42346436 download_size: 2859318039 dataset_size: 4958917143 - config_name: en-da splits: - name: train num_bytes: 1684384739 num_examples: 14474569 download_size: 954048526 dataset_size: 
1684384739 - config_name: en-de splits: - name: train num_bytes: 2261702432 num_examples: 22512639 download_size: 1523253452 dataset_size: 2261702432 - config_name: en-el splits: - name: train num_bytes: 5031934826 num_examples: 40492942 download_size: 3047056130 dataset_size: 5031934826 - config_name: en-es splits: - name: train num_bytes: 7283153754 num_examples: 61434251 download_size: 4117089280 dataset_size: 7283153754 - config_name: en-et splits: - name: train num_bytes: 1424288445 num_examples: 12486898 download_size: 817432740 dataset_size: 1424288445 - config_name: en-eu splits: - name: train num_bytes: 90529067 num_examples: 805780 download_size: 52312189 dataset_size: 90529067 - config_name: en-fa splits: - name: train num_bytes: 839267322 num_examples: 6198109 download_size: 444546578 dataset_size: 839267322 - config_name: en-fi splits: - name: train num_bytes: 3281488758 num_examples: 27281566 download_size: 1875007453 dataset_size: 3281488758 - config_name: en-fr splits: - name: train num_bytes: 5008106734 num_examples: 41763488 download_size: 2819512415 dataset_size: 5008106734 - config_name: en-gl splits: - name: train num_bytes: 24667369 num_examples: 203550 download_size: 14416268 dataset_size: 24667369 - config_name: en-he splits: - name: train num_bytes: 3937594473 num_examples: 29887386 download_size: 2079882854 dataset_size: 3937594473 - config_name: en-hi splits: - name: train num_bytes: 13845344 num_examples: 93016 download_size: 6595622 dataset_size: 13845344 - config_name: en-hr splits: - name: train num_bytes: 4070450945 num_examples: 35131729 download_size: 2344656938 dataset_size: 4070450945 - config_name: en-hu splits: - name: train num_bytes: 5027017815 num_examples: 42655519 download_size: 2884293234 dataset_size: 5027017815 - config_name: en-hy splits: - name: train num_bytes: 535378 num_examples: 3390 download_size: 304684 dataset_size: 535378 - config_name: en-id splits: - name: train num_bytes: 1055673441 num_examples: 9268181 
download_size: 578401178 dataset_size: 1055673441 - config_name: en-is splits: - name: train num_bytes: 179506522 num_examples: 1569189 download_size: 103091458 dataset_size: 179506522 - config_name: en-it splits: - name: train num_bytes: 4240969886 num_examples: 35216229 download_size: 2413457956 dataset_size: 4240969886 - config_name: en-ja splits: - name: train num_bytes: 253948077 num_examples: 2083600 download_size: 141348223 dataset_size: 253948077 - config_name: en-ka splits: - name: train num_bytes: 30705227 num_examples: 199978 download_size: 14731620 dataset_size: 30705227 - config_name: en-kk splits: - name: train num_bytes: 451923 num_examples: 3712 download_size: 247679 dataset_size: 451923 - config_name: en-ko splits: - name: train num_bytes: 171059418 num_examples: 1391190 download_size: 99008025 dataset_size: 171059418 - config_name: en-lt splits: - name: train num_bytes: 155535702 num_examples: 1415961 download_size: 91241899 dataset_size: 155535702 - config_name: en-lv splits: - name: train num_bytes: 57245192 num_examples: 519553 download_size: 33566084 dataset_size: 57245192 - config_name: en-mk splits: - name: train num_bytes: 458454572 num_examples: 3401326 download_size: 240194746 dataset_size: 458454572 - config_name: en-ml splits: - name: train num_bytes: 68004683 num_examples: 386868 download_size: 30848251 dataset_size: 68004683 - config_name: en-ms splits: - name: train num_bytes: 215138272 num_examples: 1928345 download_size: 118514831 dataset_size: 215138272 - config_name: en-nl splits: - name: train num_bytes: 4478308445 num_examples: 37200621 download_size: 2530643758 dataset_size: 4478308445 - config_name: en-no splits: - name: train num_bytes: 995456478 num_examples: 8624996 download_size: 566632578 dataset_size: 995456478 - config_name: en-pl splits: - name: train num_bytes: 5028902061 num_examples: 41998942 download_size: 2892850883 dataset_size: 5028902061 - config_name: en-pt splits: - name: train num_bytes: 3926004451 
num_examples: 33222606 download_size: 2218278440 dataset_size: 3926004451 - config_name: en-ro splits: - name: train num_bytes: 5941123947 num_examples: 50693226 download_size: 3361333588 dataset_size: 5941123947 - config_name: en-ru splits: - name: train num_bytes: 3621900455 num_examples: 25910105 download_size: 1912229277 dataset_size: 3621900455 - config_name: en-si splits: - name: train num_bytes: 95095311 num_examples: 601164 download_size: 45540052 dataset_size: 95095311 - config_name: en-sk splits: - name: train num_bytes: 1016735266 num_examples: 8850871 download_size: 590210362 dataset_size: 1016735266 - config_name: en-sl splits: - name: train num_bytes: 2244809408 num_examples: 19641457 download_size: 1298201375 dataset_size: 2244809408 - config_name: en-sq splits: - name: train num_bytes: 215347638 num_examples: 1903990 download_size: 122671967 dataset_size: 215347638 - config_name: en-sr splits: - name: train num_bytes: 4986633944 num_examples: 42635098 download_size: 2859258243 dataset_size: 4986633944 - config_name: en-sv splits: - name: train num_bytes: 2093174228 num_examples: 17660152 download_size: 1183519631 dataset_size: 2093174228 - config_name: en-ta splits: - name: train num_bytes: 5218326 num_examples: 32417 download_size: 2350665 dataset_size: 5218326 - config_name: en-te splits: - name: train num_bytes: 3995120 num_examples: 27222 download_size: 1902804 dataset_size: 3995120 - config_name: en-th splits: - name: train num_bytes: 546907138 num_examples: 3281533 download_size: 261622137 dataset_size: 546907138 - config_name: en-tl splits: - name: train num_bytes: 1779985 num_examples: 16032 download_size: 976424 dataset_size: 1779985 - config_name: en-tr splits: - name: train num_bytes: 5293763367 num_examples: 44986121 download_size: 2981747525 dataset_size: 5293763367 - config_name: en-uk splits: - name: train num_bytes: 117067969 num_examples: 877780 download_size: 62752356 dataset_size: 117067969 - config_name: en-ur splits: - name: 
train num_bytes: 3860232 num_examples: 29074 download_size: 2058990 dataset_size: 3860232 - config_name: en-vi splits: - name: train num_bytes: 364150103 num_examples: 3505276 download_size: 234779741 dataset_size: 364150103 - config_name: es-af splits: - name: train num_bytes: 4080356 num_examples: 34306 download_size: 2452179 dataset_size: 4080356 - config_name: es-ar splits: - name: train num_bytes: 3713331324 num_examples: 26641247 download_size: 2012418908 dataset_size: 3713331324 - config_name: es-bg splits: - name: train num_bytes: 5010318451 num_examples: 34980373 download_size: 2646356888 dataset_size: 5010318451 - config_name: es-bn splits: - name: train num_bytes: 52286838 num_examples: 333733 download_size: 25516391 dataset_size: 52286838 - config_name: es-bs splits: - name: train num_bytes: 1473667845 num_examples: 12511518 download_size: 864928228 dataset_size: 1473667845 - config_name: es-ca splits: - name: train num_bytes: 56415705 num_examples: 484580 download_size: 32373131 dataset_size: 56415705 - config_name: es-cs splits: - name: train num_bytes: 4511770874 num_examples: 37430752 download_size: 2632584310 dataset_size: 4511770874 - config_name: es-da splits: - name: train num_bytes: 1596813159 num_examples: 13468576 download_size: 915454273 dataset_size: 1596813159 - config_name: es-de splits: - name: train num_bytes: 1962946639 num_examples: 18951214 download_size: 1331205765 dataset_size: 1962946639 - config_name: es-el splits: - name: train num_bytes: 4583385248 num_examples: 35745989 download_size: 2793349576 dataset_size: 4583385248 - config_name: es-en splits: - name: train num_bytes: 7283153754 num_examples: 61434251 download_size: 4117089280 dataset_size: 7283153754 - config_name: es-et splits: - name: train num_bytes: 1362680109 num_examples: 11623628 download_size: 793286587 dataset_size: 1362680109 - config_name: es-eu splits: - name: train num_bytes: 89809568 num_examples: 793593 download_size: 52364503 dataset_size: 89809568 - 
config_name: es-fa splits: - name: train num_bytes: 780982364 num_examples: 5578242 download_size: 418153498 dataset_size: 780982364 - config_name: es-fi splits: - name: train num_bytes: 2992243747 num_examples: 24504287 download_size: 1727509864 dataset_size: 2992243747 - config_name: es-fr splits: - name: train num_bytes: 4432049983 num_examples: 36142601 download_size: 2523824648 dataset_size: 4432049983 - config_name: es-gl splits: - name: train num_bytes: 26247341 num_examples: 216940 download_size: 15383010 dataset_size: 26247341 - config_name: es-he splits: - name: train num_bytes: 3667468207 num_examples: 27059535 download_size: 1958564684 dataset_size: 3667468207 - config_name: es-hi splits: - name: train num_bytes: 11280529 num_examples: 71209 download_size: 5460637 dataset_size: 11280529 - config_name: es-hr splits: - name: train num_bytes: 3641948935 num_examples: 30622533 download_size: 2124168311 dataset_size: 3641948935 - config_name: es-hu splits: - name: train num_bytes: 4494867342 num_examples: 37100976 download_size: 2609202590 dataset_size: 4494867342 - config_name: es-hy splits: - name: train num_bytes: 554671 num_examples: 3304 download_size: 317793 dataset_size: 554671 - config_name: es-id splits: - name: train num_bytes: 946159723 num_examples: 7971638 download_size: 527208058 dataset_size: 946159723 - config_name: es-is splits: - name: train num_bytes: 172524015 num_examples: 1502402 download_size: 100419287 dataset_size: 172524015 - config_name: es-it splits: - name: train num_bytes: 3765943929 num_examples: 30595034 download_size: 2165528825 dataset_size: 3765943929 - config_name: es-ja splits: - name: train num_bytes: 240567826 num_examples: 1922477 download_size: 135241937 dataset_size: 240567826 - config_name: es-ka splits: - name: train num_bytes: 28745504 num_examples: 181190 download_size: 13931405 dataset_size: 28745504 - config_name: es-kk splits: - name: train num_bytes: 305455 num_examples: 2479 download_size: 173386 
dataset_size: 305455 - config_name: es-ko splits: - name: train num_bytes: 152524323 num_examples: 1214195 download_size: 89100823 dataset_size: 152524323 - config_name: es-lt splits: - name: train num_bytes: 150566139 num_examples: 1345855 download_size: 89812715 dataset_size: 150566139 - config_name: es-lv splits: - name: train num_bytes: 55795703 num_examples: 494900 download_size: 33232435 dataset_size: 55795703 - config_name: es-mk splits: - name: train num_bytes: 441995857 num_examples: 3196372 download_size: 234093907 dataset_size: 441995857 - config_name: es-ml splits: - name: train num_bytes: 60628538 num_examples: 327235 download_size: 27796481 dataset_size: 60628538 - config_name: es-ms splits: - name: train num_bytes: 198890804 num_examples: 1722512 download_size: 111352461 dataset_size: 198890804 - config_name: es-nl splits: - name: train num_bytes: 4002246274 num_examples: 32561921 download_size: 2284595130 dataset_size: 4002246274 - config_name: es-no splits: - name: train num_bytes: 924514961 num_examples: 7892621 download_size: 532628997 dataset_size: 924514961 - config_name: es-pl splits: - name: train num_bytes: 4630342379 num_examples: 37756338 download_size: 2691429471 dataset_size: 4630342379 - config_name: es-pt splits: - name: train num_bytes: 3637992322 num_examples: 30294749 download_size: 2076748936 dataset_size: 3637992322 - config_name: es-ro splits: - name: train num_bytes: 5222773855 num_examples: 43203352 download_size: 2993415434 dataset_size: 5222773855 - config_name: es-ru splits: - name: train num_bytes: 3193894825 num_examples: 22084962 download_size: 1703183259 dataset_size: 3193894825 - config_name: es-si splits: - name: train num_bytes: 83786864 num_examples: 512221 download_size: 40580333 dataset_size: 83786864 - config_name: es-sk splits: - name: train num_bytes: 934981285 num_examples: 7895925 download_size: 549355348 dataset_size: 934981285 - config_name: es-sl splits: - name: train num_bytes: 2106853891 num_examples: 
18058017 download_size: 1232811547 dataset_size: 2106853891 - config_name: es-sq splits: - name: train num_bytes: 206237301 num_examples: 1774185 download_size: 119322615 dataset_size: 206237301 - config_name: es-sr splits: - name: train num_bytes: 4420284368 num_examples: 36740123 download_size: 2566679673 dataset_size: 4420284368 - config_name: es-sv splits: - name: train num_bytes: 1930096122 num_examples: 16028440 download_size: 1104241656 dataset_size: 1930096122 - config_name: es-ta splits: - name: train num_bytes: 4263517 num_examples: 24956 download_size: 1947242 dataset_size: 4263517 - config_name: es-te splits: - name: train num_bytes: 3438665 num_examples: 21762 download_size: 1664204 dataset_size: 3438665 - config_name: es-th splits: - name: train num_bytes: 513655458 num_examples: 2970255 download_size: 247633561 dataset_size: 513655458 - config_name: es-tl splits: - name: train num_bytes: 1607847 num_examples: 13547 download_size: 893545 dataset_size: 1607847 - config_name: es-tr splits: - name: train num_bytes: 4805023547 num_examples: 39643276 download_size: 2740139443 dataset_size: 4805023547 - config_name: es-uk splits: - name: train num_bytes: 108104017 num_examples: 780116 download_size: 58650002 dataset_size: 108104017 - config_name: es-ur splits: - name: train num_bytes: 3446488 num_examples: 24994 download_size: 1865029 dataset_size: 3446488 - config_name: es-vi splits: - name: train num_bytes: 409738525 num_examples: 3215157 download_size: 225723989 dataset_size: 409738525 - config_name: et-af splits: - name: train num_bytes: 701802 num_examples: 6336 download_size: 432127 dataset_size: 701802 - config_name: et-ar splits: - name: train num_bytes: 1316758719 num_examples: 9692040 download_size: 725414200 dataset_size: 1316758719 - config_name: et-bg splits: - name: train num_bytes: 1490926699 num_examples: 10793546 download_size: 799813964 dataset_size: 1490926699 - config_name: et-bn splits: - name: train num_bytes: 39690488 num_examples: 
252675 download_size: 19556097 dataset_size: 39690488 - config_name: et-bs splits: - name: train num_bytes: 542954187 num_examples: 4758151 download_size: 323950933 dataset_size: 542954187 - config_name: et-ca splits: - name: train num_bytes: 29206377 num_examples: 248298 download_size: 17158114 dataset_size: 29206377 - config_name: et-cs splits: - name: train num_bytes: 1317410029 num_examples: 11354455 download_size: 782933276 dataset_size: 1317410029 - config_name: et-da splits: - name: train num_bytes: 819966895 num_examples: 7096294 download_size: 476413763 dataset_size: 819966895 - config_name: et-de splits: - name: train num_bytes: 827570421 num_examples: 6907155 download_size: 480808673 dataset_size: 827570421 - config_name: et-el splits: - name: train num_bytes: 1479690639 num_examples: 10344538 download_size: 793686483 dataset_size: 1479690639 - config_name: et-en splits: - name: train num_bytes: 1424288445 num_examples: 12486898 download_size: 817432740 dataset_size: 1424288445 - config_name: et-es splits: - name: train num_bytes: 1362680109 num_examples: 11623628 download_size: 793286587 dataset_size: 1362680109 - config_name: et-eu splits: - name: train num_bytes: 45214451 num_examples: 406330 download_size: 26797328 dataset_size: 45214451 - config_name: et-fa splits: - name: train num_bytes: 422089259 num_examples: 3037779 download_size: 228735837 dataset_size: 422089259 - config_name: et-fi splits: - name: train num_bytes: 1110170778 num_examples: 9504879 download_size: 650014718 dataset_size: 1110170778 - config_name: et-fr splits: - name: train num_bytes: 1114911184 num_examples: 9402906 download_size: 647149319 dataset_size: 1114911184 - config_name: et-gl splits: - name: train num_bytes: 5747017 num_examples: 49199 download_size: 3456028 dataset_size: 5747017 - config_name: et-he splits: - name: train num_bytes: 1281186568 num_examples: 9814491 download_size: 694847554 dataset_size: 1281186568 - config_name: et-hi splits: - name: train num_bytes: 
7007605 num_examples: 43981 download_size: 3448393 dataset_size: 7007605 - config_name: et-hr splits: - name: train num_bytes: 1137072332 num_examples: 9977630 download_size: 674180740 dataset_size: 1137072332 - config_name: et-hu splits: - name: train num_bytes: 1225453310 num_examples: 10559714 download_size: 725560854 dataset_size: 1225453310 - config_name: et-hy splits: - name: train num_bytes: 21963 num_examples: 149 download_size: 17260 dataset_size: 21963 - config_name: et-id splits: - name: train num_bytes: 501018578 num_examples: 4282230 download_size: 283886176 dataset_size: 501018578 - config_name: et-is splits: - name: train num_bytes: 128275388 num_examples: 1110083 download_size: 75724039 dataset_size: 128275388 - config_name: et-it splits: - name: train num_bytes: 966506113 num_examples: 8146755 download_size: 565966199 dataset_size: 966506113 - config_name: et-ja splits: - name: train num_bytes: 143038594 num_examples: 1176436 download_size: 81982436 dataset_size: 143038594 - config_name: et-ka splits: - name: train num_bytes: 17300813 num_examples: 110021 download_size: 8480614 dataset_size: 17300813 - config_name: et-kk splits: - name: train num_bytes: 126502 num_examples: 1140 download_size: 76326 dataset_size: 126502 - config_name: et-ko splits: - name: train num_bytes: 59663642 num_examples: 492792 download_size: 35451791 dataset_size: 59663642 - config_name: et-lt splits: - name: train num_bytes: 126901863 num_examples: 1136463 download_size: 76823587 dataset_size: 126901863 - config_name: et-lv splits: - name: train num_bytes: 44678131 num_examples: 407588 download_size: 26981034 dataset_size: 44678131 - config_name: et-mk splits: - name: train num_bytes: 319552120 num_examples: 2331823 download_size: 171063902 dataset_size: 319552120 - config_name: et-ml splits: - name: train num_bytes: 33773736 num_examples: 179992 download_size: 15670139 dataset_size: 33773736 - config_name: et-ms splits: - name: train num_bytes: 129364714 num_examples: 
1135835 download_size: 73737881 dataset_size: 129364714 - config_name: et-nl splits: - name: train num_bytes: 1210250838 num_examples: 10251088 download_size: 702626738 dataset_size: 1210250838 - config_name: et-no splits: - name: train num_bytes: 445334705 num_examples: 3895270 download_size: 260557025 dataset_size: 445334705 - config_name: et-pl splits: - name: train num_bytes: 1280684329 num_examples: 10903089 download_size: 757805613 dataset_size: 1280684329 - config_name: et-pt splits: - name: train num_bytes: 1217332722 num_examples: 10452309 download_size: 707187030 dataset_size: 1217332722 - config_name: et-ro splits: - name: train num_bytes: 1344532761 num_examples: 11541438 download_size: 783875397 dataset_size: 1344532761 - config_name: et-ru splits: - name: train num_bytes: 920209470 num_examples: 6571496 download_size: 499716413 dataset_size: 920209470 - config_name: et-si splits: - name: train num_bytes: 54413124 num_examples: 331221 download_size: 26587114 dataset_size: 54413124 - config_name: et-sk splits: - name: train num_bytes: 382375735 num_examples: 3309749 download_size: 227783019 dataset_size: 382375735 - config_name: et-sl splits: - name: train num_bytes: 975198798 num_examples: 8677449 download_size: 578467438 dataset_size: 975198798 - config_name: et-sq splits: - name: train num_bytes: 161621933 num_examples: 1394116 download_size: 94899545 dataset_size: 161621933 - config_name: et-sr splits: - name: train num_bytes: 1209801207 num_examples: 10368386 download_size: 711087537 dataset_size: 1209801207 - config_name: et-sv splits: - name: train num_bytes: 861307088 num_examples: 7365540 download_size: 499940673 dataset_size: 861307088 - config_name: et-ta splits: - name: train num_bytes: 2416007 num_examples: 14339 download_size: 1112144 dataset_size: 2416007 - config_name: et-te splits: - name: train num_bytes: 2303022 num_examples: 14442 download_size: 1123437 dataset_size: 2303022 - config_name: et-th splits: - name: train num_bytes: 
299145165 num_examples: 1746500 download_size: 145311018 dataset_size: 299145165 - config_name: et-tl splits: - name: train num_bytes: 330894 num_examples: 3089 download_size: 198020 dataset_size: 330894 - config_name: et-tr splits: - name: train num_bytes: 1327335381 num_examples: 11350121 download_size: 769538468 dataset_size: 1327335381 - config_name: et-uk splits: - name: train num_bytes: 52542624 num_examples: 384837 download_size: 28929877 dataset_size: 52542624 - config_name: et-ur splits: - name: train num_bytes: 2735577 num_examples: 19522 download_size: 1510384 dataset_size: 2735577 - config_name: et-vi splits: - name: train num_bytes: 258530352 num_examples: 2048374 download_size: 144458871 dataset_size: 258530352 - config_name: eu-ar splits: - name: train num_bytes: 76878827 num_examples: 578303 download_size: 42521482 dataset_size: 76878827 - config_name: eu-bg splits: - name: train num_bytes: 82797615 num_examples: 605100 download_size: 44721568 dataset_size: 82797615 - config_name: eu-bn splits: - name: train num_bytes: 6687774 num_examples: 42420 download_size: 3242825 dataset_size: 6687774 - config_name: eu-bs splits: - name: train num_bytes: 29512428 num_examples: 260046 download_size: 17703002 dataset_size: 29512428 - config_name: eu-ca splits: - name: train num_bytes: 3466851 num_examples: 29114 download_size: 2035378 dataset_size: 3466851 - config_name: eu-cs splits: - name: train num_bytes: 78086817 num_examples: 686533 download_size: 46731570 dataset_size: 78086817 - config_name: eu-da splits: - name: train num_bytes: 48374689 num_examples: 428242 download_size: 28235592 dataset_size: 48374689 - config_name: eu-de splits: - name: train num_bytes: 61697455 num_examples: 534929 download_size: 35978773 dataset_size: 61697455 - config_name: eu-el splits: - name: train num_bytes: 94941038 num_examples: 673934 download_size: 51057816 dataset_size: 94941038 - config_name: eu-en splits: - name: train num_bytes: 90529067 num_examples: 805780 
download_size: 52312189 dataset_size: 90529067 - config_name: eu-es splits: - name: train num_bytes: 89809568 num_examples: 793593 download_size: 52364503 dataset_size: 89809568 - config_name: eu-et splits: - name: train num_bytes: 37732670 num_examples: 406330 download_size: 26797328 dataset_size: 37732670 - config_name: eu-fa splits: - name: train num_bytes: 33560335 num_examples: 245775 download_size: 18294190 dataset_size: 33560335 - config_name: eu-fi splits: - name: train num_bytes: 66433653 num_examples: 581609 download_size: 39155592 dataset_size: 66433653 - config_name: eu-fr splits: - name: train num_bytes: 74231133 num_examples: 636163 download_size: 43328430 dataset_size: 74231133 - config_name: eu-gl splits: - name: train num_bytes: 1513047 num_examples: 12391 download_size: 894771 dataset_size: 1513047 - config_name: eu-he splits: - name: train num_bytes: 73290931 num_examples: 566714 download_size: 39959198 dataset_size: 73290931 - config_name: eu-hi splits: - name: train num_bytes: 1617590 num_examples: 9977 download_size: 796314 dataset_size: 1617590 - config_name: eu-hr splits: - name: train num_bytes: 65046749 num_examples: 579474 download_size: 38799388 dataset_size: 65046749 - config_name: eu-hu splits: - name: train num_bytes: 75954918 num_examples: 663683 download_size: 45277429 dataset_size: 75954918 - config_name: eu-id splits: - name: train num_bytes: 34937246 num_examples: 307850 download_size: 19862779 dataset_size: 34937246 - config_name: eu-is splits: - name: train num_bytes: 5796399 num_examples: 48947 download_size: 3435615 dataset_size: 5796399 - config_name: eu-it splits: - name: train num_bytes: 66332976 num_examples: 568661 download_size: 39045887 dataset_size: 66332976 - config_name: eu-ja splits: - name: train num_bytes: 16374502 num_examples: 139143 download_size: 9460546 dataset_size: 16374502 - config_name: eu-ka splits: - name: train num_bytes: 1522347 num_examples: 9416 download_size: 738495 dataset_size: 1522347 - 
config_name: eu-ko splits: - name: train num_bytes: 8519715 num_examples: 72165 download_size: 5035001 dataset_size: 8519715 - config_name: eu-lt splits: - name: train num_bytes: 12064105 num_examples: 108116 download_size: 7249924 dataset_size: 12064105 - config_name: eu-lv splits: - name: train num_bytes: 4124882 num_examples: 36813 download_size: 2504925 dataset_size: 4124882 - config_name: eu-mk splits: - name: train num_bytes: 17492794 num_examples: 127072 download_size: 9422558 dataset_size: 17492794 - config_name: eu-ml splits: - name: train num_bytes: 7757339 num_examples: 42720 download_size: 3536579 dataset_size: 7757339 - config_name: eu-ms splits: - name: train num_bytes: 14381735 num_examples: 129201 download_size: 8176456 dataset_size: 14381735 - config_name: eu-nl splits: - name: train num_bytes: 71323302 num_examples: 619884 download_size: 41644849 dataset_size: 71323302 - config_name: eu-no splits: - name: train num_bytes: 38313391 num_examples: 344246 download_size: 22405880 dataset_size: 38313391 - config_name: eu-pl splits: - name: train num_bytes: 73963554 num_examples: 644058 download_size: 44036892 dataset_size: 73963554 - config_name: eu-pt splits: - name: train num_bytes: 73488457 num_examples: 641295 download_size: 42988209 dataset_size: 73488457 - config_name: eu-ro splits: - name: train num_bytes: 82308461 num_examples: 715988 download_size: 48303764 dataset_size: 82308461 - config_name: eu-ru splits: - name: train num_bytes: 61034749 num_examples: 435117 download_size: 33483197 dataset_size: 61034749 - config_name: eu-si splits: - name: train num_bytes: 5612825 num_examples: 34559 download_size: 2768890 dataset_size: 5612825 - config_name: eu-sk splits: - name: train num_bytes: 24486208 num_examples: 215375 download_size: 14796872 dataset_size: 24486208 - config_name: eu-sl splits: - name: train num_bytes: 55101632 num_examples: 495622 download_size: 32969200 dataset_size: 55101632 - config_name: eu-sq splits: - name: train num_bytes: 
6695153 num_examples: 57894 download_size: 3939135 dataset_size: 6695153 - config_name: eu-sr splits: - name: train num_bytes: 71309377 num_examples: 615814 download_size: 42056475 dataset_size: 71309377 - config_name: eu-sv splits: - name: train num_bytes: 60282463 num_examples: 528236 download_size: 35055766 dataset_size: 60282463 - config_name: eu-ta splits: - name: train num_bytes: 550584 num_examples: 3353 download_size: 257139 dataset_size: 550584 - config_name: eu-te splits: - name: train num_bytes: 101918 num_examples: 734 download_size: 54838 dataset_size: 101918 - config_name: eu-th splits: - name: train num_bytes: 13267277 num_examples: 80754 download_size: 6556528 dataset_size: 13267277 - config_name: eu-tl splits: - name: train num_bytes: 274996 num_examples: 2597 download_size: 161625 dataset_size: 274996 - config_name: eu-tr splits: - name: train num_bytes: 83598165 num_examples: 722770 download_size: 48860202 dataset_size: 83598165 - config_name: eu-uk splits: - name: train num_bytes: 5470562 num_examples: 37731 download_size: 3032188 dataset_size: 5470562 - config_name: eu-ur splits: - name: train num_bytes: 240549 num_examples: 2010 download_size: 142255 dataset_size: 240549 - config_name: eu-vi splits: - name: train num_bytes: 24896791 num_examples: 201280 download_size: 13893245 dataset_size: 24896791 - config_name: fa-af splits: - name: train num_bytes: 441214 num_examples: 3065 download_size: 251341 dataset_size: 441214 - config_name: fa-ar splits: - name: train num_bytes: 829722018 num_examples: 5493576 download_size: 423414559 dataset_size: 829722018 - config_name: fa-bg splits: - name: train num_bytes: 790775090 num_examples: 4858857 download_size: 396217345 dataset_size: 790775090 - config_name: fa-bn splits: - name: train num_bytes: 42725655 num_examples: 249761 download_size: 19922037 dataset_size: 42725655 - config_name: fa-bs splits: - name: train num_bytes: 316129784 num_examples: 2282321 download_size: 172449573 dataset_size: 
316129784 - config_name: fa-ca splits: - name: train num_bytes: 22213142 num_examples: 156841 download_size: 11914008 dataset_size: 22213142 - config_name: fa-cs splits: - name: train num_bytes: 734097243 num_examples: 5280170 download_size: 400473315 dataset_size: 734097243 - config_name: fa-da splits: - name: train num_bytes: 520103141 num_examples: 3723055 download_size: 276464918 dataset_size: 520103141 - config_name: fa-de splits: - name: train num_bytes: 527197307 num_examples: 3672356 download_size: 281666934 dataset_size: 527197307 - config_name: fa-el splits: - name: train num_bytes: 861395572 num_examples: 5135640 download_size: 432199144 dataset_size: 861395572 - config_name: fa-en splits: - name: train num_bytes: 839267322 num_examples: 6198109 download_size: 444546578 dataset_size: 839267322 - config_name: fa-es splits: - name: train num_bytes: 780982364 num_examples: 5578242 download_size: 418153498 dataset_size: 780982364 - config_name: fa-et splits: - name: train num_bytes: 365149971 num_examples: 3037779 download_size: 228735837 dataset_size: 365149971 - config_name: fa-eu splits: - name: train num_bytes: 29142012 num_examples: 245775 download_size: 18294190 dataset_size: 29142012 - config_name: fa-fi splits: - name: train num_bytes: 630519583 num_examples: 4438697 download_size: 338017500 dataset_size: 630519583 - config_name: fa-fr splits: - name: train num_bytes: 628065541 num_examples: 4391138 download_size: 335117913 dataset_size: 628065541 - config_name: fa-gl splits: - name: train num_bytes: 4977428 num_examples: 34289 download_size: 2727894 dataset_size: 4977428 - config_name: fa-he splits: - name: train num_bytes: 759729633 num_examples: 4944773 download_size: 383412688 dataset_size: 759729633 - config_name: fa-hi splits: - name: train num_bytes: 8412178 num_examples: 48917 download_size: 3916312 dataset_size: 8412178 - config_name: fa-hr splits: - name: train num_bytes: 655665827 num_examples: 4763534 download_size: 356358920 
dataset_size: 655665827 - config_name: fa-hu splits: - name: train num_bytes: 725429558 num_examples: 5201071 download_size: 394375094 dataset_size: 725429558 - config_name: fa-id splits: - name: train num_bytes: 418200692 num_examples: 3014021 download_size: 217778258 dataset_size: 418200692 - config_name: fa-is splits: - name: train num_bytes: 40825950 num_examples: 282354 download_size: 22308706 dataset_size: 40825950 - config_name: fa-it splits: - name: train num_bytes: 594692793 num_examples: 4172470 download_size: 319966892 dataset_size: 594692793 - config_name: fa-ja splits: - name: train num_bytes: 138339335 num_examples: 1003540 download_size: 72642936 dataset_size: 138339335 - config_name: fa-ka splits: - name: train num_bytes: 16916468 num_examples: 96319 download_size: 7817398 dataset_size: 16916468 - config_name: fa-kk splits: - name: train num_bytes: 145968 num_examples: 1010 download_size: 80333 dataset_size: 145968 - config_name: fa-ko splits: - name: train num_bytes: 77184218 num_examples: 559268 download_size: 41696782 dataset_size: 77184218 - config_name: fa-lt splits: - name: train num_bytes: 83557934 num_examples: 615776 download_size: 46404602 dataset_size: 83557934 - config_name: fa-lv splits: - name: train num_bytes: 30704766 num_examples: 228404 download_size: 17073357 dataset_size: 30704766 - config_name: fa-mk splits: - name: train num_bytes: 182133904 num_examples: 1130441 download_size: 91441894 dataset_size: 182133904 - config_name: fa-ml splits: - name: train num_bytes: 33886591 num_examples: 171298 download_size: 15042905 dataset_size: 33886591 - config_name: fa-ms splits: - name: train num_bytes: 136176676 num_examples: 1003616 download_size: 71398776 dataset_size: 136176676 - config_name: fa-nl splits: - name: train num_bytes: 688115102 num_examples: 4803759 download_size: 366568756 dataset_size: 688115102 - config_name: fa-no splits: - name: train num_bytes: 331811948 num_examples: 2389634 download_size: 177331980 dataset_size: 
331811948 - config_name: fa-pl splits: - name: train num_bytes: 714495457 num_examples: 5037487 download_size: 387657665 dataset_size: 714495457 - config_name: fa-pt splits: - name: train num_bytes: 692321360 num_examples: 4927719 download_size: 370190228 dataset_size: 692321360 - config_name: fa-ro splits: - name: train num_bytes: 787583777 num_examples: 5646113 download_size: 422300987 dataset_size: 787583777 - config_name: fa-ru splits: - name: train num_bytes: 590935582 num_examples: 3605885 download_size: 299648868 dataset_size: 590935582 - config_name: fa-si splits: - name: train num_bytes: 51605023 num_examples: 292777 download_size: 23948500 dataset_size: 51605023 - config_name: fa-sk splits: - name: train num_bytes: 209629756 num_examples: 1518563 download_size: 115176817 dataset_size: 209629756 - config_name: fa-sl splits: - name: train num_bytes: 541297797 num_examples: 3932093 download_size: 293957151 dataset_size: 541297797 - config_name: fa-sq splits: - name: train num_bytes: 112670101 num_examples: 811976 download_size: 60925128 dataset_size: 112670101 - config_name: fa-sr splits: - name: train num_bytes: 655341255 num_examples: 4628679 download_size: 352768566 dataset_size: 655341255 - config_name: fa-sv splits: - name: train num_bytes: 535000989 num_examples: 3784463 download_size: 283997947 dataset_size: 535000989 - config_name: fa-ta splits: - name: train num_bytes: 2728002 num_examples: 14703 download_size: 1208945 dataset_size: 2728002 - config_name: fa-te splits: - name: train num_bytes: 1776988 num_examples: 10112 download_size: 829989 dataset_size: 1776988 - config_name: fa-th splits: - name: train num_bytes: 204207449 num_examples: 1132448 download_size: 94563745 dataset_size: 204207449 - config_name: fa-tl splits: - name: train num_bytes: 955565 num_examples: 7018 download_size: 526730 dataset_size: 955565 - config_name: fa-tr splits: - name: train num_bytes: 777929872 num_examples: 5556914 download_size: 415510437 dataset_size: 777929872 
- config_name: fa-uk splits: - name: train num_bytes: 37889986 num_examples: 238453 download_size: 19472177 dataset_size: 37889986 - config_name: fa-ur splits: - name: train num_bytes: 2982033 num_examples: 19034 download_size: 1526952 dataset_size: 2982033 - config_name: fa-vi splits: - name: train num_bytes: 225790751 num_examples: 1514036 download_size: 116902627 dataset_size: 225790751 - config_name: fi-af splits: - name: train num_bytes: 1189954 num_examples: 10250 download_size: 717619 dataset_size: 1189954 - config_name: fi-ar splits: - name: train num_bytes: 2403140444 num_examples: 17120182 download_size: 1307692614 dataset_size: 2403140444 - config_name: fi-bg splits: - name: train num_bytes: 2986401978 num_examples: 20723534 download_size: 1583985961 dataset_size: 2986401978 - config_name: fi-bn splits: - name: train num_bytes: 44551341 num_examples: 279351 download_size: 21742104 dataset_size: 44551341 - config_name: fi-bs splits: - name: train num_bytes: 931756182 num_examples: 7924786 download_size: 552621966 dataset_size: 931756182 - config_name: fi-ca splits: - name: train num_bytes: 38747384 num_examples: 323534 download_size: 22555790 dataset_size: 38747384 - config_name: fi-cs splits: - name: train num_bytes: 2581423264 num_examples: 21447959 download_size: 1522761753 dataset_size: 2581423264 - config_name: fi-da splits: - name: train num_bytes: 1435435794 num_examples: 12434485 download_size: 822270589 dataset_size: 1435435794 - config_name: fi-de splits: - name: train num_bytes: 1619305587 num_examples: 13212027 download_size: 928756254 dataset_size: 1619305587 - config_name: fi-el splits: - name: train num_bytes: 3058356549 num_examples: 20522413 download_size: 1622412788 dataset_size: 3058356549 - config_name: fi-en splits: - name: train num_bytes: 3281488758 num_examples: 27281566 download_size: 1875007453 dataset_size: 3281488758 - config_name: fi-es splits: - name: train num_bytes: 2992243747 num_examples: 24504287 download_size: 
1727509864 dataset_size: 2992243747 - config_name: fi-et splits: - name: train num_bytes: 928354157 num_examples: 9504879 download_size: 650014718 dataset_size: 928354157 - config_name: fi-eu splits: - name: train num_bytes: 55744616 num_examples: 581609 download_size: 39155592 dataset_size: 55744616 - config_name: fi-fa splits: - name: train num_bytes: 546302288 num_examples: 4438697 download_size: 338017500 dataset_size: 546302288 - config_name: fi-fr splits: - name: train num_bytes: 2354314648 num_examples: 19251543 download_size: 1349346757 dataset_size: 2354314648 - config_name: fi-gl splits: - name: train num_bytes: 6685192 num_examples: 56397 download_size: 4012657 dataset_size: 6685192 - config_name: fi-he splits: - name: train num_bytes: 2424689154 num_examples: 17820487 download_size: 1300474929 dataset_size: 2424689154 - config_name: fi-hi splits: - name: train num_bytes: 9051984 num_examples: 55597 download_size: 4418448 dataset_size: 9051984 - config_name: fi-hr splits: - name: train num_bytes: 2204125772 num_examples: 18675631 download_size: 1296657586 dataset_size: 2204125772 - config_name: fi-hu splits: - name: train num_bytes: 2532136957 num_examples: 20920774 download_size: 1486286684 dataset_size: 2532136957 - config_name: fi-hy splits: - name: train num_bytes: 159759 num_examples: 869 download_size: 96981 dataset_size: 159759 - config_name: fi-id splits: - name: train num_bytes: 700816481 num_examples: 5806363 download_size: 393605935 dataset_size: 700816481 - config_name: fi-is splits: - name: train num_bytes: 160767184 num_examples: 1387160 download_size: 94144541 dataset_size: 160767184 - config_name: fi-it splits: - name: train num_bytes: 2114857449 num_examples: 17183232 download_size: 1225916500 dataset_size: 2114857449 - config_name: fi-ja splits: - name: train num_bytes: 198778616 num_examples: 1599718 download_size: 112414550 dataset_size: 198778616 - config_name: fi-ka splits: - name: train num_bytes: 23710150 num_examples: 148416 
download_size: 11445754 dataset_size: 23710150 - config_name: fi-kk splits: - name: train num_bytes: 424865 num_examples: 3406 download_size: 236372 dataset_size: 424865 - config_name: fi-ko splits: - name: train num_bytes: 106913692 num_examples: 859310 download_size: 62736603 dataset_size: 106913692 - config_name: fi-lt splits: - name: train num_bytes: 133587965 num_examples: 1181965 download_size: 80442219 dataset_size: 133587965 - config_name: fi-lv splits: - name: train num_bytes: 52217413 num_examples: 462498 download_size: 31261563 dataset_size: 52217413 - config_name: fi-mk splits: - name: train num_bytes: 368107065 num_examples: 2624789 download_size: 195591258 dataset_size: 368107065 - config_name: fi-ml splits: - name: train num_bytes: 44141339 num_examples: 232475 download_size: 20214579 dataset_size: 44141339 - config_name: fi-ms splits: - name: train num_bytes: 148663375 num_examples: 1276963 download_size: 84059521 dataset_size: 148663375 - config_name: fi-nl splits: - name: train num_bytes: 2542834578 num_examples: 21067707 download_size: 1457985861 dataset_size: 2542834578 - config_name: fi-no splits: - name: train num_bytes: 876009036 num_examples: 7741367 download_size: 503210712 dataset_size: 876009036 - config_name: fi-pl splits: - name: train num_bytes: 2631187698 num_examples: 21727045 download_size: 1540894499 dataset_size: 2631187698 - config_name: fi-pt splits: - name: train num_bytes: 2370161017 num_examples: 19740360 download_size: 1364103529 dataset_size: 2370161017 - config_name: fi-ro splits: - name: train num_bytes: 2853204810 num_examples: 23464056 download_size: 1650735423 dataset_size: 2853204810 - config_name: fi-ru splits: - name: train num_bytes: 1825427582 num_examples: 12321319 download_size: 981648021 dataset_size: 1825427582 - config_name: fi-si splits: - name: train num_bytes: 66518418 num_examples: 391991 download_size: 32070537 dataset_size: 66518418 - config_name: fi-sk splits: - name: train num_bytes: 608740847 
num_examples: 5114203 download_size: 361318790 dataset_size: 608740847 - config_name: fi-sl splits: - name: train num_bytes: 1565930924 num_examples: 13576268 download_size: 922410965 dataset_size: 1565930924 - config_name: fi-sq splits: - name: train num_bytes: 175036983 num_examples: 1480967 download_size: 102174254 dataset_size: 175036983 - config_name: fi-sr splits: - name: train num_bytes: 2425281902 num_examples: 20217691 download_size: 1418788373 dataset_size: 2425281902 - config_name: fi-sv splits: - name: train num_bytes: 1672821349 num_examples: 14356833 download_size: 955354120 dataset_size: 1672821349 - config_name: fi-ta splits: - name: train num_bytes: 3686065 num_examples: 20077 download_size: 1674299 dataset_size: 3686065 - config_name: fi-te splits: - name: train num_bytes: 2903233 num_examples: 17129 download_size: 1410151 dataset_size: 2903233 - config_name: fi-th splits: - name: train num_bytes: 405574379 num_examples: 2288653 download_size: 194766529 dataset_size: 405574379 - config_name: fi-tl splits: - name: train num_bytes: 720568 num_examples: 5912 download_size: 429850 dataset_size: 720568 - config_name: fi-tr splits: - name: train num_bytes: 2738401163 num_examples: 22492040 download_size: 1574691928 dataset_size: 2738401163 - config_name: fi-uk splits: - name: train num_bytes: 75185052 num_examples: 527306 download_size: 41030733 dataset_size: 75185052 - config_name: fi-ur splits: - name: train num_bytes: 2810381 num_examples: 19434 download_size: 1534592 dataset_size: 2810381 - config_name: fi-vi splits: - name: train num_bytes: 327130397 num_examples: 2517078 download_size: 181127385 dataset_size: 327130397 - config_name: fr-af splits: - name: train num_bytes: 2228040 num_examples: 18563 download_size: 1323878 dataset_size: 2228040 - config_name: fr-ar splits: - name: train num_bytes: 2851130938 num_examples: 20181740 download_size: 1539009600 dataset_size: 2851130938 - config_name: fr-bg splits: - name: train num_bytes: 3685021850 
num_examples: 25448304 download_size: 1938105262 dataset_size: 3685021850 - config_name: fr-bn splits: - name: train num_bytes: 44187141 num_examples: 276210 download_size: 21530347 dataset_size: 44187141 - config_name: fr-bs splits: - name: train num_bytes: 1088530465 num_examples: 9151041 download_size: 637723975 dataset_size: 1088530465 - config_name: fr-ca splits: - name: train num_bytes: 47423771 num_examples: 392159 download_size: 27287084 dataset_size: 47423771 - config_name: fr-cs splits: - name: train num_bytes: 2809433818 num_examples: 27447126 download_size: 1943360811 dataset_size: 2809433818 - config_name: fr-da splits: - name: train num_bytes: 1311015663 num_examples: 10961829 download_size: 747814349 dataset_size: 1311015663 - config_name: fr-de splits: - name: train num_bytes: 1670916661 num_examples: 15994801 download_size: 1124495566 dataset_size: 1670916661 - config_name: fr-el splits: - name: train num_bytes: 3555042834 num_examples: 27288201 download_size: 2150547430 dataset_size: 3555042834 - config_name: fr-en splits: - name: train num_bytes: 5008106734 num_examples: 41763488 download_size: 2819512415 dataset_size: 5008106734 - config_name: fr-es splits: - name: train num_bytes: 3730212988 num_examples: 36142601 download_size: 2523824648 dataset_size: 3730212988 - config_name: fr-et splits: - name: train num_bytes: 935249797 num_examples: 9402906 download_size: 647149319 dataset_size: 935249797 - config_name: fr-eu splits: - name: train num_bytes: 62514817 num_examples: 636163 download_size: 43328430 dataset_size: 62514817 - config_name: fr-fa splits: - name: train num_bytes: 544898161 num_examples: 4391138 download_size: 335117913 dataset_size: 544898161 - config_name: fr-fi splits: - name: train num_bytes: 1980233347 num_examples: 19251543 download_size: 1349346757 dataset_size: 1980233347 - config_name: fr-gl splits: - name: train num_bytes: 15922247 num_examples: 131961 download_size: 9381543 dataset_size: 15922247 - config_name: fr-he 
splits: - name: train num_bytes: 2910110691 num_examples: 21218393 download_size: 1546196884 dataset_size: 2910110691 - config_name: fr-hi splits: - name: train num_bytes: 9087824 num_examples: 54720 download_size: 4402853 dataset_size: 9087824 - config_name: fr-hr splits: - name: train num_bytes: 2728962199 num_examples: 22738110 download_size: 1584761881 dataset_size: 2728962199 - config_name: fr-hu splits: - name: train num_bytes: 3473508659 num_examples: 28319528 download_size: 2009978734 dataset_size: 3473508659 - config_name: fr-hy splits: - name: train num_bytes: 132430 num_examples: 668 download_size: 81770 dataset_size: 132430 - config_name: fr-id splits: - name: train num_bytes: 751820179 num_examples: 6195915 download_size: 417900010 dataset_size: 751820179 - config_name: fr-is splits: - name: train num_bytes: 155872712 num_examples: 1330231 download_size: 90643773 dataset_size: 155872712 - config_name: fr-it splits: - name: train num_bytes: 2878720322 num_examples: 23099708 download_size: 1647897770 dataset_size: 2878720322 - config_name: fr-ja splits: - name: train num_bytes: 202987393 num_examples: 1601666 download_size: 114003817 dataset_size: 202987393 - config_name: fr-ka splits: - name: train num_bytes: 22617114 num_examples: 139626 download_size: 10912737 dataset_size: 22617114 - config_name: fr-kk splits: - name: train num_bytes: 153751 num_examples: 1336 download_size: 90158 dataset_size: 153751 - config_name: fr-ko splits: - name: train num_bytes: 125012256 num_examples: 979642 download_size: 72608896 dataset_size: 125012256 - config_name: fr-lt splits: - name: train num_bytes: 135371739 num_examples: 1183584 download_size: 80724884 dataset_size: 135371739 - config_name: fr-lv splits: - name: train num_bytes: 49000094 num_examples: 428815 download_size: 29140668 dataset_size: 49000094 - config_name: fr-mk splits: - name: train num_bytes: 373046032 num_examples: 2652402 download_size: 196917290 dataset_size: 373046032 - config_name: fr-ml 
splits: - name: train num_bytes: 50578439 num_examples: 265993 download_size: 23079546 dataset_size: 50578439 - config_name: fr-ms splits: - name: train num_bytes: 166280312 num_examples: 1414481 download_size: 93055506 dataset_size: 166280312 - config_name: fr-nl splits: - name: train num_bytes: 3118986373 num_examples: 25330038 download_size: 1769479686 dataset_size: 3118986373 - config_name: fr-no splits: - name: train num_bytes: 747933681 num_examples: 6324199 download_size: 428745376 dataset_size: 747933681 - config_name: fr-pl splits: - name: train num_bytes: 3514179003 num_examples: 28457951 download_size: 2031672762 dataset_size: 3514179003 - config_name: fr-pt splits: - name: train num_bytes: 2852278612 num_examples: 23387253 download_size: 1622783241 dataset_size: 2852278612 - config_name: fr-ro splits: - name: train num_bytes: 3727207847 num_examples: 30446215 download_size: 2128744727 dataset_size: 3727207847 - config_name: fr-ru splits: - name: train num_bytes: 2457580238 num_examples: 16752259 download_size: 1307520392 dataset_size: 2457580238 - config_name: fr-si splits: - name: train num_bytes: 66445847 num_examples: 393481 download_size: 32060790 dataset_size: 66445847 - config_name: fr-sk splits: - name: train num_bytes: 712758434 num_examples: 5922885 download_size: 418014848 dataset_size: 712758434 - config_name: fr-sl splits: - name: train num_bytes: 1711482681 num_examples: 14517180 download_size: 997076832 dataset_size: 1711482681 - config_name: fr-sq splits: - name: train num_bytes: 175302865 num_examples: 1474957 download_size: 101403285 dataset_size: 175302865 - config_name: fr-sr splits: - name: train num_bytes: 3310744904 num_examples: 27256897 download_size: 1913557283 dataset_size: 3310744904 - config_name: fr-sv splits: - name: train num_bytes: 1543030020 num_examples: 12708915 download_size: 877896446 dataset_size: 1543030020 - config_name: fr-ta splits: - name: train num_bytes: 3225678 num_examples: 17791 download_size: 1476658 
dataset_size: 3225678 - config_name: fr-te splits: - name: train num_bytes: 1987575 num_examples: 11929 download_size: 960722 dataset_size: 1987575 - config_name: fr-th splits: - name: train num_bytes: 406222789 num_examples: 2318980 download_size: 194945994 dataset_size: 406222789 - config_name: fr-tl splits: - name: train num_bytes: 1596520 num_examples: 13176 download_size: 880547 dataset_size: 1596520 - config_name: fr-tr splits: - name: train num_bytes: 3574951864 num_examples: 29089409 download_size: 2031550045 dataset_size: 3574951864 - config_name: fr-uk splits: - name: train num_bytes: 88514049 num_examples: 632491 download_size: 47765131 dataset_size: 88514049 - config_name: fr-ur splits: - name: train num_bytes: 3017683 num_examples: 21346 download_size: 1636608 dataset_size: 3017683 - config_name: fr-vi splits: - name: train num_bytes: 358238205 num_examples: 2752315 download_size: 196637011 dataset_size: 358238205 - config_name: gl-ar splits: - name: train num_bytes: 9203553 num_examples: 77718 download_size: 5995937 dataset_size: 9203553 - config_name: gl-bg splits: - name: train num_bytes: 9782016 num_examples: 69047 download_size: 5322244 dataset_size: 9782016 - config_name: gl-bn splits: - name: train num_bytes: 1153952 num_examples: 7062 download_size: 568647 dataset_size: 1153952 - config_name: gl-bs splits: - name: train num_bytes: 4593381 num_examples: 38761 download_size: 2797117 dataset_size: 4593381 - config_name: gl-ca splits: - name: train num_bytes: 1960267 num_examples: 16379 download_size: 1157012 dataset_size: 1960267 - config_name: gl-cs splits: - name: train num_bytes: 11309443 num_examples: 96875 download_size: 6858740 dataset_size: 11309443 - config_name: gl-da splits: - name: train num_bytes: 5081892 num_examples: 42876 download_size: 3005975 dataset_size: 5081892 - config_name: gl-de splits: - name: train num_bytes: 9238905 num_examples: 77160 download_size: 5447997 dataset_size: 9238905 - config_name: gl-el splits: - name: train 
num_bytes: 15637046 num_examples: 105495 download_size: 8532242 dataset_size: 15637046 - config_name: gl-en splits: - name: train num_bytes: 24667369 num_examples: 203550 download_size: 14416268 dataset_size: 24667369 - config_name: gl-es splits: - name: train num_bytes: 22061356 num_examples: 216940 download_size: 15383010 dataset_size: 22061356 - config_name: gl-et splits: - name: train num_bytes: 4811548 num_examples: 49199 download_size: 3456028 dataset_size: 4811548 - config_name: gl-eu splits: - name: train num_bytes: 1273488 num_examples: 12391 download_size: 894771 dataset_size: 1273488 - config_name: gl-fa splits: - name: train num_bytes: 4325998 num_examples: 34289 download_size: 2727894 dataset_size: 4325998 - config_name: gl-fi splits: - name: train num_bytes: 5630407 num_examples: 56397 download_size: 4012657 dataset_size: 5630407 - config_name: gl-fr splits: - name: train num_bytes: 13427141 num_examples: 131961 download_size: 9381543 dataset_size: 13427141 - config_name: gl-he splits: - name: train num_bytes: 6984479 num_examples: 52252 download_size: 3867896 dataset_size: 6984479 - config_name: gl-hi splits: - name: train num_bytes: 92539 num_examples: 547 download_size: 49259 dataset_size: 92539 - config_name: gl-hr splits: - name: train num_bytes: 9085106 num_examples: 78281 download_size: 5512156 dataset_size: 9085106 - config_name: gl-hu splits: - name: train num_bytes: 9703945 num_examples: 82630 download_size: 5852939 dataset_size: 9703945 - config_name: gl-id splits: - name: train num_bytes: 3662424 num_examples: 30746 download_size: 2100503 dataset_size: 3662424 - config_name: gl-is splits: - name: train num_bytes: 465844 num_examples: 4048 download_size: 287673 dataset_size: 465844 - config_name: gl-it splits: - name: train num_bytes: 13927256 num_examples: 114094 download_size: 8290641 dataset_size: 13927256 - config_name: gl-ja splits: - name: train num_bytes: 2699867 num_examples: 21395 download_size: 1564764 dataset_size: 2699867 - 
config_name: gl-ka splits: - name: train num_bytes: 299213 num_examples: 1814 download_size: 152611 dataset_size: 299213 - config_name: gl-ko splits: - name: train num_bytes: 1136223 num_examples: 8981 download_size: 711346 dataset_size: 1136223 - config_name: gl-lt splits: - name: train num_bytes: 1616588 num_examples: 14151 download_size: 993170 dataset_size: 1616588 - config_name: gl-lv splits: - name: train num_bytes: 562418 num_examples: 5191 download_size: 352410 dataset_size: 562418 - config_name: gl-mk splits: - name: train num_bytes: 3874349 num_examples: 27702 download_size: 2087533 dataset_size: 3874349 - config_name: gl-ml splits: - name: train num_bytes: 890028 num_examples: 5223 download_size: 414591 dataset_size: 890028 - config_name: gl-ms splits: - name: train num_bytes: 1385473 num_examples: 12039 download_size: 815693 dataset_size: 1385473 - config_name: gl-nl splits: - name: train num_bytes: 8264867 num_examples: 69195 download_size: 4903574 dataset_size: 8264867 - config_name: gl-no splits: - name: train num_bytes: 4044384 num_examples: 35138 download_size: 2396760 dataset_size: 4044384 - config_name: gl-pl splits: - name: train num_bytes: 11147504 num_examples: 92916 download_size: 6720319 dataset_size: 11147504 - config_name: gl-pt splits: - name: train num_bytes: 10215264 num_examples: 86232 download_size: 6061271 dataset_size: 10215264 - config_name: gl-ro splits: - name: train num_bytes: 11642857 num_examples: 99549 download_size: 6942287 dataset_size: 11642857 - config_name: gl-ru splits: - name: train num_bytes: 9703710 num_examples: 66369 download_size: 5336708 dataset_size: 9703710 - config_name: gl-si splits: - name: train num_bytes: 662297 num_examples: 4162 download_size: 338642 dataset_size: 662297 - config_name: gl-sk splits: - name: train num_bytes: 2582050 num_examples: 22737 download_size: 1591134 dataset_size: 2582050 - config_name: gl-sl splits: - name: train num_bytes: 6629311 num_examples: 57054 download_size: 4036786 
dataset_size: 6629311 - config_name: gl-sq splits: - name: train num_bytes: 1431243 num_examples: 12458 download_size: 866314 dataset_size: 1431243 - config_name: gl-sr splits: - name: train num_bytes: 12473896 num_examples: 104409 download_size: 7458752 dataset_size: 12473896 - config_name: gl-sv splits: - name: train num_bytes: 5435889 num_examples: 46351 download_size: 3235424 dataset_size: 5435889 - config_name: gl-th splits: - name: train num_bytes: 3720097 num_examples: 21253 download_size: 1826270 dataset_size: 3720097 - config_name: gl-tr splits: - name: train num_bytes: 15482246 num_examples: 125932 download_size: 9209958 dataset_size: 15482246 - config_name: gl-uk splits: - name: train num_bytes: 1046239 num_examples: 7572 download_size: 587049 dataset_size: 1046239 - config_name: gl-ur splits: - name: train num_bytes: 170522 num_examples: 1291 download_size: 98971 dataset_size: 170522 - config_name: gl-vi splits: - name: train num_bytes: 3463570 num_examples: 26267 download_size: 1955774 dataset_size: 3463570 - config_name: he-af splits: - name: train num_bytes: 1852213 num_examples: 14534 download_size: 1036476 dataset_size: 1852213 - config_name: he-ar splits: - name: train num_bytes: 3152249989 num_examples: 20577019 download_size: 1606988089 dataset_size: 3152249989 - config_name: he-bg splits: - name: train num_bytes: 3603224502 num_examples: 22887401 download_size: 1795565813 dataset_size: 3603224502 - config_name: he-bn splits: - name: train num_bytes: 51619098 num_examples: 302547 download_size: 24060846 dataset_size: 51619098 - config_name: he-bs splits: - name: train num_bytes: 1128616959 num_examples: 8603247 download_size: 617122802 dataset_size: 1128616959 - config_name: he-ca splits: - name: train num_bytes: 48887291 num_examples: 369068 download_size: 26369119 dataset_size: 48887291 - config_name: he-cs splits: - name: train num_bytes: 3280075533 num_examples: 24503253 download_size: 1786583742 dataset_size: 3280075533 - config_name: he-da 
splits: - name: train num_bytes: 1460114748 num_examples: 10992441 download_size: 779437750 dataset_size: 1460114748 - config_name: he-de splits: - name: train num_bytes: 1745743632 num_examples: 12751222 download_size: 932940603 dataset_size: 1745743632 - config_name: he-el splits: - name: train num_bytes: 3566328893 num_examples: 22042563 download_size: 1782039798 dataset_size: 3566328893 - config_name: he-en splits: - name: train num_bytes: 3937594473 num_examples: 29887386 download_size: 2079882854 dataset_size: 3937594473 - config_name: he-es splits: - name: train num_bytes: 3132575433 num_examples: 27059535 download_size: 1958564684 dataset_size: 3132575433 - config_name: he-et splits: - name: train num_bytes: 1090677466 num_examples: 9814491 download_size: 694847554 dataset_size: 1090677466 - config_name: he-eu splits: - name: train num_bytes: 62732680 num_examples: 566714 download_size: 39959198 dataset_size: 62732680 - config_name: he-fa splits: - name: train num_bytes: 664735929 num_examples: 4944773 download_size: 383412688 dataset_size: 664735929 - config_name: he-fi splits: - name: train num_bytes: 2076099830 num_examples: 17820487 download_size: 1300474929 dataset_size: 2076099830 - config_name: he-fr splits: - name: train num_bytes: 2493188517 num_examples: 21218393 download_size: 1546196884 dataset_size: 2493188517 - config_name: he-gl splits: - name: train num_bytes: 5991806 num_examples: 52252 download_size: 3867896 dataset_size: 5991806 - config_name: he-hi splits: - name: train num_bytes: 9988314 num_examples: 57847 download_size: 4624074 dataset_size: 9988314 - config_name: he-hr splits: - name: train num_bytes: 2704970069 num_examples: 20511439 download_size: 1468277577 dataset_size: 2704970069 - config_name: he-hu splits: - name: train num_bytes: 3219748284 num_examples: 23959866 download_size: 1746287843 dataset_size: 3219748284 - config_name: he-hy splits: - name: train num_bytes: 226069 num_examples: 1243 download_size: 127040 
dataset_size: 226069 - config_name: he-id splits: - name: train num_bytes: 844717659 num_examples: 6362286 download_size: 440779497 dataset_size: 844717659 - config_name: he-is splits: - name: train num_bytes: 163596049 num_examples: 1256013 download_size: 89215698 dataset_size: 163596049 - config_name: he-it splits: - name: train num_bytes: 2719208860 num_examples: 19908473 download_size: 1461101288 dataset_size: 2719208860 - config_name: he-ja splits: - name: train num_bytes: 232709314 num_examples: 1683291 download_size: 122554723 dataset_size: 232709314 - config_name: he-ka splits: - name: train num_bytes: 25817437 num_examples: 149062 download_size: 11892349 dataset_size: 25817437 - config_name: he-kk splits: - name: train num_bytes: 313740 num_examples: 2376 download_size: 166445 dataset_size: 313740 - config_name: he-ko splits: - name: train num_bytes: 152831015 num_examples: 1094716 download_size: 83198339 dataset_size: 152831015 - config_name: he-lt splits: - name: train num_bytes: 153498087 num_examples: 1220910 download_size: 85483389 dataset_size: 153498087 - config_name: he-lv splits: - name: train num_bytes: 58383023 num_examples: 461814 download_size: 32413400 dataset_size: 58383023 - config_name: he-mk splits: - name: train num_bytes: 421964200 num_examples: 2766101 download_size: 211090133 dataset_size: 421964200 - config_name: he-ml splits: - name: train num_bytes: 50446385 num_examples: 250067 download_size: 22218604 dataset_size: 50446385 - config_name: he-ms splits: - name: train num_bytes: 186774584 num_examples: 1455610 download_size: 98066940 dataset_size: 186774584 - config_name: he-nl splits: - name: train num_bytes: 3038751419 num_examples: 22186572 download_size: 1615591627 dataset_size: 3038751419 - config_name: he-no splits: - name: train num_bytes: 819402718 num_examples: 6213374 download_size: 439100512 dataset_size: 819402718 - config_name: he-pl splits: - name: train num_bytes: 3374035814 num_examples: 24813352 download_size: 
1826488724 dataset_size: 3374035814 - config_name: he-pt splits: - name: train num_bytes: 2842381792 num_examples: 21226267 download_size: 1516205541 dataset_size: 2842381792 - config_name: he-ro splits: - name: train num_bytes: 3553247731 num_examples: 26370152 download_size: 1901646704 dataset_size: 3553247731 - config_name: he-ru splits: - name: train num_bytes: 2360433687 num_examples: 14873399 download_size: 1192068865 dataset_size: 2360433687 - config_name: he-si splits: - name: train num_bytes: 77642623 num_examples: 435865 download_size: 35869186 dataset_size: 77642623 - config_name: he-sk splits: - name: train num_bytes: 737560614 num_examples: 5616165 download_size: 404611922 dataset_size: 737560614 - config_name: he-sl splits: - name: train num_bytes: 1878776461 num_examples: 14422346 download_size: 1021548451 dataset_size: 1878776461 - config_name: he-sq splits: - name: train num_bytes: 202278128 num_examples: 1557714 download_size: 109561454 dataset_size: 202278128 - config_name: he-sr splits: - name: train num_bytes: 2901939549 num_examples: 21620148 download_size: 1567275465 dataset_size: 2901939549 - config_name: he-sv splits: - name: train num_bytes: 1661562661 num_examples: 12304434 download_size: 884094810 dataset_size: 1661562661 - config_name: he-ta splits: - name: train num_bytes: 4547167 num_examples: 23994 download_size: 1983254 dataset_size: 4547167 - config_name: he-te splits: - name: train num_bytes: 3254416 num_examples: 18651 download_size: 1502025 dataset_size: 3254416 - config_name: he-th splits: - name: train num_bytes: 502279597 num_examples: 2665997 download_size: 230468545 dataset_size: 502279597 - config_name: he-tl splits: - name: train num_bytes: 930470 num_examples: 6583 download_size: 522892 dataset_size: 930470 - config_name: he-tr splits: - name: train num_bytes: 3394858605 num_examples: 25179227 download_size: 1808232687 dataset_size: 3394858605 - config_name: he-uk splits: - name: train num_bytes: 90878453 num_examples: 
593074 download_size: 46534583 dataset_size: 90878453 - config_name: he-ur splits: - name: train num_bytes: 3206973 num_examples: 20559 download_size: 1646292 dataset_size: 3206973 - config_name: he-vi splits: - name: train num_bytes: 399289605 num_examples: 2813730 download_size: 206730057 dataset_size: 399289605 - config_name: hi-af splits: - name: train num_bytes: 162416 num_examples: 1146 download_size: 84533 dataset_size: 162416 - config_name: hi-ar splits: - name: train num_bytes: 12445990 num_examples: 70935 download_size: 5829567 dataset_size: 12445990 - config_name: hi-bg splits: - name: train num_bytes: 10832360 num_examples: 58902 download_size: 4997481 dataset_size: 10832360 - config_name: hi-bn splits: - name: train num_bytes: 2517417 num_examples: 12713 download_size: 1086684 dataset_size: 2517417 - config_name: hi-bs splits: - name: train num_bytes: 4365187 num_examples: 27317 download_size: 2177685 dataset_size: 4365187 - config_name: hi-ca splits: - name: train num_bytes: 834266 num_examples: 5430 download_size: 398082 dataset_size: 834266 - config_name: hi-cs splits: - name: train num_bytes: 9679955 num_examples: 61895 download_size: 4770873 dataset_size: 9679955 - config_name: hi-da splits: - name: train num_bytes: 8250390 num_examples: 51829 download_size: 3984917 dataset_size: 8250390 - config_name: hi-de splits: - name: train num_bytes: 9081927 num_examples: 56047 download_size: 4407106 dataset_size: 9081927 - config_name: hi-el splits: - name: train num_bytes: 11434968 num_examples: 62014 download_size: 5291694 dataset_size: 11434968 - config_name: hi-en splits: - name: train num_bytes: 13845344 num_examples: 93016 download_size: 6595622 dataset_size: 13845344 - config_name: hi-es splits: - name: train num_bytes: 9908349 num_examples: 71209 download_size: 5460637 dataset_size: 9908349 - config_name: hi-et splits: - name: train num_bytes: 6208897 num_examples: 43981 download_size: 3448393 dataset_size: 6208897 - config_name: hi-eu splits: - 
name: train num_bytes: 1433853 num_examples: 9977 download_size: 796314 dataset_size: 1433853 - config_name: hi-fa splits: - name: train num_bytes: 7506328 num_examples: 48917 download_size: 3916312 dataset_size: 7506328 - config_name: hi-fi splits: - name: train num_bytes: 8045357 num_examples: 55597 download_size: 4418448 dataset_size: 8045357 - config_name: hi-fr splits: - name: train num_bytes: 8071713 num_examples: 54720 download_size: 4402853 dataset_size: 8071713 - config_name: hi-gl splits: - name: train num_bytes: 83627 num_examples: 547 download_size: 49259 dataset_size: 83627 - config_name: hi-he splits: - name: train num_bytes: 8882547 num_examples: 57847 download_size: 4624074 dataset_size: 8882547 - config_name: hi-hr splits: - name: train num_bytes: 9356909 num_examples: 59919 download_size: 4607609 dataset_size: 9356909 - config_name: hi-hu splits: - name: train num_bytes: 9551782 num_examples: 60025 download_size: 4702872 dataset_size: 9551782 - config_name: hi-id splits: - name: train num_bytes: 7583628 num_examples: 48392 download_size: 3592492 dataset_size: 7583628 - config_name: hi-is splits: - name: train num_bytes: 738048 num_examples: 4562 download_size: 364946 dataset_size: 738048 - config_name: hi-it splits: - name: train num_bytes: 7707878 num_examples: 47494 download_size: 3763698 dataset_size: 7707878 - config_name: hi-ja splits: - name: train num_bytes: 5460034 num_examples: 33672 download_size: 2586335 dataset_size: 5460034 - config_name: hi-ka splits: - name: train num_bytes: 187469 num_examples: 796 download_size: 84783 dataset_size: 187469 - config_name: hi-ko splits: - name: train num_bytes: 3325820 num_examples: 21188 download_size: 1603610 dataset_size: 3325820 - config_name: hi-lt splits: - name: train num_bytes: 3599806 num_examples: 23673 download_size: 1778821 dataset_size: 3599806 - config_name: hi-lv splits: - name: train num_bytes: 1829355 num_examples: 12605 download_size: 899842 dataset_size: 1829355 - config_name: 
hi-mk splits: - name: train num_bytes: 3171731 num_examples: 16986 download_size: 1465548 dataset_size: 3171731 - config_name: hi-ml splits: - name: train num_bytes: 1137993 num_examples: 5332 download_size: 474643 dataset_size: 1137993 - config_name: hi-ms splits: - name: train num_bytes: 4341050 num_examples: 27906 download_size: 2067105 dataset_size: 4341050 - config_name: hi-nl splits: - name: train num_bytes: 9185691 num_examples: 55034 download_size: 4438322 dataset_size: 9185691 - config_name: hi-no splits: - name: train num_bytes: 6448603 num_examples: 41599 download_size: 3109542 dataset_size: 6448603 - config_name: hi-pl splits: - name: train num_bytes: 9706804 num_examples: 59261 download_size: 4775395 dataset_size: 9706804 - config_name: hi-pt splits: - name: train num_bytes: 7803770 num_examples: 49716 download_size: 3798868 dataset_size: 7803770 - config_name: hi-ro splits: - name: train num_bytes: 11247826 num_examples: 70415 download_size: 5476088 dataset_size: 11247826 - config_name: hi-ru splits: - name: train num_bytes: 7424144 num_examples: 42212 download_size: 3483720 dataset_size: 7424144 - config_name: hi-si splits: - name: train num_bytes: 2405564 num_examples: 11405 download_size: 1057541 dataset_size: 2405564 - config_name: hi-sk splits: - name: train num_bytes: 4893412 num_examples: 31179 download_size: 2437751 dataset_size: 4893412 - config_name: hi-sl splits: - name: train num_bytes: 6810276 num_examples: 43385 download_size: 3338906 dataset_size: 6810276 - config_name: hi-sq splits: - name: train num_bytes: 4423950 num_examples: 27301 download_size: 2149717 dataset_size: 4423950 - config_name: hi-sr splits: - name: train num_bytes: 10554867 num_examples: 63706 download_size: 5108540 dataset_size: 10554867 - config_name: hi-sv splits: - name: train num_bytes: 8370311 num_examples: 52116 download_size: 4030313 dataset_size: 8370311 - config_name: hi-ta splits: - name: train num_bytes: 5488358 num_examples: 28531 download_size: 2229363 
dataset_size: 5488358 - config_name: hi-te splits: - name: train num_bytes: 4125248 num_examples: 23181 download_size: 1737544 dataset_size: 4125248 - config_name: hi-th splits: - name: train num_bytes: 5351584 num_examples: 25057 download_size: 2295480 dataset_size: 5351584 - config_name: hi-tl splits: - name: train num_bytes: 77691 num_examples: 506 download_size: 40618 dataset_size: 77691 - config_name: hi-tr splits: - name: train num_bytes: 10265044 num_examples: 64085 download_size: 4968851 dataset_size: 10265044 - config_name: hi-uk splits: - name: train num_bytes: 847513 num_examples: 5724 download_size: 455597 dataset_size: 847513 - config_name: hi-ur splits: - name: train num_bytes: 227573 num_examples: 1259 download_size: 110769 dataset_size: 227573 - config_name: hi-vi splits: - name: train num_bytes: 5497555 num_examples: 32991 download_size: 2609815 dataset_size: 5497555 - config_name: hr-af splits: - name: train num_bytes: 1734617 num_examples: 14796 download_size: 1056803 dataset_size: 1734617 - config_name: hr-ar splits: - name: train num_bytes: 2724982376 num_examples: 20034003 download_size: 1500575586 dataset_size: 2724982376 - config_name: hr-bg splits: - name: train num_bytes: 3445341125 num_examples: 24640509 download_size: 1846791655 dataset_size: 3445341125 - config_name: hr-bn splits: - name: train num_bytes: 47244970 num_examples: 303620 download_size: 23322427 dataset_size: 47244970 - config_name: hr-bs splits: - name: train num_bytes: 1140384114 num_examples: 10476787 download_size: 672084885 dataset_size: 1140384114 - config_name: hr-ca splits: - name: train num_bytes: 42721456 num_examples: 368058 download_size: 25193360 dataset_size: 42721456 - config_name: hr-cs splits: - name: train num_bytes: 3024289366 num_examples: 25833568 download_size: 1800612714 dataset_size: 3024289366 - config_name: hr-da splits: - name: train num_bytes: 1315370139 num_examples: 11400846 download_size: 768447855 dataset_size: 1315370139 - config_name: hr-de 
splits: - name: train num_bytes: 1655626799 num_examples: 13798486 download_size: 963557732 dataset_size: 1655626799 - config_name: hr-el splits: - name: train num_bytes: 3546784465 num_examples: 24498188 download_size: 1901454018 dataset_size: 3546784465 - config_name: hr-en splits: - name: train num_bytes: 4070450945 num_examples: 35131729 download_size: 2344656938 dataset_size: 4070450945 - config_name: hr-es splits: - name: train num_bytes: 3049445960 num_examples: 30622533 download_size: 2124168311 dataset_size: 3049445960 - config_name: hr-et splits: - name: train num_bytes: 946542342 num_examples: 9977630 download_size: 674180740 dataset_size: 946542342 - config_name: hr-eu splits: - name: train num_bytes: 54396776 num_examples: 579474 download_size: 38799388 dataset_size: 54396776 - config_name: hr-fa splits: - name: train num_bytes: 565721214 num_examples: 4763534 download_size: 356358920 dataset_size: 565721214 - config_name: hr-fi splits: - name: train num_bytes: 1844359930 num_examples: 18675631 download_size: 1296657586 dataset_size: 1844359930 - config_name: hr-fr splits: - name: train num_bytes: 2290433190 num_examples: 22738110 download_size: 1584761881 dataset_size: 2290433190 - config_name: hr-gl splits: - name: train num_bytes: 7625053 num_examples: 78281 download_size: 5512156 dataset_size: 7625053 - config_name: hr-he splits: - name: train num_bytes: 2303387015 num_examples: 20511439 download_size: 1468277577 dataset_size: 2303387015 - config_name: hr-hi splits: - name: train num_bytes: 8255002 num_examples: 59919 download_size: 4607609 dataset_size: 8255002 - config_name: hr-hu splits: - name: train num_bytes: 2996916397 num_examples: 25439687 download_size: 1776232001 dataset_size: 2996916397 - config_name: hr-hy splits: - name: train num_bytes: 297165 num_examples: 1660 download_size: 180026 dataset_size: 297165 - config_name: hr-id splits: - name: train num_bytes: 754139448 num_examples: 6474908 download_size: 428181456 dataset_size: 
754139448 - config_name: hr-is splits: - name: train num_bytes: 153359640 num_examples: 1353321 download_size: 90842241 dataset_size: 153359640 - config_name: hr-it splits: - name: train num_bytes: 2500714658 num_examples: 20807805 download_size: 1466880309 dataset_size: 2500714658 - config_name: hr-ja splits: - name: train num_bytes: 202107660 num_examples: 1663017 download_size: 116108395 dataset_size: 202107660 - config_name: hr-ka splits: - name: train num_bytes: 24632322 num_examples: 158093 download_size: 12001696 dataset_size: 24632322 - config_name: hr-kk splits: - name: train num_bytes: 257172 num_examples: 2154 download_size: 147155 dataset_size: 257172 - config_name: hr-ko splits: - name: train num_bytes: 128910300 num_examples: 1056375 download_size: 76734274 dataset_size: 128910300 - config_name: hr-lt splits: - name: train num_bytes: 138099708 num_examples: 1254801 download_size: 83874979 dataset_size: 138099708 - config_name: hr-lv splits: - name: train num_bytes: 50241726 num_examples: 456464 download_size: 30457455 dataset_size: 50241726 - config_name: hr-mk splits: - name: train num_bytes: 409257811 num_examples: 3061339 download_size: 218837531 dataset_size: 409257811 - config_name: hr-ml splits: - name: train num_bytes: 47037464 num_examples: 255893 download_size: 21783448 dataset_size: 47037464 - config_name: hr-ms splits: - name: train num_bytes: 166425077 num_examples: 1469356 download_size: 95064865 dataset_size: 166425077 - config_name: hr-nl splits: - name: train num_bytes: 2928157586 num_examples: 24557331 download_size: 1702960839 dataset_size: 2928157586 - config_name: hr-no splits: - name: train num_bytes: 748258842 num_examples: 6565140 download_size: 439095326 dataset_size: 748258842 - config_name: hr-pl splits: - name: train num_bytes: 3100199387 num_examples: 26123255 download_size: 1837533036 dataset_size: 3100199387 - config_name: hr-pt splits: - name: train num_bytes: 2607378438 num_examples: 22175260 download_size: 1518295990 
dataset_size: 2607378438 - config_name: hr-ro splits: - name: train num_bytes: 3487160879 num_examples: 29598040 download_size: 2035804010 dataset_size: 3487160879 - config_name: hr-ru splits: - name: train num_bytes: 2177709494 num_examples: 15288049 download_size: 1182200301 dataset_size: 2177709494 - config_name: hr-si splits: - name: train num_bytes: 69526977 num_examples: 428365 download_size: 34003251 dataset_size: 69526977 - config_name: hr-sk splits: - name: train num_bytes: 695516533 num_examples: 6019369 download_size: 416873699 dataset_size: 695516533 - config_name: hr-sl splits: - name: train num_bytes: 1747642664 num_examples: 15636933 download_size: 1038713163 dataset_size: 1747642664 - config_name: hr-sq splits: - name: train num_bytes: 182156405 num_examples: 1607729 download_size: 107067565 dataset_size: 182156405 - config_name: hr-sr splits: - name: train num_bytes: 2955309013 num_examples: 26643127 download_size: 1718353413 dataset_size: 2955309013 - config_name: hr-sv splits: - name: train num_bytes: 1517568386 num_examples: 12981919 download_size: 884182004 dataset_size: 1517568386 - config_name: hr-ta splits: - name: train num_bytes: 3638670 num_examples: 21298 download_size: 1684087 dataset_size: 3638670 - config_name: hr-te splits: - name: train num_bytes: 3072107 num_examples: 19063 download_size: 1501516 dataset_size: 3072107 - config_name: hr-th splits: - name: train num_bytes: 412557577 num_examples: 2404014 download_size: 201065178 dataset_size: 412557577 - config_name: hr-tl splits: - name: train num_bytes: 862761 num_examples: 7386 download_size: 523492 dataset_size: 862761 - config_name: hr-tr splits: - name: train num_bytes: 3173563090 num_examples: 26805517 download_size: 1844359679 dataset_size: 3173563090 - config_name: hr-uk splits: - name: train num_bytes: 80629274 num_examples: 589286 download_size: 44423103 dataset_size: 80629274 - config_name: hr-ur splits: - name: train num_bytes: 3037110 num_examples: 21869 download_size: 
1672479 dataset_size: 3037110 - config_name: hr-vi splits: - name: train num_bytes: 357164637 num_examples: 2842528 download_size: 200024269 dataset_size: 357164637 - config_name: hu-af splits: - name: train num_bytes: 1905522 num_examples: 16317 download_size: 1170404 dataset_size: 1905522 - config_name: hu-ar splits: - name: train num_bytes: 3301465865 num_examples: 23767831 download_size: 1817080056 dataset_size: 3301465865 - config_name: hu-bg splits: - name: train num_bytes: 4258070135 num_examples: 29923273 download_size: 2280298751 dataset_size: 4258070135 - config_name: hu-bn splits: - name: train num_bytes: 49308557 num_examples: 317418 download_size: 24418557 dataset_size: 49308557 - config_name: hu-bs splits: - name: train num_bytes: 1204441141 num_examples: 10315616 download_size: 720105363 dataset_size: 1204441141 - config_name: hu-ca splits: - name: train num_bytes: 49130221 num_examples: 416729 download_size: 28833679 dataset_size: 49130221 - config_name: hu-cs splits: - name: train num_bytes: 3880458592 num_examples: 32559149 download_size: 2298745342 dataset_size: 3880458592 - config_name: hu-da splits: - name: train num_bytes: 1410868629 num_examples: 11972539 download_size: 823174488 dataset_size: 1410868629 - config_name: hu-de splits: - name: train num_bytes: 1929800058 num_examples: 15764968 download_size: 1121269163 dataset_size: 1929800058 - config_name: hu-el splits: - name: train num_bytes: 4149925369 num_examples: 28253647 download_size: 2222302392 dataset_size: 4149925369 - config_name: hu-en splits: - name: train num_bytes: 5027017815 num_examples: 42655519 download_size: 2884293234 dataset_size: 5027017815 - config_name: hu-es splits: - name: train num_bytes: 4494867342 num_examples: 37100976 download_size: 2609202590 dataset_size: 4494867342 - config_name: hu-et splits: - name: train num_bytes: 1225453310 num_examples: 10559714 download_size: 725560854 dataset_size: 1225453310 - config_name: hu-eu splits: - name: train num_bytes: 
75954918 num_examples: 663683 download_size: 45277429 dataset_size: 75954918 - config_name: hu-fa splits: - name: train num_bytes: 725429558 num_examples: 5201071 download_size: 394375094 dataset_size: 725429558 - config_name: hu-fi splits: - name: train num_bytes: 2532136957 num_examples: 20920774 download_size: 1486286684 dataset_size: 2532136957 - config_name: hu-fr splits: - name: train num_bytes: 3473508659 num_examples: 28319528 download_size: 2009978734 dataset_size: 3473508659 - config_name: hu-gl splits: - name: train num_bytes: 9703945 num_examples: 82630 download_size: 5852939 dataset_size: 9703945 - config_name: hu-he splits: - name: train num_bytes: 3219748284 num_examples: 23959866 download_size: 1746287843 dataset_size: 3219748284 - config_name: hu-hi splits: - name: train num_bytes: 9551782 num_examples: 60025 download_size: 4702872 dataset_size: 9551782 - config_name: hu-hr splits: - name: train num_bytes: 2503172526 num_examples: 25439687 download_size: 1776232001 dataset_size: 2503172526 - config_name: hu-hy splits: - name: train num_bytes: 212604 num_examples: 1309 download_size: 128347 dataset_size: 212604 - config_name: hu-id splits: - name: train num_bytes: 859323254 num_examples: 7253258 download_size: 487122636 dataset_size: 859323254 - config_name: hu-is splits: - name: train num_bytes: 156090256 num_examples: 1369398 download_size: 92491454 dataset_size: 156090256 - config_name: hu-it splits: - name: train num_bytes: 3268332441 num_examples: 26750936 download_size: 1907932145 dataset_size: 3268332441 - config_name: hu-ja splits: - name: train num_bytes: 219200396 num_examples: 1767552 download_size: 125650541 dataset_size: 219200396 - config_name: hu-ka splits: - name: train num_bytes: 26151880 num_examples: 165843 download_size: 12843399 dataset_size: 26151880 - config_name: hu-kk splits: - name: train num_bytes: 312402 num_examples: 2577 download_size: 177896 dataset_size: 312402 - config_name: hu-ko splits: - name: train num_bytes: 
145528536 num_examples: 1168211 download_size: 86326388 dataset_size: 145528536 - config_name: hu-lt splits: - name: train num_bytes: 143894505 num_examples: 1295287 download_size: 87344929 dataset_size: 143894505 - config_name: hu-lv splits: - name: train num_bytes: 53952635 num_examples: 482310 download_size: 32670338 dataset_size: 53952635 - config_name: hu-mk splits: - name: train num_bytes: 399542576 num_examples: 2907735 download_size: 214955225 dataset_size: 399542576 - config_name: hu-ml splits: - name: train num_bytes: 51606467 num_examples: 279127 download_size: 23953648 dataset_size: 51606467 - config_name: hu-ms splits: - name: train num_bytes: 181546076 num_examples: 1581433 download_size: 103501072 dataset_size: 181546076 - config_name: hu-nl splits: - name: train num_bytes: 3337372681 num_examples: 27342219 download_size: 1936401203 dataset_size: 3337372681 - config_name: hu-no splits: - name: train num_bytes: 798165644 num_examples: 6855909 download_size: 467762414 dataset_size: 798165644 - config_name: hu-pl splits: - name: train num_bytes: 3923574798 num_examples: 32352502 download_size: 2315938647 dataset_size: 3923574798 - config_name: hu-pt splits: - name: train num_bytes: 3012533336 num_examples: 25146790 download_size: 1749810979 dataset_size: 3012533336 - config_name: hu-ro splits: - name: train num_bytes: 4102974579 num_examples: 34126008 download_size: 2388399672 dataset_size: 4102974579 - config_name: hu-ru splits: - name: train num_bytes: 2758725843 num_examples: 19139907 download_size: 1493407470 dataset_size: 2758725843 - config_name: hu-si splits: - name: train num_bytes: 76230892 num_examples: 460994 download_size: 37382220 dataset_size: 76230892 - config_name: hu-sk splits: - name: train num_bytes: 819006347 num_examples: 6978464 download_size: 488927989 dataset_size: 819006347 - config_name: hu-sl splits: - name: train num_bytes: 1850844233 num_examples: 16006005 download_size: 1101304863 dataset_size: 1850844233 - config_name: 
hu-sq splits: - name: train num_bytes: 191707585 num_examples: 1656527 download_size: 112931859 dataset_size: 191707585 - config_name: hu-sr splits: - name: train num_bytes: 3556699160 num_examples: 29841465 download_size: 2096524852 dataset_size: 3556699160 - config_name: hu-sv splits: - name: train num_bytes: 1652873404 num_examples: 13811059 download_size: 961782112 dataset_size: 1652873404 - config_name: hu-ta splits: - name: train num_bytes: 3572000 num_examples: 20629 download_size: 1646241 dataset_size: 3572000 - config_name: hu-te splits: - name: train num_bytes: 2840063 num_examples: 17572 download_size: 1394510 dataset_size: 2840063 - config_name: hu-th splits: - name: train num_bytes: 495340888 num_examples: 2867234 download_size: 241515546 dataset_size: 495340888 - config_name: hu-tl splits: - name: train num_bytes: 1259364 num_examples: 10790 download_size: 719603 dataset_size: 1259364 - config_name: hu-tr splits: - name: train num_bytes: 3908387371 num_examples: 32479149 download_size: 2262726187 dataset_size: 3908387371 - config_name: hu-uk splits: - name: train num_bytes: 94475716 num_examples: 685291 download_size: 52028384 dataset_size: 94475716 - config_name: hu-ur splits: - name: train num_bytes: 3153133 num_examples: 23064 download_size: 1733934 dataset_size: 3153133 - config_name: hu-vi splits: - name: train num_bytes: 378504911 num_examples: 2974607 download_size: 212133098 dataset_size: 378504911 - config_name: hy-ar splits: - name: train num_bytes: 419022 num_examples: 2308 download_size: 231120 dataset_size: 419022 - config_name: hy-bg splits: - name: train num_bytes: 189659 num_examples: 1119 download_size: 101518 dataset_size: 189659 - config_name: hy-bs splits: - name: train num_bytes: 7962 num_examples: 71 download_size: 8990 dataset_size: 7962 - config_name: hy-cs splits: - name: train num_bytes: 524997 num_examples: 3263 download_size: 308355 dataset_size: 524997 - config_name: hy-de splits: - name: train num_bytes: 365023 
num_examples: 2057 download_size: 215829 dataset_size: 365023 - config_name: hy-el splits: - name: train num_bytes: 735920 num_examples: 3235 download_size: 350099 dataset_size: 735920 - config_name: hy-en splits: - name: train num_bytes: 535378 num_examples: 3390 download_size: 304684 dataset_size: 535378 - config_name: hy-es splits: - name: train num_bytes: 554671 num_examples: 3304 download_size: 317793 dataset_size: 554671 - config_name: hy-et splits: - name: train num_bytes: 21963 num_examples: 149 download_size: 17260 dataset_size: 21963 - config_name: hy-fi splits: - name: train num_bytes: 159759 num_examples: 869 download_size: 96981 dataset_size: 159759 - config_name: hy-fr splits: - name: train num_bytes: 132430 num_examples: 668 download_size: 81770 dataset_size: 132430 - config_name: hy-he splits: - name: train num_bytes: 226069 num_examples: 1243 download_size: 127040 dataset_size: 226069 - config_name: hy-hr splits: - name: train num_bytes: 269303 num_examples: 1660 download_size: 180026 dataset_size: 269303 - config_name: hy-hu splits: - name: train num_bytes: 188952 num_examples: 1309 download_size: 128347 dataset_size: 188952 - config_name: hy-id splits: - name: train num_bytes: 18273 num_examples: 129 download_size: 15122 dataset_size: 18273 - config_name: hy-it splits: - name: train num_bytes: 36752 num_examples: 265 download_size: 24916 dataset_size: 36752 - config_name: hy-mk splits: - name: train num_bytes: 26331 num_examples: 152 download_size: 18853 dataset_size: 26331 - config_name: hy-ml splits: - name: train num_bytes: 7540 num_examples: 43 download_size: 8253 dataset_size: 7540 - config_name: hy-nl splits: - name: train num_bytes: 173308 num_examples: 982 download_size: 101299 dataset_size: 173308 - config_name: hy-pl splits: - name: train num_bytes: 509351 num_examples: 3006 download_size: 296479 dataset_size: 509351 - config_name: hy-pt splits: - name: train num_bytes: 376006 num_examples: 2328 download_size: 215620 dataset_size: 
376006 - config_name: hy-ro splits: - name: train num_bytes: 553282 num_examples: 3355 download_size: 322671 dataset_size: 553282 - config_name: hy-ru splits: - name: train num_bytes: 173828 num_examples: 780 download_size: 98673 dataset_size: 173828 - config_name: hy-sk splits: - name: train num_bytes: 6489 num_examples: 48 download_size: 8659 dataset_size: 6489 - config_name: hy-sl splits: - name: train num_bytes: 22697 num_examples: 166 download_size: 17625 dataset_size: 22697 - config_name: hy-sq splits: - name: train num_bytes: 17233 num_examples: 120 download_size: 14588 dataset_size: 17233 - config_name: hy-sr splits: - name: train num_bytes: 516507 num_examples: 3118 download_size: 301729 dataset_size: 516507 - config_name: hy-sv splits: - name: train num_bytes: 176116 num_examples: 995 download_size: 104059 dataset_size: 176116 - config_name: hy-tr splits: - name: train num_bytes: 552121 num_examples: 3271 download_size: 317775 dataset_size: 552121 - config_name: id-af splits: - name: train num_bytes: 510009 num_examples: 4555 download_size: 300481 dataset_size: 510009 - config_name: id-ar splits: - name: train num_bytes: 950577208 num_examples: 6950290 download_size: 503674983 dataset_size: 950577208 - config_name: id-bg splits: - name: train num_bytes: 985401445 num_examples: 7017758 download_size: 509187812 dataset_size: 985401445 - config_name: id-bn splits: - name: train num_bytes: 49238140 num_examples: 318214 download_size: 23402049 dataset_size: 49238140 - config_name: id-bs splits: - name: train num_bytes: 354311060 num_examples: 3038174 download_size: 202408931 dataset_size: 354311060 - config_name: id-ca splits: - name: train num_bytes: 26965121 num_examples: 223678 download_size: 15050721 dataset_size: 26965121 - config_name: id-cs splits: - name: train num_bytes: 855326519 num_examples: 7261871 download_size: 487112887 dataset_size: 855326519 - config_name: id-da splits: - name: train num_bytes: 573521735 num_examples: 4844433 download_size: 
319235326 dataset_size: 573521735 - config_name: id-de splits: - name: train num_bytes: 572278171 num_examples: 4668250 download_size: 319120129 dataset_size: 572278171 - config_name: id-el splits: - name: train num_bytes: 1207943843 num_examples: 7095656 download_size: 535432550 dataset_size: 1207943843 - config_name: id-en splits: - name: train num_bytes: 1055673441 num_examples: 9268181 download_size: 578401178 dataset_size: 1055673441 - config_name: id-es splits: - name: train num_bytes: 946159723 num_examples: 7971638 download_size: 527208058 dataset_size: 946159723 - config_name: id-et splits: - name: train num_bytes: 501018578 num_examples: 4282230 download_size: 283886176 dataset_size: 501018578 - config_name: id-eu splits: - name: train num_bytes: 34937246 num_examples: 307850 download_size: 19862779 dataset_size: 34937246 - config_name: id-fa splits: - name: train num_bytes: 418200692 num_examples: 3014021 download_size: 217778258 dataset_size: 418200692 - config_name: id-fi splits: - name: train num_bytes: 700816481 num_examples: 5806363 download_size: 393605935 dataset_size: 700816481 - config_name: id-fr splits: - name: train num_bytes: 751820179 num_examples: 6195915 download_size: 417900010 dataset_size: 751820179 - config_name: id-gl splits: - name: train num_bytes: 3662424 num_examples: 30746 download_size: 2100503 dataset_size: 3662424 - config_name: id-he splits: - name: train num_bytes: 844717659 num_examples: 6362286 download_size: 440779497 dataset_size: 844717659 - config_name: id-hi splits: - name: train num_bytes: 7583628 num_examples: 48392 download_size: 3592492 dataset_size: 7583628 - config_name: id-hr splits: - name: train num_bytes: 630721381 num_examples: 6474908 download_size: 428181456 dataset_size: 630721381 - config_name: id-hu splits: - name: train num_bytes: 719511934 num_examples: 7253258 download_size: 487122636 dataset_size: 719511934 - config_name: id-hy splits: - name: train num_bytes: 16548 num_examples: 129 
download_size: 15122 dataset_size: 16548 - config_name: id-is splits: - name: train num_bytes: 71941646 num_examples: 603955 download_size: 40939741 dataset_size: 71941646 - config_name: id-it splits: - name: train num_bytes: 701773216 num_examples: 5801967 download_size: 393193590 dataset_size: 701773216 - config_name: id-ja splits: - name: train num_bytes: 155192362 num_examples: 1252604 download_size: 84860147 dataset_size: 155192362 - config_name: id-ka splits: - name: train num_bytes: 13494232 num_examples: 85069 download_size: 6378131 dataset_size: 13494232 - config_name: id-kk splits: - name: train num_bytes: 125062 num_examples: 1034 download_size: 72302 dataset_size: 125062 - config_name: id-ko splits: - name: train num_bytes: 71712401 num_examples: 586876 download_size: 40701641 dataset_size: 71712401 - config_name: id-lt splits: - name: train num_bytes: 98435760 num_examples: 855428 download_size: 57251404 dataset_size: 98435760 - config_name: id-lv splits: - name: train num_bytes: 38714083 num_examples: 342357 download_size: 22393707 dataset_size: 38714083 - config_name: id-mk splits: - name: train num_bytes: 217438692 num_examples: 1543878 download_size: 112513694 dataset_size: 217438692 - config_name: id-ml splits: - name: train num_bytes: 35350259 num_examples: 193248 download_size: 15870074 dataset_size: 35350259 - config_name: id-ms splits: - name: train num_bytes: 180449798 num_examples: 1589953 download_size: 97188266 dataset_size: 180449798 - config_name: id-nl splits: - name: train num_bytes: 776081408 num_examples: 6413754 download_size: 431759172 dataset_size: 776081408 - config_name: id-pl splits: - name: train num_bytes: 829869833 num_examples: 6914619 download_size: 470877668 dataset_size: 829869833 - config_name: id-pt splits: - name: train num_bytes: 809121646 num_examples: 6798556 download_size: 450766628 dataset_size: 809121646 - config_name: id-ro splits: - name: train num_bytes: 934915192 num_examples: 7915743 download_size: 
521917939 dataset_size: 934915192 - config_name: id-ru splits: - name: train num_bytes: 680572668 num_examples: 4802176 download_size: 355555825 dataset_size: 680572668 - config_name: id-si splits: - name: train num_bytes: 59489616 num_examples: 366000 download_size: 28222675 dataset_size: 59489616 - config_name: id-sk splits: - name: train num_bytes: 277169625 num_examples: 2366850 download_size: 158339131 dataset_size: 277169625 - config_name: id-sl splits: - name: train num_bytes: 624128623 num_examples: 5398476 download_size: 354674891 dataset_size: 624128623 - config_name: id-sq splits: - name: train num_bytes: 127530097 num_examples: 1085165 download_size: 71944675 dataset_size: 127530097 - config_name: id-sr splits: - name: train num_bytes: 792744886 num_examples: 6631037 download_size: 445349843 dataset_size: 792744886 - config_name: id-sv splits: - name: train num_bytes: 578895375 num_examples: 4819662 download_size: 321728791 dataset_size: 578895375 - config_name: id-ta splits: - name: train num_bytes: 2849256 num_examples: 16659 download_size: 1271639 dataset_size: 2849256 - config_name: id-te splits: - name: train num_bytes: 2138400 num_examples: 13304 download_size: 1018809 dataset_size: 2138400 - config_name: id-th splits: - name: train num_bytes: 261981204 num_examples: 1544231 download_size: 123447658 dataset_size: 261981204 - config_name: id-tl splits: - name: train num_bytes: 857904 num_examples: 7800 download_size: 487404 dataset_size: 857904 - config_name: id-tr splits: - name: train num_bytes: 925131692 num_examples: 7764058 download_size: 514333569 dataset_size: 925131692 - config_name: id-uk splits: - name: train num_bytes: 36770942 num_examples: 266344 download_size: 19522918 dataset_size: 36770942 - config_name: id-ur splits: - name: train num_bytes: 3135190 num_examples: 22830 download_size: 1657125 dataset_size: 3135190 - config_name: id-vi splits: - name: train num_bytes: 264798782 num_examples: 2081700 download_size: 141956684 
dataset_size: 264798782 - config_name: is-ar splits: - name: train num_bytes: 152493701 num_examples: 1105868 download_size: 84397158 dataset_size: 152493701 - config_name: is-bg splits: - name: train num_bytes: 185320318 num_examples: 1356280 download_size: 99892656 dataset_size: 185320318 - config_name: is-bn splits: - name: train num_bytes: 6371171 num_examples: 38272 download_size: 3130910 dataset_size: 6371171 - config_name: is-bs splits: - name: train num_bytes: 79823224 num_examples: 692517 download_size: 47747110 dataset_size: 79823224 - config_name: is-ca splits: - name: train num_bytes: 2713362 num_examples: 22386 download_size: 1638036 dataset_size: 2713362 - config_name: is-cs splits: - name: train num_bytes: 163474316 num_examples: 1433036 download_size: 97115756 dataset_size: 163474316 - config_name: is-da splits: - name: train num_bytes: 142283151 num_examples: 1232620 download_size: 82440528 dataset_size: 142283151 - config_name: is-de splits: - name: train num_bytes: 124447746 num_examples: 1063594 download_size: 72329044 dataset_size: 124447746 - config_name: is-el splits: - name: train num_bytes: 229896228 num_examples: 1393628 download_size: 105702340 dataset_size: 229896228 - config_name: is-en splits: - name: train num_bytes: 179506522 num_examples: 1569189 download_size: 103091458 dataset_size: 179506522 - config_name: is-es splits: - name: train num_bytes: 172524015 num_examples: 1502402 download_size: 100419287 dataset_size: 172524015 - config_name: is-et splits: - name: train num_bytes: 128275388 num_examples: 1110083 download_size: 75724039 dataset_size: 128275388 - config_name: is-eu splits: - name: train num_bytes: 5796399 num_examples: 48947 download_size: 3435615 dataset_size: 5796399 - config_name: is-fa splits: - name: train num_bytes: 40825950 num_examples: 282354 download_size: 22308706 dataset_size: 40825950 - config_name: is-fi splits: - name: train num_bytes: 160767184 num_examples: 1387160 download_size: 94144541 dataset_size: 
160767184 - config_name: is-fr splits: - name: train num_bytes: 155872712 num_examples: 1330231 download_size: 90643773 dataset_size: 155872712 - config_name: is-gl splits: - name: train num_bytes: 465844 num_examples: 4048 download_size: 287673 dataset_size: 465844 - config_name: is-he splits: - name: train num_bytes: 163596049 num_examples: 1256013 download_size: 89215698 dataset_size: 163596049 - config_name: is-hi splits: - name: train num_bytes: 738048 num_examples: 4562 download_size: 364946 dataset_size: 738048 - config_name: is-hr splits: - name: train num_bytes: 128987668 num_examples: 1353321 download_size: 90842241 dataset_size: 128987668 - config_name: is-hu splits: - name: train num_bytes: 131277424 num_examples: 1369398 download_size: 92491454 dataset_size: 131277424 - config_name: is-id splits: - name: train num_bytes: 61117516 num_examples: 603955 download_size: 40939741 dataset_size: 61117516 - config_name: is-it splits: - name: train num_bytes: 124092723 num_examples: 1070424 download_size: 72749283 dataset_size: 124092723 - config_name: is-ja splits: - name: train num_bytes: 13079782 num_examples: 105093 download_size: 7585855 dataset_size: 13079782 - config_name: is-ka splits: - name: train num_bytes: 2559585 num_examples: 15410 download_size: 1266624 dataset_size: 2559585 - config_name: is-kk splits: - name: train num_bytes: 163484 num_examples: 1283 download_size: 91350 dataset_size: 163484 - config_name: is-ko splits: - name: train num_bytes: 5649085 num_examples: 45678 download_size: 3353533 dataset_size: 5649085 - config_name: is-lt splits: - name: train num_bytes: 35683253 num_examples: 313750 download_size: 21261092 dataset_size: 35683253 - config_name: is-lv splits: - name: train num_bytes: 14124918 num_examples: 127778 download_size: 8378184 dataset_size: 14124918 - config_name: is-mk splits: - name: train num_bytes: 57583622 num_examples: 410244 download_size: 30918514 dataset_size: 57583622 - config_name: is-ml splits: - name: train 
num_bytes: 5738670 num_examples: 29016 download_size: 2638158 dataset_size: 5738670 - config_name: is-ms splits: - name: train num_bytes: 27316732 num_examples: 231959 download_size: 15558688 dataset_size: 27316732 - config_name: is-nl splits: - name: train num_bytes: 158070965 num_examples: 1354992 download_size: 91955616 dataset_size: 158070965 - config_name: is-no splits: - name: train num_bytes: 95613355 num_examples: 838262 download_size: 55606804 dataset_size: 95613355 - config_name: is-pl splits: - name: train num_bytes: 161273537 num_examples: 1395047 download_size: 95528902 dataset_size: 161273537 - config_name: is-pt splits: - name: train num_bytes: 167871044 num_examples: 1459837 download_size: 97489185 dataset_size: 167871044 - config_name: is-ro splits: - name: train num_bytes: 170663101 num_examples: 1469037 download_size: 99628042 dataset_size: 170663101 - config_name: is-ru splits: - name: train num_bytes: 109923106 num_examples: 796822 download_size: 60185582 dataset_size: 109923106 - config_name: is-si splits: - name: train num_bytes: 6861993 num_examples: 41951 download_size: 3403076 dataset_size: 6861993 - config_name: is-sk splits: - name: train num_bytes: 50563162 num_examples: 436582 download_size: 30286130 dataset_size: 50563162 - config_name: is-sl splits: - name: train num_bytes: 144379426 num_examples: 1297198 download_size: 85505628 dataset_size: 144379426 - config_name: is-sq splits: - name: train num_bytes: 41881250 num_examples: 346875 download_size: 24437562 dataset_size: 41881250 - config_name: is-sr splits: - name: train num_bytes: 156311618 num_examples: 1350101 download_size: 92187916 dataset_size: 156311618 - config_name: is-sv splits: - name: train num_bytes: 140723499 num_examples: 1206627 download_size: 81632497 dataset_size: 140723499 - config_name: is-ta splits: - name: train num_bytes: 352088 num_examples: 1982 download_size: 167329 dataset_size: 352088 - config_name: is-th splits: - name: train num_bytes: 25164485 
num_examples: 143848 download_size: 12433501 dataset_size: 25164485 - config_name: is-tl splits: - name: train num_bytes: 143624 num_examples: 1244 download_size: 89792 dataset_size: 143624 - config_name: is-tr splits: - name: train num_bytes: 173191926 num_examples: 1503742 download_size: 100393884 dataset_size: 173191926 - config_name: is-uk splits: - name: train num_bytes: 5013720 num_examples: 36356 download_size: 2801714 dataset_size: 5013720 - config_name: is-ur splits: - name: train num_bytes: 162324 num_examples: 1161 download_size: 93494 dataset_size: 162324 - config_name: is-vi splits: - name: train num_bytes: 43505715 num_examples: 336571 download_size: 24327280 dataset_size: 43505715 - config_name: it-af splits: - name: train num_bytes: 1814935 num_examples: 15006 download_size: 1104117 dataset_size: 1814935 - config_name: it-ar splits: - name: train num_bytes: 2827223716 num_examples: 20022861 download_size: 1542861122 dataset_size: 2827223716 - config_name: it-bg splits: - name: train num_bytes: 3352642463 num_examples: 23136590 download_size: 1781578123 dataset_size: 3352642463 - config_name: it-bn splits: - name: train num_bytes: 41447909 num_examples: 261904 download_size: 20332175 dataset_size: 41447909 - config_name: it-bs splits: - name: train num_bytes: 945468774 num_examples: 7934886 download_size: 559322827 dataset_size: 945468774 - config_name: it-ca splits: - name: train num_bytes: 47175349 num_examples: 391293 download_size: 27407000 dataset_size: 47175349 - config_name: it-cs splits: - name: train num_bytes: 2717640981 num_examples: 26628326 download_size: 1900423982 dataset_size: 2717640981 - config_name: it-da splits: - name: train num_bytes: 1179129267 num_examples: 9834003 download_size: 679224508 dataset_size: 1179129267 - config_name: it-de splits: - name: train num_bytes: 1675546221 num_examples: 13566188 download_size: 961354594 dataset_size: 1675546221 - config_name: it-el splits: - name: train num_bytes: 4006127637 num_examples: 
23064600 download_size: 1832561012 dataset_size: 4006127637 - config_name: it-en splits: - name: train num_bytes: 4240969886 num_examples: 35216229 download_size: 2413457956 dataset_size: 4240969886 - config_name: it-es splits: - name: train num_bytes: 3765943929 num_examples: 30595034 download_size: 2165528825 dataset_size: 3765943929 - config_name: it-et splits: - name: train num_bytes: 966506113 num_examples: 8146755 download_size: 565966199 dataset_size: 966506113 - config_name: it-eu splits: - name: train num_bytes: 66332976 num_examples: 568661 download_size: 39045887 dataset_size: 66332976 - config_name: it-fa splits: - name: train num_bytes: 594692793 num_examples: 4172470 download_size: 319966892 dataset_size: 594692793 - config_name: it-fi splits: - name: train num_bytes: 2114857449 num_examples: 17183232 download_size: 1225916500 dataset_size: 2114857449 - config_name: it-fr splits: - name: train num_bytes: 2878720322 num_examples: 23099708 download_size: 1647897770 dataset_size: 2878720322 - config_name: it-gl splits: - name: train num_bytes: 13927256 num_examples: 114094 download_size: 8290641 dataset_size: 13927256 - config_name: it-he splits: - name: train num_bytes: 2719208860 num_examples: 19908473 download_size: 1461101288 dataset_size: 2719208860 - config_name: it-hi splits: - name: train num_bytes: 7707878 num_examples: 47494 download_size: 3763698 dataset_size: 7707878 - config_name: it-hr splits: - name: train num_bytes: 2097351208 num_examples: 20807805 download_size: 1466880309 dataset_size: 2097351208 - config_name: it-hu splits: - name: train num_bytes: 2743711707 num_examples: 26750936 download_size: 1907932145 dataset_size: 2743711707 - config_name: it-hy splits: - name: train num_bytes: 31666 num_examples: 265 download_size: 24916 dataset_size: 31666 - config_name: it-id splits: - name: train num_bytes: 589858625 num_examples: 5801967 download_size: 393193590 dataset_size: 589858625 - config_name: it-is splits: - name: train num_bytes: 
124092723 num_examples: 1070424 download_size: 72749283 dataset_size: 124092723 - config_name: it-ja splits: - name: train num_bytes: 202609617 num_examples: 1606704 download_size: 114654245 dataset_size: 202609617 - config_name: it-ka splits: - name: train num_bytes: 17171549 num_examples: 106703 download_size: 8310304 dataset_size: 17171549 - config_name: it-kk splits: - name: train num_bytes: 309549 num_examples: 2539 download_size: 174941 dataset_size: 309549 - config_name: it-ko splits: - name: train num_bytes: 141620710 num_examples: 1116636 download_size: 83067717 dataset_size: 141620710 - config_name: it-lt splits: - name: train num_bytes: 118969031 num_examples: 1051116 download_size: 71592666 dataset_size: 118969031 - config_name: it-lv splits: - name: train num_bytes: 46717988 num_examples: 410149 download_size: 27977982 dataset_size: 46717988 - config_name: it-mk splits: - name: train num_bytes: 323229469 num_examples: 2307953 download_size: 172150302 dataset_size: 323229469 - config_name: it-ml splits: - name: train num_bytes: 41254866 num_examples: 223484 download_size: 19048419 dataset_size: 41254866 - config_name: it-ms splits: - name: train num_bytes: 146722324 num_examples: 1261501 download_size: 82799145 dataset_size: 146722324 - config_name: it-nl splits: - name: train num_bytes: 2773932505 num_examples: 22308465 download_size: 1591405127 dataset_size: 2773932505 - config_name: it-no splits: - name: train num_bytes: 714230421 num_examples: 6021240 download_size: 412543271 dataset_size: 714230421 - config_name: it-pl splits: - name: train num_bytes: 3200084801 num_examples: 25998905 download_size: 1869172216 dataset_size: 3200084801 - config_name: it-pt splits: - name: train num_bytes: 2449298359 num_examples: 20164630 download_size: 1406570669 dataset_size: 2449298359 - config_name: it-ro splits: - name: train num_bytes: 3327768795 num_examples: 27147041 download_size: 1920212878 dataset_size: 3327768795 - config_name: it-ru splits: - name: 
train num_bytes: 2575734602 num_examples: 17490692 download_size: 1384159600 dataset_size: 2575734602 - config_name: it-si splits: - name: train num_bytes: 61080429 num_examples: 366973 download_size: 29735399 dataset_size: 61080429 - config_name: it-sk splits: - name: train num_bytes: 670523336 num_examples: 5596374 download_size: 395868066 dataset_size: 670523336 - config_name: it-sl splits: - name: train num_bytes: 1469188163 num_examples: 12452119 download_size: 865250270 dataset_size: 1469188163 - config_name: it-sq splits: - name: train num_bytes: 154767933 num_examples: 1317228 download_size: 90237983 dataset_size: 154767933 - config_name: it-sr splits: - name: train num_bytes: 2662719247 num_examples: 21905809 download_size: 1554998886 dataset_size: 2662719247 - config_name: it-sv splits: - name: train num_bytes: 1385767185 num_examples: 11395212 download_size: 796539907 dataset_size: 1385767185 - config_name: it-ta splits: - name: train num_bytes: 1927308 num_examples: 11149 download_size: 898123 dataset_size: 1927308 - config_name: it-te splits: - name: train num_bytes: 1623218 num_examples: 9926 download_size: 800137 dataset_size: 1623218 - config_name: it-th splits: - name: train num_bytes: 431366483 num_examples: 2441314 download_size: 208348638 dataset_size: 431366483 - config_name: it-tl splits: - name: train num_bytes: 1576367 num_examples: 13299 download_size: 876367 dataset_size: 1576367 - config_name: it-tr splits: - name: train num_bytes: 2647099188 num_examples: 25666917 download_size: 1806781258 dataset_size: 2647099188 - config_name: it-uk splits: - name: train num_bytes: 88451469 num_examples: 629710 download_size: 48268530 dataset_size: 88451469 - config_name: it-ur splits: - name: train num_bytes: 2604652 num_examples: 18354 download_size: 1428782 dataset_size: 2604652 - config_name: it-vi splits: - name: train num_bytes: 329570848 num_examples: 2542407 download_size: 182792757 dataset_size: 329570848 - config_name: ja-af splits: - name: 
train num_bytes: 228949 num_examples: 1984 download_size: 139600 dataset_size: 228949 - config_name: ja-ar splits: - name: train num_bytes: 252130055 num_examples: 1834940 download_size: 134194769 dataset_size: 252130055 - config_name: ja-bg splits: - name: train num_bytes: 242458219 num_examples: 1652166 download_size: 126453806 dataset_size: 242458219 - config_name: ja-bn splits: - name: train num_bytes: 20957102 num_examples: 129721 download_size: 10095911 dataset_size: 20957102 - config_name: ja-bs splits: - name: train num_bytes: 87191652 num_examples: 717200 download_size: 50556829 dataset_size: 87191652 - config_name: ja-ca splits: - name: train num_bytes: 14732331 num_examples: 116270 download_size: 8364225 dataset_size: 14732331 - config_name: ja-cs splits: - name: train num_bytes: 220884618 num_examples: 1793821 download_size: 127193197 dataset_size: 220884618 - config_name: ja-da splits: - name: train num_bytes: 168347839 num_examples: 1381611 download_size: 94322518 dataset_size: 168347839 - config_name: ja-de splits: - name: train num_bytes: 184347104 num_examples: 1447086 download_size: 103584890 dataset_size: 184347104 - config_name: ja-el splits: - name: train num_bytes: 309569068 num_examples: 1740082 download_size: 139182373 dataset_size: 309569068 - config_name: ja-en splits: - name: train num_bytes: 253948077 num_examples: 2083600 download_size: 141348223 dataset_size: 253948077 - config_name: ja-es splits: - name: train num_bytes: 240567826 num_examples: 1922477 download_size: 135241937 dataset_size: 240567826 - config_name: ja-et splits: - name: train num_bytes: 143038594 num_examples: 1176436 download_size: 81982436 dataset_size: 143038594 - config_name: ja-eu splits: - name: train num_bytes: 16374502 num_examples: 139143 download_size: 9460546 dataset_size: 16374502 - config_name: ja-fa splits: - name: train num_bytes: 138339335 num_examples: 1003540 download_size: 72642936 dataset_size: 138339335 - config_name: ja-fi splits: - name: train 
num_bytes: 198778616 num_examples: 1599718 download_size: 112414550 dataset_size: 198778616 - config_name: ja-fr splits: - name: train num_bytes: 202987393 num_examples: 1601666 download_size: 114003817 dataset_size: 202987393 - config_name: ja-gl splits: - name: train num_bytes: 2699867 num_examples: 21395 download_size: 1564764 dataset_size: 2699867 - config_name: ja-he splits: - name: train num_bytes: 232709314 num_examples: 1683291 download_size: 122554723 dataset_size: 232709314 - config_name: ja-hi splits: - name: train num_bytes: 5460034 num_examples: 33672 download_size: 2586335 dataset_size: 5460034 - config_name: ja-hr splits: - name: train num_bytes: 170504120 num_examples: 1663017 download_size: 116108395 dataset_size: 170504120 - config_name: ja-hu splits: - name: train num_bytes: 185270633 num_examples: 1767552 download_size: 125650541 dataset_size: 185270633 - config_name: ja-id splits: - name: train num_bytes: 131317166 num_examples: 1252604 download_size: 84860147 dataset_size: 131317166 - config_name: ja-is splits: - name: train num_bytes: 11228107 num_examples: 105093 download_size: 7585855 dataset_size: 11228107 - config_name: ja-it splits: - name: train num_bytes: 171627407 num_examples: 1606704 download_size: 114654245 dataset_size: 171627407 - config_name: ja-ka splits: - name: train num_bytes: 5680432 num_examples: 35371 download_size: 2696204 dataset_size: 5680432 - config_name: ja-kk splits: - name: train num_bytes: 202192 num_examples: 1205 download_size: 120237 dataset_size: 202192 - config_name: ja-ko splits: - name: train num_bytes: 36899474 num_examples: 302063 download_size: 20878508 dataset_size: 36899474 - config_name: ja-lt splits: - name: train num_bytes: 33612330 num_examples: 281664 download_size: 19803856 dataset_size: 33612330 - config_name: ja-lv splits: - name: train num_bytes: 11781147 num_examples: 99884 download_size: 6992121 dataset_size: 11781147 - config_name: ja-mk splits: - name: train num_bytes: 76637072 
num_examples: 522853 download_size: 39956181 dataset_size: 76637072 - config_name: ja-ml splits: - name: train num_bytes: 12680535 num_examples: 67305 download_size: 5756663 dataset_size: 12680535 - config_name: ja-ms splits: - name: train num_bytes: 59010539 num_examples: 483095 download_size: 32506111 dataset_size: 59010539 - config_name: ja-nl splits: - name: train num_bytes: 214110502 num_examples: 1697628 download_size: 120037111 dataset_size: 214110502 - config_name: ja-no splits: - name: train num_bytes: 113488498 num_examples: 943397 download_size: 64092342 dataset_size: 113488498 - config_name: ja-pl splits: - name: train num_bytes: 257585745 num_examples: 1723779 download_size: 123156850 dataset_size: 257585745 - config_name: ja-pt splits: - name: train num_bytes: 262193106 num_examples: 1750528 download_size: 123329354 dataset_size: 262193106 - config_name: ja-ro splits: - name: train num_bytes: 275151988 num_examples: 1836903 download_size: 130036875 dataset_size: 275151988 - config_name: ja-ru splits: - name: train num_bytes: 249810274 num_examples: 1441087 download_size: 113437671 dataset_size: 249810274 - config_name: ja-si splits: - name: train num_bytes: 31421638 num_examples: 162957 download_size: 13120285 dataset_size: 31421638 - config_name: ja-sk splits: - name: train num_bytes: 85369404 num_examples: 579888 download_size: 41335203 dataset_size: 85369404 - config_name: ja-sl splits: - name: train num_bytes: 209268379 num_examples: 1430110 download_size: 99921885 dataset_size: 209268379 - config_name: ja-sq splits: - name: train num_bytes: 41730136 num_examples: 280216 download_size: 19959571 dataset_size: 41730136 - config_name: ja-sr splits: - name: train num_bytes: 240386033 num_examples: 1597196 download_size: 114056604 dataset_size: 240386033 - config_name: ja-sv splits: - name: train num_bytes: 207842670 num_examples: 1395574 download_size: 97089521 dataset_size: 207842670 - config_name: ja-ta splits: - name: train num_bytes: 2554541 
num_examples: 12683 download_size: 1013357 dataset_size: 2554541 - config_name: ja-te splits: - name: train num_bytes: 1053991 num_examples: 5676 download_size: 441927 dataset_size: 1053991 - config_name: ja-th splits: - name: train num_bytes: 120373283 num_examples: 626022 download_size: 49751127 dataset_size: 120373283 - config_name: ja-tl splits: - name: train num_bytes: 1530657 num_examples: 10064 download_size: 691756 dataset_size: 1530657 - config_name: ja-tr splits: - name: train num_bytes: 276196852 num_examples: 1840439 download_size: 129931022 dataset_size: 276196852 - config_name: ja-uk splits: - name: train num_bytes: 22345382 num_examples: 132404 download_size: 10219814 dataset_size: 22345382 - config_name: ja-ur splits: - name: train num_bytes: 1555332 num_examples: 11517 download_size: 826093 dataset_size: 1555332 - config_name: ja-vi splits: - name: train num_bytes: 91098274 num_examples: 679305 download_size: 49452044 dataset_size: 91098274 - config_name: ka-ar splits: - name: train num_bytes: 27849923 num_examples: 161654 download_size: 12980173 dataset_size: 27849923 - config_name: ka-bg splits: - name: train num_bytes: 32999054 num_examples: 193270 download_size: 14988462 dataset_size: 32999054 - config_name: ka-bn splits: - name: train num_bytes: 1660197 num_examples: 8681 download_size: 726062 dataset_size: 1660197 - config_name: ka-bs splits: - name: train num_bytes: 14689224 num_examples: 94897 download_size: 7219698 dataset_size: 14689224 - config_name: ka-ca splits: - name: train num_bytes: 992947 num_examples: 5858 download_size: 473398 dataset_size: 992947 - config_name: ka-cs splits: - name: train num_bytes: 25681863 num_examples: 163352 download_size: 12657473 dataset_size: 25681863 - config_name: ka-da splits: - name: train num_bytes: 21512391 num_examples: 135036 download_size: 10282056 dataset_size: 21512391 - config_name: ka-de splits: - name: train num_bytes: 19636234 num_examples: 123121 download_size: 9475825 dataset_size: 
19636234 - config_name: ka-el splits: - name: train num_bytes: 35346282 num_examples: 167388 download_size: 14414629 dataset_size: 35346282 - config_name: ka-en splits: - name: train num_bytes: 30705227 num_examples: 199978 download_size: 14731620 dataset_size: 30705227 - config_name: ka-es splits: - name: train num_bytes: 28745504 num_examples: 181190 download_size: 13931405 dataset_size: 28745504 - config_name: ka-et splits: - name: train num_bytes: 17300813 num_examples: 110021 download_size: 8480614 dataset_size: 17300813 - config_name: ka-eu splits: - name: train num_bytes: 1522347 num_examples: 9416 download_size: 738495 dataset_size: 1522347 - config_name: ka-fa splits: - name: train num_bytes: 16916468 num_examples: 96319 download_size: 7817398 dataset_size: 16916468 - config_name: ka-fi splits: - name: train num_bytes: 23710150 num_examples: 148416 download_size: 11445754 dataset_size: 23710150 - config_name: ka-fr splits: - name: train num_bytes: 22617114 num_examples: 139626 download_size: 10912737 dataset_size: 22617114 - config_name: ka-gl splits: - name: train num_bytes: 299213 num_examples: 1814 download_size: 152611 dataset_size: 299213 - config_name: ka-he splits: - name: train num_bytes: 25817437 num_examples: 149062 download_size: 11892349 dataset_size: 25817437 - config_name: ka-hi splits: - name: train num_bytes: 187469 num_examples: 796 download_size: 84783 dataset_size: 187469 - config_name: ka-hr splits: - name: train num_bytes: 21601649 num_examples: 158093 download_size: 12001696 dataset_size: 21601649 - config_name: ka-hu splits: - name: train num_bytes: 22974735 num_examples: 165843 download_size: 12843399 dataset_size: 22974735 - config_name: ka-id splits: - name: train num_bytes: 11881898 num_examples: 85069 download_size: 6378131 dataset_size: 11881898 - config_name: ka-is splits: - name: train num_bytes: 2293766 num_examples: 15410 download_size: 1266624 dataset_size: 2293766 - config_name: ka-it splits: - name: train num_bytes: 
15069733 num_examples: 106703 download_size: 8310304 dataset_size: 15069733 - config_name: ka-ja splits: - name: train num_bytes: 5013067 num_examples: 35371 download_size: 2696204 dataset_size: 5013067 - config_name: ka-ko splits: - name: train num_bytes: 2918810 num_examples: 17134 download_size: 1398993 dataset_size: 2918810 - config_name: ka-lt splits: - name: train num_bytes: 4756920 num_examples: 30493 download_size: 2363621 dataset_size: 4756920 - config_name: ka-lv splits: - name: train num_bytes: 1616008 num_examples: 10709 download_size: 801910 dataset_size: 1616008 - config_name: ka-mk splits: - name: train num_bytes: 13257142 num_examples: 75478 download_size: 6012887 dataset_size: 13257142 - config_name: ka-ml splits: - name: train num_bytes: 1470317 num_examples: 6559 download_size: 615557 dataset_size: 1470317 - config_name: ka-ms splits: - name: train num_bytes: 4980627 num_examples: 31860 download_size: 2364579 dataset_size: 4980627 - config_name: ka-nl splits: - name: train num_bytes: 25211029 num_examples: 155101 download_size: 12100194 dataset_size: 25211029 - config_name: ka-no splits: - name: train num_bytes: 14043195 num_examples: 88898 download_size: 6733785 dataset_size: 14043195 - config_name: ka-pl splits: - name: train num_bytes: 28660921 num_examples: 152660 download_size: 12168136 dataset_size: 28660921 - config_name: ka-pt splits: - name: train num_bytes: 30223485 num_examples: 165003 download_size: 12654990 dataset_size: 30223485 - config_name: ka-ro splits: - name: train num_bytes: 33429489 num_examples: 182789 download_size: 14045813 dataset_size: 33429489 - config_name: ka-ru splits: - name: train num_bytes: 21524490 num_examples: 104823 download_size: 8862581 dataset_size: 21524490 - config_name: ka-si splits: - name: train num_bytes: 1784023 num_examples: 7962 download_size: 678723 dataset_size: 1784023 - config_name: ka-sk splits: - name: train num_bytes: 7190107 num_examples: 39971 download_size: 3085666 dataset_size: 7190107 
- config_name: ka-sl splits: - name: train num_bytes: 25105183 num_examples: 138944 download_size: 10561598 dataset_size: 25105183 - config_name: ka-sq splits: - name: train num_bytes: 8307069 num_examples: 44751 download_size: 3516832 dataset_size: 8307069 - config_name: ka-sr splits: - name: train num_bytes: 30950809 num_examples: 167048 download_size: 13008013 dataset_size: 30950809 - config_name: ka-sv splits: - name: train num_bytes: 24400376 num_examples: 131075 download_size: 10105756 dataset_size: 24400376 - config_name: ka-th splits: - name: train num_bytes: 10196337 num_examples: 43371 download_size: 3967045 dataset_size: 10196337 - config_name: ka-tl splits: - name: train num_bytes: 221269 num_examples: 1273 download_size: 97913 dataset_size: 221269 - config_name: ka-tr splits: - name: train num_bytes: 32816587 num_examples: 178791 download_size: 13764367 dataset_size: 32816587 - config_name: ka-uk splits: - name: train num_bytes: 2106429 num_examples: 10491 download_size: 872568 dataset_size: 2106429 - config_name: ka-ur splits: - name: train num_bytes: 415236 num_examples: 1983 download_size: 195889 dataset_size: 415236 - config_name: ka-vi splits: - name: train num_bytes: 9098633 num_examples: 53575 download_size: 4299661 dataset_size: 9098633 - config_name: kk-ar splits: - name: train num_bytes: 165315 num_examples: 1279 download_size: 92500 dataset_size: 165315 - config_name: kk-bg splits: - name: train num_bytes: 465678 num_examples: 3395 download_size: 242222 dataset_size: 465678 - config_name: kk-bs splits: - name: train num_bytes: 142991 num_examples: 1019 download_size: 83562 dataset_size: 142991 - config_name: kk-cs splits: - name: train num_bytes: 143586 num_examples: 1264 download_size: 85825 dataset_size: 143586 - config_name: kk-da splits: - name: train num_bytes: 422750 num_examples: 3447 download_size: 232114 dataset_size: 422750 - config_name: kk-de splits: - name: train num_bytes: 469048 num_examples: 3715 download_size: 257788 
dataset_size: 469048 - config_name: kk-el splits: - name: train num_bytes: 383466 num_examples: 2333 download_size: 173224 dataset_size: 383466 - config_name: kk-en splits: - name: train num_bytes: 451923 num_examples: 3712 download_size: 247679 dataset_size: 451923 - config_name: kk-es splits: - name: train num_bytes: 305455 num_examples: 2479 download_size: 173386 dataset_size: 305455 - config_name: kk-et splits: - name: train num_bytes: 126502 num_examples: 1140 download_size: 76326 dataset_size: 126502 - config_name: kk-fa splits: - name: train num_bytes: 145968 num_examples: 1010 download_size: 80333 dataset_size: 145968 - config_name: kk-fi splits: - name: train num_bytes: 424865 num_examples: 3406 download_size: 236372 dataset_size: 424865 - config_name: kk-fr splits: - name: train num_bytes: 153751 num_examples: 1336 download_size: 90158 dataset_size: 153751 - config_name: kk-he splits: - name: train num_bytes: 313740 num_examples: 2376 download_size: 166445 dataset_size: 313740 - config_name: kk-hr splits: - name: train num_bytes: 219421 num_examples: 2154 download_size: 147155 dataset_size: 219421 - config_name: kk-hu splits: - name: train num_bytes: 265441 num_examples: 2577 download_size: 177896 dataset_size: 265441 - config_name: kk-id splits: - name: train num_bytes: 107681 num_examples: 1034 download_size: 72302 dataset_size: 107681 - config_name: kk-is splits: - name: train num_bytes: 138172 num_examples: 1283 download_size: 91350 dataset_size: 138172 - config_name: kk-it splits: - name: train num_bytes: 263867 num_examples: 2539 download_size: 174941 dataset_size: 263867 - config_name: kk-ja splits: - name: train num_bytes: 182987 num_examples: 1205 download_size: 120237 dataset_size: 182987 - config_name: kk-lt splits: - name: train num_bytes: 101766 num_examples: 830 download_size: 61904 dataset_size: 101766 - config_name: kk-lv splits: - name: train num_bytes: 128929 num_examples: 1134 download_size: 77252 dataset_size: 128929 - config_name: 
kk-ms splits: - name: train num_bytes: 136950 num_examples: 1122 download_size: 79906 dataset_size: 136950 - config_name: kk-nl splits: - name: train num_bytes: 240721 num_examples: 1853 download_size: 136139 dataset_size: 240721 - config_name: kk-no splits: - name: train num_bytes: 293390 num_examples: 2365 download_size: 164755 dataset_size: 293390 - config_name: kk-pl splits: - name: train num_bytes: 528471 num_examples: 3366 download_size: 252551 dataset_size: 528471 - config_name: kk-pt splits: - name: train num_bytes: 506193 num_examples: 3351 download_size: 236486 dataset_size: 506193 - config_name: kk-ro splits: - name: train num_bytes: 350558 num_examples: 2348 download_size: 167181 dataset_size: 350558 - config_name: kk-ru splits: - name: train num_bytes: 367693 num_examples: 2223 download_size: 168183 dataset_size: 367693 - config_name: kk-sk splits: - name: train num_bytes: 142109 num_examples: 981 download_size: 70307 dataset_size: 142109 - config_name: kk-sl splits: - name: train num_bytes: 315809 num_examples: 2157 download_size: 151509 dataset_size: 315809 - config_name: kk-sr splits: - name: train num_bytes: 535995 num_examples: 3424 download_size: 250723 dataset_size: 535995 - config_name: kk-sv splits: - name: train num_bytes: 474672 num_examples: 3039 download_size: 222379 dataset_size: 474672 - config_name: kk-th splits: - name: train num_bytes: 173102 num_examples: 927 download_size: 74704 dataset_size: 173102 - config_name: kk-tr splits: - name: train num_bytes: 380385 num_examples: 2594 download_size: 177915 dataset_size: 380385 - config_name: kk-uk splits: - name: train num_bytes: 183520 num_examples: 1300 download_size: 97660 dataset_size: 183520 - config_name: kk-vi splits: - name: train num_bytes: 142116 num_examples: 1178 download_size: 81276 dataset_size: 142116 - config_name: ko-ar splits: - name: train num_bytes: 171750308 num_examples: 1249195 download_size: 94461318 dataset_size: 171750308 - config_name: ko-bg splits: - name: train 
num_bytes: 155582107 num_examples: 1050636 download_size: 83837542 dataset_size: 155582107 - config_name: ko-bn splits: - name: train num_bytes: 12309598 num_examples: 78656 download_size: 6094055 dataset_size: 12309598 - config_name: ko-bs splits: - name: train num_bytes: 47717701 num_examples: 384696 download_size: 28489370 dataset_size: 47717701 - config_name: ko-ca splits: - name: train num_bytes: 5006944 num_examples: 39433 download_size: 2979399 dataset_size: 5006944 - config_name: ko-cs splits: - name: train num_bytes: 148137487 num_examples: 1186980 download_size: 88268017 dataset_size: 148137487 - config_name: ko-da splits: - name: train num_bytes: 87164739 num_examples: 705508 download_size: 50687872 dataset_size: 87164739 - config_name: ko-de splits: - name: train num_bytes: 96371270 num_examples: 750914 download_size: 56195729 dataset_size: 96371270 - config_name: ko-el splits: - name: train num_bytes: 201457405 num_examples: 1129226 download_size: 93368158 dataset_size: 201457405 - config_name: ko-en splits: - name: train num_bytes: 171059418 num_examples: 1391190 download_size: 99008025 dataset_size: 171059418 - config_name: ko-es splits: - name: train num_bytes: 152524323 num_examples: 1214195 download_size: 89100823 dataset_size: 152524323 - config_name: ko-et splits: - name: train num_bytes: 59663642 num_examples: 492792 download_size: 35451791 dataset_size: 59663642 - config_name: ko-eu splits: - name: train num_bytes: 8519715 num_examples: 72165 download_size: 5035001 dataset_size: 8519715 - config_name: ko-fa splits: - name: train num_bytes: 77184218 num_examples: 559268 download_size: 41696782 dataset_size: 77184218 - config_name: ko-fi splits: - name: train num_bytes: 106913692 num_examples: 859310 download_size: 62736603 dataset_size: 106913692 - config_name: ko-fr splits: - name: train num_bytes: 125012256 num_examples: 979642 download_size: 72608896 dataset_size: 125012256 - config_name: ko-gl splits: - name: train num_bytes: 1136223 
num_examples: 8981 download_size: 711346 dataset_size: 1136223 - config_name: ko-he splits: - name: train num_bytes: 152831015 num_examples: 1094716 download_size: 83198339 dataset_size: 152831015 - config_name: ko-hi splits: - name: train num_bytes: 3325820 num_examples: 21188 download_size: 1603610 dataset_size: 3325820 - config_name: ko-hr splits: - name: train num_bytes: 109037279 num_examples: 1056375 download_size: 76734274 dataset_size: 109037279 - config_name: ko-hu splits: - name: train num_bytes: 123412103 num_examples: 1168211 download_size: 86326388 dataset_size: 123412103 - config_name: ko-id splits: - name: train num_bytes: 60770148 num_examples: 586876 download_size: 40701641 dataset_size: 60770148 - config_name: ko-is splits: - name: train num_bytes: 4824947 num_examples: 45678 download_size: 3353533 dataset_size: 4824947 - config_name: ko-it splits: - name: train num_bytes: 120358448 num_examples: 1116636 download_size: 83067717 dataset_size: 120358448 - config_name: ko-ja splits: - name: train num_bytes: 31244581 num_examples: 302063 download_size: 20878508 dataset_size: 31244581 - config_name: ko-ka splits: - name: train num_bytes: 2560047 num_examples: 17134 download_size: 1398993 dataset_size: 2560047 - config_name: ko-lt splits: - name: train num_bytes: 17255038 num_examples: 148544 download_size: 10477558 dataset_size: 17255038 - config_name: ko-lv splits: - name: train num_bytes: 6479963 num_examples: 57097 download_size: 3954053 dataset_size: 6479963 - config_name: ko-mk splits: - name: train num_bytes: 29548975 num_examples: 202446 download_size: 15942845 dataset_size: 29548975 - config_name: ko-ml splits: - name: train num_bytes: 5763947 num_examples: 30444 download_size: 2660568 dataset_size: 5763947 - config_name: ko-ms splits: - name: train num_bytes: 33951558 num_examples: 285016 download_size: 19270742 dataset_size: 33951558 - config_name: ko-nl splits: - name: train num_bytes: 139413398 num_examples: 1100911 download_size: 81012605 
dataset_size: 139413398 - config_name: ko-no splits: - name: train num_bytes: 64003445 num_examples: 525062 download_size: 37310289 dataset_size: 64003445 - config_name: ko-pl splits: - name: train num_bytes: 170018400 num_examples: 1124166 download_size: 84179417 dataset_size: 170018400 - config_name: ko-pt splits: - name: train num_bytes: 167789602 num_examples: 1112125 download_size: 81694331 dataset_size: 167789602 - config_name: ko-ro splits: - name: train num_bytes: 186649454 num_examples: 1236461 download_size: 91220128 dataset_size: 186649454 - config_name: ko-ru splits: - name: train num_bytes: 158429051 num_examples: 907004 download_size: 74193775 dataset_size: 158429051 - config_name: ko-si splits: - name: train num_bytes: 11281220 num_examples: 58662 download_size: 4864805 dataset_size: 11281220 - config_name: ko-sk splits: - name: train num_bytes: 44661401 num_examples: 306616 download_size: 22306050 dataset_size: 44661401 - config_name: ko-sl splits: - name: train num_bytes: 111726172 num_examples: 759146 download_size: 55313166 dataset_size: 111726172 - config_name: ko-sq splits: - name: train num_bytes: 13862949 num_examples: 93617 download_size: 6773153 dataset_size: 13862949 - config_name: ko-sr splits: - name: train num_bytes: 112345679 num_examples: 975262 download_size: 72716732 dataset_size: 112345679 - config_name: ko-sv splits: - name: train num_bytes: 114621994 num_examples: 769024 download_size: 55346953 dataset_size: 114621994 - config_name: ko-ta splits: - name: train num_bytes: 1495117 num_examples: 7501 download_size: 614733 dataset_size: 1495117 - config_name: ko-te splits: - name: train num_bytes: 164520 num_examples: 931 download_size: 73173 dataset_size: 164520 - config_name: ko-th splits: - name: train num_bytes: 42607354 num_examples: 224600 download_size: 18223559 dataset_size: 42607354 - config_name: ko-tl splits: - name: train num_bytes: 167951 num_examples: 1213 download_size: 86502 dataset_size: 167951 - config_name: ko-tr 
splits: - name: train num_bytes: 136103318 num_examples: 1190399 download_size: 87398515 dataset_size: 136103318 - config_name: ko-uk splits: - name: train num_bytes: 7354116 num_examples: 51313 download_size: 4075603 dataset_size: 7354116 - config_name: ko-ur splits: - name: train num_bytes: 1001455 num_examples: 7094 download_size: 549507 dataset_size: 1001455 - config_name: ko-vi splits: - name: train num_bytes: 46223328 num_examples: 345791 download_size: 25870504 dataset_size: 46223328 - config_name: lt-af splits: - name: train num_bytes: 70520 num_examples: 651 download_size: 47969 dataset_size: 70520 - config_name: lt-ar splits: - name: train num_bytes: 155538454 num_examples: 1177564 download_size: 87885663 dataset_size: 155538454 - config_name: lt-bg splits: - name: train num_bytes: 166414335 num_examples: 1247963 download_size: 91754661 dataset_size: 166414335 - config_name: lt-bn splits: - name: train num_bytes: 15141512 num_examples: 96242 download_size: 7551474 dataset_size: 15141512 - config_name: lt-bs splits: - name: train num_bytes: 79067945 num_examples: 704369 download_size: 48407755 dataset_size: 79067945 - config_name: lt-ca splits: - name: train num_bytes: 4272009 num_examples: 36118 download_size: 2590152 dataset_size: 4272009 - config_name: lt-cs splits: - name: train num_bytes: 145265360 num_examples: 1307483 download_size: 88496159 dataset_size: 145265360 - config_name: lt-da splits: - name: train num_bytes: 123680581 num_examples: 1094095 download_size: 73808231 dataset_size: 123680581 - config_name: lt-de splits: - name: train num_bytes: 111711755 num_examples: 975756 download_size: 66594609 dataset_size: 111711755 - config_name: lt-el splits: - name: train num_bytes: 207497198 num_examples: 1277881 download_size: 97033589 dataset_size: 207497198 - config_name: lt-en splits: - name: train num_bytes: 155535702 num_examples: 1415961 download_size: 91241899 dataset_size: 155535702 - config_name: lt-es splits: - name: train num_bytes: 
150566139 num_examples: 1345855 download_size: 89812715 dataset_size: 150566139 - config_name: lt-et splits: - name: train num_bytes: 126901863 num_examples: 1136463 download_size: 76823587 dataset_size: 126901863 - config_name: lt-eu splits: - name: train num_bytes: 12064105 num_examples: 108116 download_size: 7249924 dataset_size: 12064105 - config_name: lt-fa splits: - name: train num_bytes: 83557934 num_examples: 615776 download_size: 46404602 dataset_size: 83557934 - config_name: lt-fi splits: - name: train num_bytes: 133587965 num_examples: 1181965 download_size: 80442219 dataset_size: 133587965 - config_name: lt-fr splits: - name: train num_bytes: 135371739 num_examples: 1183584 download_size: 80724884 dataset_size: 135371739 - config_name: lt-gl splits: - name: train num_bytes: 1616588 num_examples: 14151 download_size: 993170 dataset_size: 1616588 - config_name: lt-he splits: - name: train num_bytes: 153498087 num_examples: 1220910 download_size: 85483389 dataset_size: 153498087 - config_name: lt-hi splits: - name: train num_bytes: 3599806 num_examples: 23673 download_size: 1778821 dataset_size: 3599806 - config_name: lt-hr splits: - name: train num_bytes: 115395551 num_examples: 1254801 download_size: 83874979 dataset_size: 115395551 - config_name: lt-hu splits: - name: train num_bytes: 120327630 num_examples: 1295287 download_size: 87344929 dataset_size: 120327630 - config_name: lt-id splits: - name: train num_bytes: 82911751 num_examples: 855428 download_size: 57251404 dataset_size: 82911751 - config_name: lt-is splits: - name: train num_bytes: 30042246 num_examples: 313750 download_size: 21261092 dataset_size: 30042246 - config_name: lt-it splits: - name: train num_bytes: 99924367 num_examples: 1051116 download_size: 71592666 dataset_size: 99924367 - config_name: lt-ja splits: - name: train num_bytes: 28608860 num_examples: 281664 download_size: 19803856 dataset_size: 28608860 - config_name: lt-ka splits: - name: train num_bytes: 4197999 num_examples: 
30493 download_size: 2363621 dataset_size: 4197999 - config_name: lt-kk splits: - name: train num_bytes: 88046 num_examples: 830 download_size: 61904 dataset_size: 88046 - config_name: lt-ko splits: - name: train num_bytes: 14638305 num_examples: 148544 download_size: 10477558 dataset_size: 14638305 - config_name: lt-lv splits: - name: train num_bytes: 23954297 num_examples: 219617 download_size: 14533267 dataset_size: 23954297 - config_name: lt-mk splits: - name: train num_bytes: 65734632 num_examples: 476383 download_size: 36037748 dataset_size: 65734632 - config_name: lt-ml splits: - name: train num_bytes: 12438711 num_examples: 66400 download_size: 5829741 dataset_size: 12438711 - config_name: lt-ms splits: - name: train num_bytes: 44664620 num_examples: 393888 download_size: 25961704 dataset_size: 44664620 - config_name: lt-nl splits: - name: train num_bytes: 137425817 num_examples: 1205932 download_size: 82041226 dataset_size: 137425817 - config_name: lt-no splits: - name: train num_bytes: 87125834 num_examples: 783086 download_size: 52149155 dataset_size: 87125834 - config_name: lt-pl splits: - name: train num_bytes: 173965447 num_examples: 1263859 download_size: 87132855 dataset_size: 173965447 - config_name: lt-pt splits: - name: train num_bytes: 177914098 num_examples: 1300122 download_size: 87334862 dataset_size: 177914098 - config_name: lt-ro splits: - name: train num_bytes: 184923193 num_examples: 1351611 download_size: 90965924 dataset_size: 184923193 - config_name: lt-ru splits: - name: train num_bytes: 148567538 num_examples: 947235 download_size: 70252792 dataset_size: 148567538 - config_name: lt-si splits: - name: train num_bytes: 19758460 num_examples: 106533 download_size: 8561344 dataset_size: 19758460 - config_name: lt-sk splits: - name: train num_bytes: 77516074 num_examples: 566875 download_size: 39102225 dataset_size: 77516074 - config_name: lt-sl splits: - name: train num_bytes: 163274512 num_examples: 1222145 download_size: 81350181 
dataset_size: 163274512 - config_name: lt-sq splits: - name: train num_bytes: 56487293 num_examples: 400606 download_size: 27863354 dataset_size: 56487293 - config_name: lt-sr splits: - name: train num_bytes: 174067870 num_examples: 1250593 download_size: 86121552 dataset_size: 174067870 - config_name: lt-sv splits: - name: train num_bytes: 112522730 num_examples: 1076857 download_size: 73451729 dataset_size: 112522730 - config_name: lt-ta splits: - name: train num_bytes: 2568758 num_examples: 13042 download_size: 1045380 dataset_size: 2568758 - config_name: lt-te splits: - name: train num_bytes: 1527763 num_examples: 9706 download_size: 799236 dataset_size: 1527763 - config_name: lt-th splits: - name: train num_bytes: 41292264 num_examples: 263891 download_size: 21947444 dataset_size: 41292264 - config_name: lt-tl splits: - name: train num_bytes: 177583 num_examples: 1359 download_size: 92731 dataset_size: 177583 - config_name: lt-tr splits: - name: train num_bytes: 186280937 num_examples: 1360008 download_size: 91221893 dataset_size: 186280937 - config_name: lt-uk splits: - name: train num_bytes: 11654448 num_examples: 87824 download_size: 6600052 dataset_size: 11654448 - config_name: lt-ur splits: - name: train num_bytes: 601064 num_examples: 4473 download_size: 334141 dataset_size: 601064 - config_name: lt-vi splits: - name: train num_bytes: 60762149 num_examples: 486839 download_size: 34736116 dataset_size: 60762149 - config_name: lv-af splits: - name: train num_bytes: 115757 num_examples: 1084 download_size: 76282 dataset_size: 115757 - config_name: lv-ar splits: - name: train num_bytes: 58183218 num_examples: 433544 download_size: 32858968 dataset_size: 58183218 - config_name: lv-bg splits: - name: train num_bytes: 60581955 num_examples: 454286 download_size: 33299414 dataset_size: 60581955 - config_name: lv-bn splits: - name: train num_bytes: 6292016 num_examples: 41206 download_size: 3115498 dataset_size: 6292016 - config_name: lv-bs splits: - name: train 
num_bytes: 29849186 num_examples: 265709 download_size: 18261146 dataset_size: 29849186 - config_name: lv-ca splits: - name: train num_bytes: 1387340 num_examples: 12919 download_size: 843739 dataset_size: 1387340 - config_name: lv-cs splits: - name: train num_bytes: 53887385 num_examples: 482427 download_size: 32754813 dataset_size: 53887385 - config_name: lv-da splits: - name: train num_bytes: 45116352 num_examples: 400915 download_size: 26817997 dataset_size: 45116352 - config_name: lv-de splits: - name: train num_bytes: 45899031 num_examples: 401817 download_size: 27191631 dataset_size: 45899031 - config_name: lv-el splits: - name: train num_bytes: 75055620 num_examples: 458340 download_size: 35113730 dataset_size: 75055620 - config_name: lv-en splits: - name: train num_bytes: 57245192 num_examples: 519553 download_size: 33566084 dataset_size: 57245192 - config_name: lv-es splits: - name: train num_bytes: 55795703 num_examples: 494900 download_size: 33232435 dataset_size: 55795703 - config_name: lv-et splits: - name: train num_bytes: 44678131 num_examples: 407588 download_size: 26981034 dataset_size: 44678131 - config_name: lv-eu splits: - name: train num_bytes: 4124882 num_examples: 36813 download_size: 2504925 dataset_size: 4124882 - config_name: lv-fa splits: - name: train num_bytes: 30704766 num_examples: 228404 download_size: 17073357 dataset_size: 30704766 - config_name: lv-fi splits: - name: train num_bytes: 52217413 num_examples: 462498 download_size: 31261563 dataset_size: 52217413 - config_name: lv-fr splits: - name: train num_bytes: 49000094 num_examples: 428815 download_size: 29140668 dataset_size: 49000094 - config_name: lv-gl splits: - name: train num_bytes: 562418 num_examples: 5191 download_size: 352410 dataset_size: 562418 - config_name: lv-he splits: - name: train num_bytes: 58383023 num_examples: 461814 download_size: 32413400 dataset_size: 58383023 - config_name: lv-hi splits: - name: train num_bytes: 1829355 num_examples: 12605 
download_size: 899842 dataset_size: 1829355 - config_name: lv-hr splits: - name: train num_bytes: 41937533 num_examples: 456464 download_size: 30457455 dataset_size: 41937533 - config_name: lv-hu splits: - name: train num_bytes: 45125133 num_examples: 482310 download_size: 32670338 dataset_size: 45125133 - config_name: lv-id splits: - name: train num_bytes: 32447046 num_examples: 342357 download_size: 22393707 dataset_size: 32447046 - config_name: lv-is splits: - name: train num_bytes: 11794316 num_examples: 127778 download_size: 8378184 dataset_size: 11794316 - config_name: lv-it splits: - name: train num_bytes: 39228010 num_examples: 410149 download_size: 27977982 dataset_size: 39228010 - config_name: lv-ja splits: - name: train num_bytes: 10022682 num_examples: 99884 download_size: 6992121 dataset_size: 10022682 - config_name: lv-ka splits: - name: train num_bytes: 1417795 num_examples: 10709 download_size: 801910 dataset_size: 1417795 - config_name: lv-kk splits: - name: train num_bytes: 109347 num_examples: 1134 download_size: 77252 dataset_size: 109347 - config_name: lv-ko splits: - name: train num_bytes: 5487065 num_examples: 57097 download_size: 3954053 dataset_size: 5487065 - config_name: lv-lt splits: - name: train num_bytes: 19938971 num_examples: 219617 download_size: 14533267 dataset_size: 19938971 - config_name: lv-mk splits: - name: train num_bytes: 22346583 num_examples: 163905 download_size: 12205915 dataset_size: 22346583 - config_name: lv-ml splits: - name: train num_bytes: 4482546 num_examples: 23317 download_size: 2102994 dataset_size: 4482546 - config_name: lv-ms splits: - name: train num_bytes: 18020225 num_examples: 163282 download_size: 10428244 dataset_size: 18020225 - config_name: lv-nl splits: - name: train num_bytes: 52239615 num_examples: 457454 download_size: 31069474 dataset_size: 52239615 - config_name: lv-no splits: - name: train num_bytes: 34073367 num_examples: 311867 download_size: 20270299 dataset_size: 34073367 - config_name: 
lv-pl splits: - name: train num_bytes: 64220390 num_examples: 465244 download_size: 32143498 dataset_size: 64220390 - config_name: lv-pt splits: - name: train num_bytes: 66140209 num_examples: 482874 download_size: 32409650 dataset_size: 66140209 - config_name: lv-ro splits: - name: train num_bytes: 67667599 num_examples: 492195 download_size: 33310752 dataset_size: 67667599 - config_name: lv-ru splits: - name: train num_bytes: 67503971 num_examples: 430002 download_size: 31696579 dataset_size: 67503971 - config_name: lv-si splits: - name: train num_bytes: 6460487 num_examples: 34416 download_size: 2809860 dataset_size: 6460487 - config_name: lv-sk splits: - name: train num_bytes: 30412290 num_examples: 224153 download_size: 15280371 dataset_size: 30412290 - config_name: lv-sl splits: - name: train num_bytes: 53642020 num_examples: 405728 download_size: 26677047 dataset_size: 53642020 - config_name: lv-sq splits: - name: train num_bytes: 16618177 num_examples: 157229 download_size: 10835312 dataset_size: 16618177 - config_name: lv-sr splits: - name: train num_bytes: 47382619 num_examples: 454845 download_size: 31181641 dataset_size: 47382619 - config_name: lv-sv splits: - name: train num_bytes: 50065656 num_examples: 367455 download_size: 24417311 dataset_size: 50065656 - config_name: lv-ta splits: - name: train num_bytes: 616365 num_examples: 4102 download_size: 308674 dataset_size: 616365 - config_name: lv-te splits: - name: train num_bytes: 723302 num_examples: 4007 download_size: 310051 dataset_size: 723302 - config_name: lv-th splits: - name: train num_bytes: 20592098 num_examples: 108918 download_size: 9011460 dataset_size: 20592098 - config_name: lv-tr splits: - name: train num_bytes: 68438127 num_examples: 498299 download_size: 33507937 dataset_size: 68438127 - config_name: lv-uk splits: - name: train num_bytes: 4931677 num_examples: 39240 download_size: 2790990 dataset_size: 4931677 - config_name: lv-ur splits: - name: train num_bytes: 131865 num_examples: 
1080 download_size: 78991 dataset_size: 131865 - config_name: lv-vi splits: - name: train num_bytes: 25632280 num_examples: 209398 download_size: 14659253 dataset_size: 25632280 - config_name: mk-af splits: - name: train num_bytes: 547612 num_examples: 3821 download_size: 304271 dataset_size: 547612 - config_name: mk-ar splits: - name: train num_bytes: 374232684 num_examples: 2699946 download_size: 216218190 dataset_size: 374232684 - config_name: mk-bg splits: - name: train num_bytes: 478221322 num_examples: 3017933 download_size: 237086711 dataset_size: 478221322 - config_name: mk-bn splits: - name: train num_bytes: 29823897 num_examples: 164359 download_size: 13832814 dataset_size: 29823897 - config_name: mk-bs splits: - name: train num_bytes: 229038925 num_examples: 1698831 download_size: 123374903 dataset_size: 229038925 - config_name: mk-ca splits: - name: train num_bytes: 17656794 num_examples: 126653 download_size: 9308586 dataset_size: 17656794 - config_name: mk-cs splits: - name: train num_bytes: 405932340 num_examples: 2962846 download_size: 219285568 dataset_size: 405932340 - config_name: mk-da splits: - name: train num_bytes: 323793502 num_examples: 2329327 download_size: 170531418 dataset_size: 323793502 - config_name: mk-de splits: - name: train num_bytes: 297927753 num_examples: 2097352 download_size: 157682179 dataset_size: 297927753 - config_name: mk-el splits: - name: train num_bytes: 562546442 num_examples: 2962520 download_size: 244467122 dataset_size: 562546442 - config_name: mk-en splits: - name: train num_bytes: 458454572 num_examples: 3401326 download_size: 240194746 dataset_size: 458454572 - config_name: mk-es splits: - name: train num_bytes: 441995857 num_examples: 3196372 download_size: 234093907 dataset_size: 441995857 - config_name: mk-et splits: - name: train num_bytes: 319552120 num_examples: 2331823 download_size: 171063902 dataset_size: 319552120 - config_name: mk-eu splits: - name: train num_bytes: 17492794 num_examples: 127072 
download_size: 9422558 dataset_size: 17492794 - config_name: mk-fa splits: - name: train num_bytes: 182133904 num_examples: 1130441 download_size: 91441894 dataset_size: 182133904 - config_name: mk-fi splits: - name: train num_bytes: 368107065 num_examples: 2624789 download_size: 195591258 dataset_size: 368107065 - config_name: mk-fr splits: - name: train num_bytes: 373046032 num_examples: 2652402 download_size: 196917290 dataset_size: 373046032 - config_name: mk-gl splits: - name: train num_bytes: 3874349 num_examples: 27702 download_size: 2087533 dataset_size: 3874349 - config_name: mk-he splits: - name: train num_bytes: 421964200 num_examples: 2766101 download_size: 211090133 dataset_size: 421964200 - config_name: mk-hi splits: - name: train num_bytes: 3171731 num_examples: 16986 download_size: 1465548 dataset_size: 3171731 - config_name: mk-hr splits: - name: train num_bytes: 350279983 num_examples: 3061339 download_size: 218837531 dataset_size: 350279983 - config_name: mk-hu splits: - name: train num_bytes: 343719780 num_examples: 2907735 download_size: 214955225 dataset_size: 343719780 - config_name: mk-hy splits: - name: train num_bytes: 24321 num_examples: 152 download_size: 18853 dataset_size: 24321 - config_name: mk-id splits: - name: train num_bytes: 187927515 num_examples: 1543878 download_size: 112513694 dataset_size: 187927515 - config_name: mk-is splits: - name: train num_bytes: 50166332 num_examples: 410244 download_size: 30918514 dataset_size: 50166332 - config_name: mk-it splits: - name: train num_bytes: 278781989 num_examples: 2307953 download_size: 172150302 dataset_size: 278781989 - config_name: mk-ja splits: - name: train num_bytes: 66737520 num_examples: 522853 download_size: 39956181 dataset_size: 66737520 - config_name: mk-ka splits: - name: train num_bytes: 11762512 num_examples: 75478 download_size: 6012887 dataset_size: 11762512 - config_name: mk-ko splits: - name: train num_bytes: 25745217 num_examples: 202446 download_size: 15942845 
dataset_size: 25745217 - config_name: mk-lt splits: - name: train num_bytes: 57057719 num_examples: 476383 download_size: 36037748 dataset_size: 57057719 - config_name: mk-lv splits: - name: train num_bytes: 19367553 num_examples: 163905 download_size: 12205915 dataset_size: 19367553 - config_name: mk-ml splits: - name: train num_bytes: 24920602 num_examples: 113049 download_size: 10974984 dataset_size: 24920602 - config_name: mk-ms splits: - name: train num_bytes: 95635628 num_examples: 591423 download_size: 42208282 dataset_size: 95635628 - config_name: mk-nl splits: - name: train num_bytes: 390871818 num_examples: 2779246 download_size: 206224989 dataset_size: 390871818 - config_name: mk-no splits: - name: train num_bytes: 207968702 num_examples: 1511243 download_size: 110015749 dataset_size: 207968702 - config_name: mk-pl splits: - name: train num_bytes: 484192378 num_examples: 2941612 download_size: 221731216 dataset_size: 484192378 - config_name: mk-pt splits: - name: train num_bytes: 483822959 num_examples: 2971694 download_size: 218128759 dataset_size: 483822959 - config_name: mk-ro splits: - name: train num_bytes: 526156422 num_examples: 3241598 download_size: 237980749 dataset_size: 526156422 - config_name: mk-ru splits: - name: train num_bytes: 371160788 num_examples: 2007204 download_size: 162675569 dataset_size: 371160788 - config_name: mk-si splits: - name: train num_bytes: 37126002 num_examples: 172595 download_size: 15221060 dataset_size: 37126002 - config_name: mk-sk splits: - name: train num_bytes: 172487559 num_examples: 1067661 download_size: 79786706 dataset_size: 172487559 - config_name: mk-sl splits: - name: train num_bytes: 444808579 num_examples: 2791500 download_size: 202154202 dataset_size: 444808579 - config_name: mk-sq splits: - name: train num_bytes: 114430301 num_examples: 696310 download_size: 52059527 dataset_size: 114430301 - config_name: mk-sr splits: - name: train num_bytes: 522671884 num_examples: 3223444 download_size: 
235263670 dataset_size: 522671884 - config_name: mk-sv splits: - name: train num_bytes: 379728038 num_examples: 2286077 download_size: 170579304 dataset_size: 379728038 - config_name: mk-ta splits: - name: train num_bytes: 1145367 num_examples: 5185 download_size: 454573 dataset_size: 1145367 - config_name: mk-te splits: - name: train num_bytes: 484852 num_examples: 2374 download_size: 199484 dataset_size: 484852 - config_name: mk-th splits: - name: train num_bytes: 149942166 num_examples: 688925 download_size: 61004617 dataset_size: 149942166 - config_name: mk-tl splits: - name: train num_bytes: 635154 num_examples: 3644 download_size: 293613 dataset_size: 635154 - config_name: mk-tr splits: - name: train num_bytes: 509038938 num_examples: 3118787 download_size: 229400797 dataset_size: 509038938 - config_name: mk-uk splits: - name: train num_bytes: 25586675 num_examples: 161932 download_size: 13013873 dataset_size: 25586675 - config_name: mk-ur splits: - name: train num_bytes: 1793710 num_examples: 10853 download_size: 908669 dataset_size: 1793710 - config_name: mk-vi splits: - name: train num_bytes: 136198215 num_examples: 910783 download_size: 69998350 dataset_size: 136198215 - config_name: ml-af splits: - name: train num_bytes: 401309 num_examples: 2184 download_size: 197735 dataset_size: 401309 - config_name: ml-ar splits: - name: train num_bytes: 63250771 num_examples: 323386 download_size: 28207730 dataset_size: 63250771 - config_name: ml-bg splits: - name: train num_bytes: 56787827 num_examples: 271301 download_size: 24989585 dataset_size: 56787827 - config_name: ml-bn splits: - name: train num_bytes: 14630579 num_examples: 68197 download_size: 6092327 dataset_size: 14630579 - config_name: ml-bs splits: - name: train num_bytes: 27367058 num_examples: 145776 download_size: 12706118 dataset_size: 27367058 - config_name: ml-ca splits: - name: train num_bytes: 1185797 num_examples: 6293 download_size: 553556 dataset_size: 1185797 - config_name: ml-cs splits: - 
name: train num_bytes: 54499808 num_examples: 294981 download_size: 25363536 dataset_size: 54499808 - config_name: ml-da splits: - name: train num_bytes: 37961114 num_examples: 201499 download_size: 17321019 dataset_size: 37961114 - config_name: ml-de splits: - name: train num_bytes: 45593227 num_examples: 244269 download_size: 20973806 dataset_size: 45593227 - config_name: ml-el splits: - name: train num_bytes: 72077898 num_examples: 302851 download_size: 28584756 dataset_size: 72077898 - config_name: ml-en splits: - name: train num_bytes: 68004683 num_examples: 386868 download_size: 30848251 dataset_size: 68004683 - config_name: ml-es splits: - name: train num_bytes: 60628538 num_examples: 327235 download_size: 27796481 dataset_size: 60628538 - config_name: ml-et splits: - name: train num_bytes: 33773736 num_examples: 179992 download_size: 15670139 dataset_size: 33773736 - config_name: ml-eu splits: - name: train num_bytes: 7757339 num_examples: 42720 download_size: 3536579 dataset_size: 7757339 - config_name: ml-fa splits: - name: train num_bytes: 33886591 num_examples: 171298 download_size: 15042905 dataset_size: 33886591 - config_name: ml-fi splits: - name: train num_bytes: 44141339 num_examples: 232475 download_size: 20214579 dataset_size: 44141339 - config_name: ml-fr splits: - name: train num_bytes: 50578439 num_examples: 265993 download_size: 23079546 dataset_size: 50578439 - config_name: ml-gl splits: - name: train num_bytes: 890028 num_examples: 5223 download_size: 414591 dataset_size: 890028 - config_name: ml-he splits: - name: train num_bytes: 50446385 num_examples: 250067 download_size: 22218604 dataset_size: 50446385 - config_name: ml-hi splits: - name: train num_bytes: 1137993 num_examples: 5332 download_size: 474643 dataset_size: 1137993 - config_name: ml-hr splits: - name: train num_bytes: 42356328 num_examples: 255893 download_size: 21783448 dataset_size: 42356328 - config_name: ml-hu splits: - name: train num_bytes: 46470052 num_examples: 279127 
download_size: 23953648 dataset_size: 46470052 - config_name: ml-hy splits: - name: train num_bytes: 7404 num_examples: 43 download_size: 8253 dataset_size: 7404 - config_name: ml-id splits: - name: train num_bytes: 31781958 num_examples: 193248 download_size: 15870074 dataset_size: 31781958 - config_name: ml-is splits: - name: train num_bytes: 5230011 num_examples: 29016 download_size: 2638158 dataset_size: 5230011 - config_name: ml-it splits: - name: train num_bytes: 37144662 num_examples: 223484 download_size: 19048419 dataset_size: 37144662 - config_name: ml-ja splits: - name: train num_bytes: 11491708 num_examples: 67305 download_size: 5756663 dataset_size: 11491708 - config_name: ml-ka splits: - name: train num_bytes: 1352482 num_examples: 6559 download_size: 615557 dataset_size: 1352482 - config_name: ml-ko splits: - name: train num_bytes: 5210726 num_examples: 30444 download_size: 2660568 dataset_size: 5210726 - config_name: ml-lt splits: - name: train num_bytes: 11256683 num_examples: 66400 download_size: 5829741 dataset_size: 11256683 - config_name: ml-lv splits: - name: train num_bytes: 4071030 num_examples: 23317 download_size: 2102994 dataset_size: 4071030 - config_name: ml-mk splits: - name: train num_bytes: 22901401 num_examples: 113049 download_size: 10974984 dataset_size: 22901401 - config_name: ml-ms splits: - name: train num_bytes: 16360458 num_examples: 89280 download_size: 7354861 dataset_size: 16360458 - config_name: ml-nl splits: - name: train num_bytes: 44180361 num_examples: 230683 download_size: 20121544 dataset_size: 44180361 - config_name: ml-no splits: - name: train num_bytes: 29805548 num_examples: 159071 download_size: 13573144 dataset_size: 29805548 - config_name: ml-pl splits: - name: train num_bytes: 57459762 num_examples: 266113 download_size: 23530753 dataset_size: 57459762 - config_name: ml-pt splits: - name: train num_bytes: 57065646 num_examples: 268145 download_size: 23195802 dataset_size: 57065646 - config_name: ml-ro 
splits: - name: train num_bytes: 65957623 num_examples: 313496 download_size: 26815291 dataset_size: 65957623 - config_name: ml-ru splits: - name: train num_bytes: 49199021 num_examples: 210812 download_size: 19642858 dataset_size: 49199021 - config_name: ml-si splits: - name: train num_bytes: 7333430 num_examples: 28013 download_size: 2791684 dataset_size: 7333430 - config_name: ml-sk splits: - name: train num_bytes: 19677520 num_examples: 92685 download_size: 8173584 dataset_size: 19677520 - config_name: ml-sl splits: - name: train num_bytes: 46421042 num_examples: 220558 download_size: 18940801 dataset_size: 46421042 - config_name: ml-sq splits: - name: train num_bytes: 14645597 num_examples: 67813 download_size: 5991789 dataset_size: 14645597 - config_name: ml-sr splits: - name: train num_bytes: 60325315 num_examples: 282168 download_size: 24547545 dataset_size: 60325315 - config_name: ml-sv splits: - name: train num_bytes: 49660019 num_examples: 230290 download_size: 19986072 dataset_size: 49660019 - config_name: ml-ta splits: - name: train num_bytes: 875894 num_examples: 3431 download_size: 329149 dataset_size: 875894 - config_name: ml-th splits: - name: train num_bytes: 17647798 num_examples: 68559 download_size: 6701881 dataset_size: 17647798 - config_name: ml-tl splits: - name: train num_bytes: 731992 num_examples: 3295 download_size: 295285 dataset_size: 731992 - config_name: ml-tr splits: - name: train num_bytes: 68300214 num_examples: 327000 download_size: 27659798 dataset_size: 68300214 - config_name: ml-uk splits: - name: train num_bytes: 2867827 num_examples: 13464 download_size: 1302429 dataset_size: 2867827 - config_name: ml-ur splits: - name: train num_bytes: 123564 num_examples: 748 download_size: 63127 dataset_size: 123564 - config_name: ml-vi splits: - name: train num_bytes: 24159902 num_examples: 124298 download_size: 10892819 dataset_size: 24159902 - config_name: ms-af splits: - name: train num_bytes: 126424 num_examples: 1308 download_size: 
73962 dataset_size: 126424 - config_name: ms-ar splits: - name: train num_bytes: 206583889 num_examples: 1542856 download_size: 110085370 dataset_size: 206583889 - config_name: ms-bg splits: - name: train num_bytes: 203822838 num_examples: 1497927 download_size: 106058627 dataset_size: 203822838 - config_name: ms-bn splits: - name: train num_bytes: 29748935 num_examples: 191371 download_size: 14143297 dataset_size: 29748935 - config_name: ms-bs splits: - name: train num_bytes: 82700580 num_examples: 724398 download_size: 47616087 dataset_size: 82700580 - config_name: ms-ca splits: - name: train num_bytes: 5489853 num_examples: 46781 download_size: 3047263 dataset_size: 5489853 - config_name: ms-cs splits: - name: train num_bytes: 180186908 num_examples: 1576665 download_size: 103225596 dataset_size: 180186908 - config_name: ms-da splits: - name: train num_bytes: 144576253 num_examples: 1247971 download_size: 80934783 dataset_size: 144576253 - config_name: ms-de splits: - name: train num_bytes: 141273355 num_examples: 1194238 download_size: 79321363 dataset_size: 141273355 - config_name: ms-el splits: - name: train num_bytes: 256485741 num_examples: 1547627 download_size: 113766025 dataset_size: 256485741 - config_name: ms-en splits: - name: train num_bytes: 215138272 num_examples: 1928345 download_size: 118514831 dataset_size: 215138272 - config_name: ms-es splits: - name: train num_bytes: 198890804 num_examples: 1722512 download_size: 111352461 dataset_size: 198890804 - config_name: ms-et splits: - name: train num_bytes: 129364714 num_examples: 1135835 download_size: 73737881 dataset_size: 129364714 - config_name: ms-eu splits: - name: train num_bytes: 14381735 num_examples: 129201 download_size: 8176456 dataset_size: 14381735 - config_name: ms-fa splits: - name: train num_bytes: 136176676 num_examples: 1003616 download_size: 71398776 dataset_size: 136176676 - config_name: ms-fi splits: - name: train num_bytes: 148663375 num_examples: 1276963 download_size: 
84059521 dataset_size: 148663375 - config_name: ms-fr splits: - name: train num_bytes: 166280312 num_examples: 1414481 download_size: 93055506 dataset_size: 166280312 - config_name: ms-gl splits: - name: train num_bytes: 1385473 num_examples: 12039 download_size: 815693 dataset_size: 1385473 - config_name: ms-he splits: - name: train num_bytes: 186774584 num_examples: 1455610 download_size: 98066940 dataset_size: 186774584 - config_name: ms-hi splits: - name: train num_bytes: 4341050 num_examples: 27906 download_size: 2067105 dataset_size: 4341050 - config_name: ms-hr splits: - name: train num_bytes: 139047480 num_examples: 1469356 download_size: 95064865 dataset_size: 139047480 - config_name: ms-hu splits: - name: train num_bytes: 151800288 num_examples: 1581433 download_size: 103501072 dataset_size: 151800288 - config_name: ms-id splits: - name: train num_bytes: 180449798 num_examples: 1589953 download_size: 97188266 dataset_size: 180449798 - config_name: ms-is splits: - name: train num_bytes: 23161291 num_examples: 231959 download_size: 15558688 dataset_size: 23161291 - config_name: ms-it splits: - name: train num_bytes: 123009824 num_examples: 1261501 download_size: 82799145 dataset_size: 123009824 - config_name: ms-ja splits: - name: train num_bytes: 50050949 num_examples: 483095 download_size: 32506111 dataset_size: 50050949 - config_name: ms-ka splits: - name: train num_bytes: 4387757 num_examples: 31860 download_size: 2364579 dataset_size: 4387757 - config_name: ms-kk splits: - name: train num_bytes: 118422 num_examples: 1122 download_size: 79906 dataset_size: 118422 - config_name: ms-ko splits: - name: train num_bytes: 28738685 num_examples: 285016 download_size: 19270742 dataset_size: 28738685 - config_name: ms-lt splits: - name: train num_bytes: 37557956 num_examples: 393888 download_size: 25961704 dataset_size: 37557956 - config_name: ms-lv splits: - name: train num_bytes: 15053854 num_examples: 163282 download_size: 10428244 dataset_size: 15053854 - 
config_name: ms-mk splits: - name: train num_bytes: 95635628 num_examples: 591423 download_size: 42208282 dataset_size: 95635628 - config_name: ms-ml splits: - name: train num_bytes: 14730313 num_examples: 89280 download_size: 7354861 dataset_size: 14730313 - config_name: ms-nl splits: - name: train num_bytes: 161789387 num_examples: 1384109 download_size: 90602829 dataset_size: 161789387 - config_name: ms-no splits: - name: train num_bytes: 101081718 num_examples: 884779 download_size: 56936024 dataset_size: 101081718 - config_name: ms-pl splits: - name: train num_bytes: 215506009 num_examples: 1525708 download_size: 101858292 dataset_size: 215506009 - config_name: ms-pt splits: - name: train num_bytes: 212534332 num_examples: 1515435 download_size: 98601321 dataset_size: 212534332 - config_name: ms-ro splits: - name: train num_bytes: 242025589 num_examples: 1723823 download_size: 112453322 dataset_size: 242025589 - config_name: ms-ru splits: - name: train num_bytes: 182484421 num_examples: 1140751 download_size: 81789248 dataset_size: 182484421 - config_name: ms-si splits: - name: train num_bytes: 37899575 num_examples: 204061 download_size: 15665534 dataset_size: 37899575 - config_name: ms-sk splits: - name: train num_bytes: 87399132 num_examples: 631114 download_size: 41690887 dataset_size: 87399132 - config_name: ms-sl splits: - name: train num_bytes: 175110545 num_examples: 1275879 download_size: 82506421 dataset_size: 175110545 - config_name: ms-sq splits: - name: train num_bytes: 57324616 num_examples: 403811 download_size: 26873094 dataset_size: 57324616 - config_name: ms-sr splits: - name: train num_bytes: 205703294 num_examples: 1444162 download_size: 96137539 dataset_size: 205703294 - config_name: ms-sv splits: - name: train num_bytes: 172187858 num_examples: 1212972 download_size: 79770776 dataset_size: 172187858 - config_name: ms-ta splits: - name: train num_bytes: 1746602 num_examples: 9008 download_size: 690305 dataset_size: 1746602 - config_name: 
ms-te splits: - name: train num_bytes: 896707 num_examples: 4700 download_size: 379124 dataset_size: 896707 - config_name: ms-th splits: - name: train num_bytes: 77586418 num_examples: 406935 download_size: 32247478 dataset_size: 77586418 - config_name: ms-tl splits: - name: train num_bytes: 982254 num_examples: 7258 download_size: 459124 dataset_size: 982254 - config_name: ms-tr splits: - name: train num_bytes: 235915355 num_examples: 1677177 download_size: 109357496 dataset_size: 235915355 - config_name: ms-uk splits: - name: train num_bytes: 12988887 num_examples: 98951 download_size: 6998791 dataset_size: 12988887 - config_name: ms-ur splits: - name: train num_bytes: 2634582 num_examples: 19054 download_size: 1384509 dataset_size: 2634582 - config_name: ms-vi splits: - name: train num_bytes: 90472325 num_examples: 851694 download_size: 57256701 dataset_size: 90472325 - config_name: nl-af splits: - name: train num_bytes: 2627143 num_examples: 22606 download_size: 1556449 dataset_size: 2627143 - config_name: nl-ar splits: - name: train num_bytes: 2997566468 num_examples: 21221483 download_size: 1618626545 dataset_size: 2997566468 - config_name: nl-bg splits: - name: train num_bytes: 3816167220 num_examples: 26311194 download_size: 2006975358 dataset_size: 3816167220 - config_name: nl-bn splits: - name: train num_bytes: 45017780 num_examples: 279962 download_size: 21852207 dataset_size: 45017780 - config_name: nl-bs splits: - name: train num_bytes: 1154797180 num_examples: 9753423 download_size: 677689849 dataset_size: 1154797180 - config_name: nl-ca splits: - name: train num_bytes: 46043243 num_examples: 382529 download_size: 26512627 dataset_size: 46043243 - config_name: nl-cs splits: - name: train num_bytes: 3323074342 num_examples: 27395678 download_size: 1938315831 dataset_size: 3323074342 - config_name: nl-da splits: - name: train num_bytes: 1437483648 num_examples: 12208535 download_size: 818731226 dataset_size: 1437483648 - config_name: nl-de splits: - 
name: train num_bytes: 1925454169 num_examples: 15665535 download_size: 1095338277 dataset_size: 1925454169 - config_name: nl-el splits: - name: train num_bytes: 4553680280 num_examples: 26138141 download_size: 2065117639 dataset_size: 4553680280 - config_name: nl-en splits: - name: train num_bytes: 4478308445 num_examples: 37200621 download_size: 2530643758 dataset_size: 4478308445 - config_name: nl-es splits: - name: train num_bytes: 4002246274 num_examples: 32561921 download_size: 2284595130 dataset_size: 4002246274 - config_name: nl-et splits: - name: train num_bytes: 1210250838 num_examples: 10251088 download_size: 702626738 dataset_size: 1210250838 - config_name: nl-eu splits: - name: train num_bytes: 71323302 num_examples: 619884 download_size: 41644849 dataset_size: 71323302 - config_name: nl-fa splits: - name: train num_bytes: 688115102 num_examples: 4803759 download_size: 366568756 dataset_size: 688115102 - config_name: nl-fi splits: - name: train num_bytes: 2542834578 num_examples: 21067707 download_size: 1457985861 dataset_size: 2542834578 - config_name: nl-fr splits: - name: train num_bytes: 3118986373 num_examples: 25330038 download_size: 1769479686 dataset_size: 3118986373 - config_name: nl-gl splits: - name: train num_bytes: 8264867 num_examples: 69195 download_size: 4903574 dataset_size: 8264867 - config_name: nl-he splits: - name: train num_bytes: 3038751419 num_examples: 22186572 download_size: 1615591627 dataset_size: 3038751419 - config_name: nl-hi splits: - name: train num_bytes: 9185691 num_examples: 55034 download_size: 4438322 dataset_size: 9185691 - config_name: nl-hr splits: - name: train num_bytes: 2454959035 num_examples: 24557331 download_size: 1702960839 dataset_size: 2454959035 - config_name: nl-hu splits: - name: train num_bytes: 2806075602 num_examples: 27342219 download_size: 1936401203 dataset_size: 2806075602 - config_name: nl-hy splits: - name: train num_bytes: 156412 num_examples: 982 download_size: 101299 dataset_size: 156412 
- config_name: nl-id splits: - name: train num_bytes: 653932450 num_examples: 6413754 download_size: 431759172 dataset_size: 653932450 - config_name: nl-is splits: - name: train num_bytes: 133823993 num_examples: 1354992 download_size: 91955616 dataset_size: 133823993 - config_name: nl-it splits: - name: train num_bytes: 2340718060 num_examples: 22308465 download_size: 1591405127 dataset_size: 2340718060 - config_name: nl-ja splits: - name: train num_bytes: 181601107 num_examples: 1697628 download_size: 120037111 dataset_size: 181601107 - config_name: nl-ka splits: - name: train num_bytes: 22283828 num_examples: 155101 download_size: 12100194 dataset_size: 22283828 - config_name: nl-kk splits: - name: train num_bytes: 208602 num_examples: 1853 download_size: 136139 dataset_size: 208602 - config_name: nl-ko splits: - name: train num_bytes: 118606452 num_examples: 1100911 download_size: 81012605 dataset_size: 118606452 - config_name: nl-lt splits: - name: train num_bytes: 115747935 num_examples: 1205932 download_size: 82041226 dataset_size: 115747935 - config_name: nl-lv splits: - name: train num_bytes: 43963725 num_examples: 457454 download_size: 31069474 dataset_size: 43963725 - config_name: nl-mk splits: - name: train num_bytes: 337994308 num_examples: 2779246 download_size: 206224989 dataset_size: 337994308 - config_name: nl-ml splits: - name: train num_bytes: 40013711 num_examples: 230683 download_size: 20121544 dataset_size: 40013711 - config_name: nl-ms splits: - name: train num_bytes: 136056964 num_examples: 1384109 download_size: 90602829 dataset_size: 136056964 - config_name: nl-no splits: - name: train num_bytes: 801954845 num_examples: 6903536 download_size: 459370726 dataset_size: 801954845 - config_name: nl-pl splits: - name: train num_bytes: 4183226980 num_examples: 28349529 download_size: 2025173401 dataset_size: 4183226980 - config_name: nl-pt splits: - name: train num_bytes: 3588967852 num_examples: 24567302 download_size: 1706649428 dataset_size: 
3588967852 - config_name: nl-ro splits: - name: train num_bytes: 4613911945 num_examples: 31320577 download_size: 2206854988 dataset_size: 4613911945 - config_name: nl-ru splits: - name: train num_bytes: 2820479927 num_examples: 16326144 download_size: 1293052874 dataset_size: 2820479927 - config_name: nl-si splits: - name: train num_bytes: 80378326 num_examples: 410919 download_size: 33830860 dataset_size: 80378326 - config_name: nl-sk splits: - name: train num_bytes: 896652608 num_examples: 6196568 download_size: 438561307 dataset_size: 896652608 - config_name: nl-sl splits: - name: train num_bytes: 2204849714 num_examples: 15568474 download_size: 1065859748 dataset_size: 2204849714 - config_name: nl-sq splits: - name: train num_bytes: 229813924 num_examples: 1601437 download_size: 110360527 dataset_size: 229813924 - config_name: nl-sr splits: - name: train num_bytes: 3841706813 num_examples: 26302268 download_size: 1853141017 dataset_size: 3841706813 - config_name: nl-sv splits: - name: train num_bytes: 2029650600 num_examples: 14067933 download_size: 959529946 dataset_size: 2029650600 - config_name: nl-ta splits: - name: train num_bytes: 4280539 num_examples: 20504 download_size: 1716607 dataset_size: 4280539 - config_name: nl-te splits: - name: train num_bytes: 3200739 num_examples: 16073 download_size: 1350913 dataset_size: 3200739 - config_name: nl-th splits: - name: train num_bytes: 514663017 num_examples: 2529429 download_size: 216398115 dataset_size: 514663017 - config_name: nl-tl splits: - name: train num_bytes: 1159161 num_examples: 8182 download_size: 561650 dataset_size: 1159161 - config_name: nl-tr splits: - name: train num_bytes: 4213929782 num_examples: 28559615 download_size: 2005730992 dataset_size: 4213929782 - config_name: nl-uk splits: - name: train num_bytes: 87269176 num_examples: 613782 download_size: 47172139 dataset_size: 87269176 - config_name: nl-ur splits: - name: train num_bytes: 3131880 num_examples: 21485 download_size: 1700786 
dataset_size: 3131880 - config_name: nl-vi splits: - name: train num_bytes: 357575974 num_examples: 2748283 download_size: 196341160 dataset_size: 357575974 - config_name: no-af splits: - name: train num_bytes: 551517 num_examples: 5130 download_size: 338168 dataset_size: 551517 - config_name: no-ar splits: - name: train num_bytes: 814068826 num_examples: 5954781 download_size: 442560415 dataset_size: 814068826 - config_name: no-bg splits: - name: train num_bytes: 919120538 num_examples: 6615774 download_size: 487168253 dataset_size: 919120538 - config_name: no-bn splits: - name: train num_bytes: 36380592 num_examples: 230949 download_size: 17643199 dataset_size: 36380592 - config_name: no-bs splits: - name: train num_bytes: 328673210 num_examples: 2885057 download_size: 194207989 dataset_size: 328673210 - config_name: no-ca splits: - name: train num_bytes: 23195107 num_examples: 196221 download_size: 13382938 dataset_size: 23195107 - config_name: no-cs splits: - name: train num_bytes: 825472787 num_examples: 7135203 download_size: 485832305 dataset_size: 825472787 - config_name: no-da splits: - name: train num_bytes: 742282904 num_examples: 6690423 download_size: 419948754 dataset_size: 742282904 - config_name: no-de splits: - name: train num_bytes: 636738826 num_examples: 5383006 download_size: 364233509 dataset_size: 636738826 - config_name: no-el splits: - name: train num_bytes: 1155041215 num_examples: 6850704 download_size: 524232028 dataset_size: 1155041215 - config_name: no-en splits: - name: train num_bytes: 995456478 num_examples: 8624996 download_size: 566632578 dataset_size: 995456478 - config_name: no-es splits: - name: train num_bytes: 924514961 num_examples: 7892621 download_size: 532628997 dataset_size: 924514961 - config_name: no-et splits: - name: train num_bytes: 445334705 num_examples: 3895270 download_size: 260557025 dataset_size: 445334705 - config_name: no-eu splits: - name: train num_bytes: 38313391 num_examples: 344246 download_size: 
22405880 dataset_size: 38313391 - config_name: no-fa splits: - name: train num_bytes: 331811948 num_examples: 2389634 download_size: 177331980 dataset_size: 331811948 - config_name: no-fi splits: - name: train num_bytes: 876009036 num_examples: 7741367 download_size: 503210712 dataset_size: 876009036 - config_name: no-fr splits: - name: train num_bytes: 747933681 num_examples: 6324199 download_size: 428745376 dataset_size: 747933681 - config_name: no-gl splits: - name: train num_bytes: 4044384 num_examples: 35138 download_size: 2396760 dataset_size: 4044384 - config_name: no-he splits: - name: train num_bytes: 819402718 num_examples: 6213374 download_size: 439100512 dataset_size: 819402718 - config_name: no-hi splits: - name: train num_bytes: 6448603 num_examples: 41599 download_size: 3109542 dataset_size: 6448603 - config_name: no-hr splits: - name: train num_bytes: 624280079 num_examples: 6565140 download_size: 439095326 dataset_size: 624280079 - config_name: no-hu splits: - name: train num_bytes: 667450464 num_examples: 6855909 download_size: 467762414 dataset_size: 667450464 - config_name: no-is splits: - name: train num_bytes: 80497847 num_examples: 838262 download_size: 55606804 dataset_size: 80497847 - config_name: no-it splits: - name: train num_bytes: 598618977 num_examples: 6021240 download_size: 412543271 dataset_size: 598618977 - config_name: no-ja splits: - name: train num_bytes: 95752150 num_examples: 943397 download_size: 64092342 dataset_size: 95752150 - config_name: no-ka splits: - name: train num_bytes: 12353255 num_examples: 88898 download_size: 6733785 dataset_size: 12353255 - config_name: no-kk splits: - name: train num_bytes: 251387 num_examples: 2365 download_size: 164755 dataset_size: 251387 - config_name: no-ko splits: - name: train num_bytes: 54141218 num_examples: 525062 download_size: 37310289 dataset_size: 54141218 - config_name: no-lt splits: - name: train num_bytes: 73021393 num_examples: 783086 download_size: 52149155 dataset_size: 
73021393 - config_name: no-lv splits: - name: train num_bytes: 28417463 num_examples: 311867 download_size: 20270299 dataset_size: 28417463 - config_name: no-mk splits: - name: train num_bytes: 179503364 num_examples: 1511243 download_size: 110015749 dataset_size: 179503364 - config_name: no-ml splits: - name: train num_bytes: 26951372 num_examples: 159071 download_size: 13573144 dataset_size: 26951372 - config_name: no-ms splits: - name: train num_bytes: 84841750 num_examples: 884779 download_size: 56936024 dataset_size: 84841750 - config_name: no-nl splits: - name: train num_bytes: 671240109 num_examples: 6903536 download_size: 459370726 dataset_size: 671240109 - config_name: no-pl splits: - name: train num_bytes: 964585514 num_examples: 6814103 download_size: 467264962 dataset_size: 964585514 - config_name: no-pt splits: - name: train num_bytes: 959298855 num_examples: 6814852 download_size: 456553093 dataset_size: 959298855 - config_name: no-ro splits: - name: train num_bytes: 1073767176 num_examples: 7541069 download_size: 514649036 dataset_size: 1073767176 - config_name: no-ru splits: - name: train num_bytes: 760739190 num_examples: 4566927 download_size: 348678963 dataset_size: 760739190 - config_name: no-si splits: - name: train num_bytes: 49172140 num_examples: 261198 download_size: 20692726 dataset_size: 49172140 - config_name: no-sk splits: - name: train num_bytes: 250701379 num_examples: 1787610 download_size: 122783218 dataset_size: 250701379 - config_name: no-sl splits: - name: train num_bytes: 708474521 num_examples: 5166742 download_size: 342326878 dataset_size: 708474521 - config_name: no-sq splits: - name: train num_bytes: 117701159 num_examples: 835802 download_size: 56766057 dataset_size: 117701159 - config_name: no-sr splits: - name: train num_bytes: 927894774 num_examples: 6553801 download_size: 447123478 dataset_size: 927894774 - config_name: no-sv splits: - name: train num_bytes: 961631314 num_examples: 7025119 download_size: 449177330 
dataset_size: 961631314 - config_name: no-ta splits: - name: train num_bytes: 2413147 num_examples: 12325 download_size: 954112 dataset_size: 2413147 - config_name: no-te splits: - name: train num_bytes: 2468110 num_examples: 13042 download_size: 1031507 dataset_size: 2468110 - config_name: no-th splits: - name: train num_bytes: 178887137 num_examples: 912368 download_size: 75346945 dataset_size: 178887137 - config_name: no-tl splits: - name: train num_bytes: 403664 num_examples: 3034 download_size: 199818 dataset_size: 403664 - config_name: no-tr splits: - name: train num_bytes: 1041876617 num_examples: 7313912 download_size: 496547963 dataset_size: 1041876617 - config_name: no-uk splits: - name: train num_bytes: 35993696 num_examples: 264774 download_size: 19627171 dataset_size: 35993696 - config_name: no-ur splits: - name: train num_bytes: 2427469 num_examples: 17247 download_size: 1316323 dataset_size: 2427469 - config_name: no-vi splits: - name: train num_bytes: 166963466 num_examples: 1541764 download_size: 107911512 dataset_size: 166963466 - config_name: pl-af splits: - name: train num_bytes: 1963735 num_examples: 16326 download_size: 1195507 dataset_size: 1963735 - config_name: pl-ar splits: - name: train num_bytes: 3359197556 num_examples: 24043342 download_size: 1845147561 dataset_size: 3359197556 - config_name: pl-bg splits: - name: train num_bytes: 4401912719 num_examples: 30589171 download_size: 2353745454 dataset_size: 4401912719 - config_name: pl-bn splits: - name: train num_bytes: 48871457 num_examples: 308384 download_size: 24116776 dataset_size: 48871457 - config_name: pl-bs splits: - name: train num_bytes: 1275648323 num_examples: 10823330 download_size: 762743643 dataset_size: 1275648323 - config_name: pl-ca splits: - name: train num_bytes: 48529888 num_examples: 407987 download_size: 28524973 dataset_size: 48529888 - config_name: pl-cs splits: - name: train num_bytes: 3975557611 num_examples: 33029487 download_size: 2357243437 dataset_size: 
3975557611 - config_name: pl-da splits: - name: train num_bytes: 1443008450 num_examples: 12197406 download_size: 840550547 dataset_size: 1443008450 - config_name: pl-de splits: - name: train num_bytes: 1983979187 num_examples: 16076178 download_size: 1151054584 dataset_size: 1983979187 - config_name: pl-el splits: - name: train num_bytes: 4321466672 num_examples: 29058119 download_size: 2312266659 dataset_size: 4321466672 - config_name: pl-en splits: - name: train num_bytes: 5028902061 num_examples: 41998942 download_size: 2892850883 dataset_size: 5028902061 - config_name: pl-es splits: - name: train num_bytes: 4630342379 num_examples: 37756338 download_size: 2691429471 dataset_size: 4630342379 - config_name: pl-et splits: - name: train num_bytes: 1280684329 num_examples: 10903089 download_size: 757805613 dataset_size: 1280684329 - config_name: pl-eu splits: - name: train num_bytes: 73963554 num_examples: 644058 download_size: 44036892 dataset_size: 73963554 - config_name: pl-fa splits: - name: train num_bytes: 714495457 num_examples: 5037487 download_size: 387657665 dataset_size: 714495457 - config_name: pl-fi splits: - name: train num_bytes: 2631187698 num_examples: 21727045 download_size: 1540894499 dataset_size: 2631187698 - config_name: pl-fr splits: - name: train num_bytes: 3514179003 num_examples: 28457951 download_size: 2031672762 dataset_size: 3514179003 - config_name: pl-gl splits: - name: train num_bytes: 11147504 num_examples: 92916 download_size: 6720319 dataset_size: 11147504 - config_name: pl-he splits: - name: train num_bytes: 3374035814 num_examples: 24813352 download_size: 1826488724 dataset_size: 3374035814 - config_name: pl-hi splits: - name: train num_bytes: 9706804 num_examples: 59261 download_size: 4775395 dataset_size: 9706804 - config_name: pl-hr splits: - name: train num_bytes: 3100199387 num_examples: 26123255 download_size: 1837533036 dataset_size: 3100199387 - config_name: pl-hu splits: - name: train num_bytes: 3923574798 num_examples: 
32352502 download_size: 2315938647 dataset_size: 3923574798 - config_name: pl-hy splits: - name: train num_bytes: 509351 num_examples: 3006 download_size: 296479 dataset_size: 509351 - config_name: pl-id splits: - name: train num_bytes: 829869833 num_examples: 6914619 download_size: 470877668 dataset_size: 829869833 - config_name: pl-is splits: - name: train num_bytes: 161273537 num_examples: 1395047 download_size: 95528902 dataset_size: 161273537 - config_name: pl-it splits: - name: train num_bytes: 3200084801 num_examples: 25998905 download_size: 1869172216 dataset_size: 3200084801 - config_name: pl-ja splits: - name: train num_bytes: 257585745 num_examples: 1723779 download_size: 123156850 dataset_size: 257585745 - config_name: pl-ka splits: - name: train num_bytes: 28660921 num_examples: 152660 download_size: 12168136 dataset_size: 28660921 - config_name: pl-kk splits: - name: train num_bytes: 528471 num_examples: 3366 download_size: 252551 dataset_size: 528471 - config_name: pl-ko splits: - name: train num_bytes: 170018400 num_examples: 1124166 download_size: 84179417 dataset_size: 170018400 - config_name: pl-lt splits: - name: train num_bytes: 173965447 num_examples: 1263859 download_size: 87132855 dataset_size: 173965447 - config_name: pl-lv splits: - name: train num_bytes: 64220390 num_examples: 465244 download_size: 32143498 dataset_size: 64220390 - config_name: pl-mk splits: - name: train num_bytes: 484192378 num_examples: 2941612 download_size: 221731216 dataset_size: 484192378 - config_name: pl-ml splits: - name: train num_bytes: 57459762 num_examples: 266113 download_size: 23530753 dataset_size: 57459762 - config_name: pl-ms splits: - name: train num_bytes: 215506009 num_examples: 1525708 download_size: 101858292 dataset_size: 215506009 - config_name: pl-nl splits: - name: train num_bytes: 4183226980 num_examples: 28349529 download_size: 2025173401 dataset_size: 4183226980 - config_name: pl-no splits: - name: train num_bytes: 964585514 num_examples: 
6814103 download_size: 467264962 dataset_size: 964585514 - config_name: pl-pt splits: - name: train num_bytes: 3167648231 num_examples: 26177378 download_size: 1839671977 dataset_size: 3167648231 - config_name: pl-ro splits: - name: train num_bytes: 5202278369 num_examples: 35487497 download_size: 2529391160 dataset_size: 5202278369 - config_name: pl-ru splits: - name: train num_bytes: 3212296477 num_examples: 18820442 download_size: 1492064462 dataset_size: 3212296477 - config_name: pl-si splits: - name: train num_bytes: 87324110 num_examples: 448798 download_size: 37246728 dataset_size: 87324110 - config_name: pl-sk splits: - name: train num_bytes: 1008831523 num_examples: 7041596 download_size: 501078066 dataset_size: 1008831523 - config_name: pl-sl splits: - name: train num_bytes: 2369220483 num_examples: 16744288 download_size: 1166575859 dataset_size: 2369220483 - config_name: pl-sq splits: - name: train num_bytes: 236555537 num_examples: 1660405 download_size: 115604306 dataset_size: 236555537 - config_name: pl-sr splits: - name: train num_bytes: 4256902775 num_examples: 29228682 download_size: 2091199963 dataset_size: 4256902775 - config_name: pl-sv splits: - name: train num_bytes: 2025896315 num_examples: 13991753 download_size: 977917392 dataset_size: 2025896315 - config_name: pl-ta splits: - name: train num_bytes: 4172848 num_examples: 20849 download_size: 1676094 dataset_size: 4172848 - config_name: pl-te splits: - name: train num_bytes: 3415987 num_examples: 18176 download_size: 1453243 dataset_size: 3415987 - config_name: pl-th splits: - name: train num_bytes: 534530815 num_examples: 2650840 download_size: 228202097 dataset_size: 534530815 - config_name: pl-tl splits: - name: train num_bytes: 1128842 num_examples: 8032 download_size: 555957 dataset_size: 1128842 - config_name: pl-tr splits: - name: train num_bytes: 4844891904 num_examples: 33005803 download_size: 2344017268 dataset_size: 4844891904 - config_name: pl-uk splits: - name: train num_bytes: 
95268355 num_examples: 682988 download_size: 52446037 dataset_size: 95268355 - config_name: pl-ur splits: - name: train num_bytes: 3174791 num_examples: 22642 download_size: 1750054 dataset_size: 3174791 - config_name: pl-vi splits: - name: train num_bytes: 376945252 num_examples: 2929816 download_size: 210882427 dataset_size: 376945252 - config_name: pt-af splits: - name: train num_bytes: 2662227 num_examples: 22683 download_size: 1590504 dataset_size: 2662227 - config_name: pt-ar splits: - name: train num_bytes: 2829576795 num_examples: 20343173 download_size: 1533941738 dataset_size: 2829576795 - config_name: pt-bg splits: - name: train num_bytes: 3636375619 num_examples: 25536097 download_size: 1916970379 dataset_size: 3636375619 - config_name: pt-bn splits: - name: train num_bytes: 47796750 num_examples: 303297 download_size: 23348691 dataset_size: 47796750 - config_name: pt-bs splits: - name: train num_bytes: 1110290608 num_examples: 9503027 download_size: 652143395 dataset_size: 1110290608 - config_name: pt-ca splits: - name: train num_bytes: 45705264 num_examples: 384142 download_size: 26394305 dataset_size: 45705264 - config_name: pt-cs splits: - name: train num_bytes: 3030385756 num_examples: 25436263 download_size: 1768515174 dataset_size: 3030385756 - config_name: pt-da splits: - name: train num_bytes: 1406981386 num_examples: 11989009 download_size: 804462230 dataset_size: 1406981386 - config_name: pt-de splits: - name: train num_bytes: 1795134904 num_examples: 14737184 download_size: 1023639421 dataset_size: 1795134904 - config_name: pt-el splits: - name: train num_bytes: 3629792970 num_examples: 24845250 download_size: 1914885055 dataset_size: 3629792970 - config_name: pt-en splits: - name: train num_bytes: 3926004451 num_examples: 33222606 download_size: 2218278440 dataset_size: 3926004451 - config_name: pt-es splits: - name: train num_bytes: 3637992322 num_examples: 30294749 download_size: 2076748936 dataset_size: 3637992322 - config_name: pt-et 
splits: - name: train num_bytes: 1217332722 num_examples: 10452309 download_size: 707187030 dataset_size: 1217332722 - config_name: pt-eu splits: - name: train num_bytes: 73488457 num_examples: 641295 download_size: 42988209 dataset_size: 73488457 - config_name: pt-fa splits: - name: train num_bytes: 692321360 num_examples: 4927719 download_size: 370190228 dataset_size: 692321360 - config_name: pt-fi splits: - name: train num_bytes: 2370161017 num_examples: 19740360 download_size: 1364103529 dataset_size: 2370161017 - config_name: pt-fr splits: - name: train num_bytes: 2852278612 num_examples: 23387253 download_size: 1622783241 dataset_size: 2852278612 - config_name: pt-gl splits: - name: train num_bytes: 10215264 num_examples: 86232 download_size: 6061271 dataset_size: 10215264 - config_name: pt-he splits: - name: train num_bytes: 2842381792 num_examples: 21226267 download_size: 1516205541 dataset_size: 2842381792 - config_name: pt-hi splits: - name: train num_bytes: 7803770 num_examples: 49716 download_size: 3798868 dataset_size: 7803770 - config_name: pt-hr splits: - name: train num_bytes: 2607378438 num_examples: 22175260 download_size: 1518295990 dataset_size: 2607378438 - config_name: pt-hu splits: - name: train num_bytes: 3012533336 num_examples: 25146790 download_size: 1749810979 dataset_size: 3012533336 - config_name: pt-hy splits: - name: train num_bytes: 376006 num_examples: 2328 download_size: 215620 dataset_size: 376006 - config_name: pt-id splits: - name: train num_bytes: 809121646 num_examples: 6798556 download_size: 450766628 dataset_size: 809121646 - config_name: pt-is splits: - name: train num_bytes: 167871044 num_examples: 1459837 download_size: 97489185 dataset_size: 167871044 - config_name: pt-it splits: - name: train num_bytes: 2449298359 num_examples: 20164630 download_size: 1406570669 dataset_size: 2449298359 - config_name: pt-ja splits: - name: train num_bytes: 262193106 num_examples: 1750528 download_size: 123329354 dataset_size: 262193106 
- config_name: pt-ka splits: - name: train num_bytes: 30223485 num_examples: 165003 download_size: 12654990 dataset_size: 30223485 - config_name: pt-kk splits: - name: train num_bytes: 506193 num_examples: 3351 download_size: 236486 dataset_size: 506193 - config_name: pt-ko splits: - name: train num_bytes: 167789602 num_examples: 1112125 download_size: 81694331 dataset_size: 167789602 - config_name: pt-lt splits: - name: train num_bytes: 177914098 num_examples: 1300122 download_size: 87334862 dataset_size: 177914098 - config_name: pt-lv splits: - name: train num_bytes: 66140209 num_examples: 482874 download_size: 32409650 dataset_size: 66140209 - config_name: pt-mk splits: - name: train num_bytes: 483822959 num_examples: 2971694 download_size: 218128759 dataset_size: 483822959 - config_name: pt-ml splits: - name: train num_bytes: 57065646 num_examples: 268145 download_size: 23195802 dataset_size: 57065646 - config_name: pt-ms splits: - name: train num_bytes: 212534332 num_examples: 1515435 download_size: 98601321 dataset_size: 212534332 - config_name: pt-nl splits: - name: train num_bytes: 3588967852 num_examples: 24567302 download_size: 1706649428 dataset_size: 3588967852 - config_name: pt-no splits: - name: train num_bytes: 959298855 num_examples: 6814852 download_size: 456553093 dataset_size: 959298855 - config_name: pt-pl splits: - name: train num_bytes: 2659269581 num_examples: 26177378 download_size: 1839671977 dataset_size: 2659269581 - config_name: pt-ro splits: - name: train num_bytes: 4197886350 num_examples: 28968942 download_size: 2005336046 dataset_size: 4197886350 - config_name: pt-ru splits: - name: train num_bytes: 2461766450 num_examples: 14571176 download_size: 1128306975 dataset_size: 2461766450 - config_name: pt-si splits: - name: train num_bytes: 85746826 num_examples: 450398 download_size: 36114478 dataset_size: 85746826 - config_name: pt-sk splits: - name: train num_bytes: 837169151 num_examples: 5864940 download_size: 409375516 dataset_size: 
837169151 - config_name: pt-sl splits: - name: train num_bytes: 2204993463 num_examples: 15691731 download_size: 1065122731 dataset_size: 2204993463 - config_name: pt-sq splits: - name: train num_bytes: 240482490 num_examples: 1703896 download_size: 115154737 dataset_size: 240482490 - config_name: pt-sr splits: - name: train num_bytes: 3503444935 num_examples: 24285032 download_size: 1688529380 dataset_size: 3503444935 - config_name: pt-sv splits: - name: train num_bytes: 1964346699 num_examples: 13618563 download_size: 932260128 dataset_size: 1964346699 - config_name: pt-ta splits: - name: train num_bytes: 3918915 num_examples: 20131 download_size: 1571464 dataset_size: 3918915 - config_name: pt-te splits: - name: train num_bytes: 3547355 num_examples: 19319 download_size: 1493732 dataset_size: 3547355 - config_name: pt-th splits: - name: train num_bytes: 507773214 num_examples: 2554853 download_size: 214327782 dataset_size: 507773214 - config_name: pt-tl splits: - name: train num_bytes: 1479476 num_examples: 10348 download_size: 707357 dataset_size: 1479476 - config_name: pt-tr splits: - name: train num_bytes: 3963515939 num_examples: 27334710 download_size: 1883668900 dataset_size: 3963515939 - config_name: pt-uk splits: - name: train num_bytes: 82582463 num_examples: 595055 download_size: 44838897 dataset_size: 82582463 - config_name: pt-ur splits: - name: train num_bytes: 3181007 num_examples: 22694 download_size: 1723612 dataset_size: 3181007 - config_name: pt-vi splits: - name: train num_bytes: 378714048 num_examples: 2963834 download_size: 208401056 dataset_size: 378714048 - config_name: ro-af splits: - name: train num_bytes: 3816150 num_examples: 32189 download_size: 2297432 dataset_size: 3816150 - config_name: ro-ar splits: - name: train num_bytes: 3623932117 num_examples: 26173933 download_size: 1969204810 dataset_size: 3623932117 - config_name: ro-bg splits: - name: train num_bytes: 4903954167 num_examples: 34484838 download_size: 2594839742 
dataset_size: 4903954167 - config_name: ro-bn splits: - name: train num_bytes: 51044103 num_examples: 324680 download_size: 25005560 dataset_size: 51044103 - config_name: ro-bs splits: - name: train num_bytes: 1420770979 num_examples: 12154625 download_size: 835243980 dataset_size: 1420770979 - config_name: ro-ca splits: - name: train num_bytes: 50207827 num_examples: 423896 download_size: 29090640 dataset_size: 50207827 - config_name: ro-cs splits: - name: train num_bytes: 4120752955 num_examples: 34478606 download_size: 2410165855 dataset_size: 4120752955 - config_name: ro-da splits: - name: train num_bytes: 1554969828 num_examples: 13120495 download_size: 894137585 dataset_size: 1554969828 - config_name: ro-de splits: - name: train num_bytes: 2162906086 num_examples: 17591098 download_size: 1240337422 dataset_size: 2162906086 - config_name: ro-el splits: - name: train num_bytes: 5743725087 num_examples: 33491396 download_size: 2615079473 dataset_size: 5743725087 - config_name: ro-en splits: - name: train num_bytes: 5941123947 num_examples: 50693226 download_size: 3361333588 dataset_size: 5941123947 - config_name: ro-es splits: - name: train num_bytes: 5222773855 num_examples: 43203352 download_size: 2993415434 dataset_size: 5222773855 - config_name: ro-et splits: - name: train num_bytes: 1344532761 num_examples: 11541438 download_size: 783875397 dataset_size: 1344532761 - config_name: ro-eu splits: - name: train num_bytes: 82308461 num_examples: 715988 download_size: 48303764 dataset_size: 82308461 - config_name: ro-fa splits: - name: train num_bytes: 787583777 num_examples: 5646113 download_size: 422300987 dataset_size: 787583777 - config_name: ro-fi splits: - name: train num_bytes: 2853204810 num_examples: 23464056 download_size: 1650735423 dataset_size: 2853204810 - config_name: ro-fr splits: - name: train num_bytes: 3727207847 num_examples: 30446215 download_size: 2128744727 dataset_size: 3727207847 - config_name: ro-gl splits: - name: train num_bytes: 
11642857 num_examples: 99549 download_size: 6942287 dataset_size: 11642857 - config_name: ro-he splits: - name: train num_bytes: 3553247731 num_examples: 26370152 download_size: 1901646704 dataset_size: 3553247731 - config_name: ro-hi splits: - name: train num_bytes: 11247826 num_examples: 70415 download_size: 5476088 dataset_size: 11247826 - config_name: ro-hr splits: - name: train num_bytes: 3487160879 num_examples: 29598040 download_size: 2035804010 dataset_size: 3487160879 - config_name: ro-hu splits: - name: train num_bytes: 4102974579 num_examples: 34126008 download_size: 2388399672 dataset_size: 4102974579 - config_name: ro-hy splits: - name: train num_bytes: 553282 num_examples: 3355 download_size: 322671 dataset_size: 553282 - config_name: ro-id splits: - name: train num_bytes: 934915192 num_examples: 7915743 download_size: 521917939 dataset_size: 934915192 - config_name: ro-is splits: - name: train num_bytes: 170663101 num_examples: 1469037 download_size: 99628042 dataset_size: 170663101 - config_name: ro-it splits: - name: train num_bytes: 3327768795 num_examples: 27147041 download_size: 1920212878 dataset_size: 3327768795 - config_name: ro-ja splits: - name: train num_bytes: 275151988 num_examples: 1836903 download_size: 130036875 dataset_size: 275151988 - config_name: ro-ka splits: - name: train num_bytes: 33429489 num_examples: 182789 download_size: 14045813 dataset_size: 33429489 - config_name: ro-kk splits: - name: train num_bytes: 350558 num_examples: 2348 download_size: 167181 dataset_size: 350558 - config_name: ro-ko splits: - name: train num_bytes: 186649454 num_examples: 1236461 download_size: 91220128 dataset_size: 186649454 - config_name: ro-lt splits: - name: train num_bytes: 184923193 num_examples: 1351611 download_size: 90965924 dataset_size: 184923193 - config_name: ro-lv splits: - name: train num_bytes: 67667599 num_examples: 492195 download_size: 33310752 dataset_size: 67667599 - config_name: ro-mk splits: - name: train num_bytes: 
526156422 num_examples: 3241598 download_size: 237980749 dataset_size: 526156422 - config_name: ro-ml splits: - name: train num_bytes: 65957623 num_examples: 313496 download_size: 26815291 dataset_size: 65957623 - config_name: ro-ms splits: - name: train num_bytes: 242025589 num_examples: 1723823 download_size: 112453322 dataset_size: 242025589 - config_name: ro-nl splits: - name: train num_bytes: 4613911945 num_examples: 31320577 download_size: 2206854988 dataset_size: 4613911945 - config_name: ro-no splits: - name: train num_bytes: 1073767176 num_examples: 7541069 download_size: 514649036 dataset_size: 1073767176 - config_name: ro-pl splits: - name: train num_bytes: 5202278369 num_examples: 35487497 download_size: 2529391160 dataset_size: 5202278369 - config_name: ro-pt splits: - name: train num_bytes: 4197886350 num_examples: 28968942 download_size: 2005336046 dataset_size: 4197886350 - config_name: ro-ru splits: - name: train num_bytes: 3277408338 num_examples: 19483553 download_size: 1504469733 dataset_size: 3277408338 - config_name: ro-si splits: - name: train num_bytes: 82673216 num_examples: 504236 download_size: 40042131 dataset_size: 82673216 - config_name: ro-sk splits: - name: train num_bytes: 884528028 num_examples: 7527752 download_size: 521073099 dataset_size: 884528028 - config_name: ro-sl splits: - name: train num_bytes: 2108760709 num_examples: 18166994 download_size: 1235868699 dataset_size: 2108760709 - config_name: ro-sq splits: - name: train num_bytes: 208814677 num_examples: 1796074 download_size: 120900455 dataset_size: 208814677 - config_name: ro-sr splits: - name: train num_bytes: 4058923584 num_examples: 34011083 download_size: 2357548703 dataset_size: 4058923584 - config_name: ro-sv splits: - name: train num_bytes: 1844103632 num_examples: 15334946 download_size: 1057705496 dataset_size: 1844103632 - config_name: ro-ta splits: - name: train num_bytes: 4687022 num_examples: 27266 download_size: 2153715 dataset_size: 4687022 - config_name: 
ro-te splits: - name: train num_bytes: 3811997 num_examples: 24442 download_size: 1841560 dataset_size: 3811997 - config_name: ro-th splits: - name: train num_bytes: 496779235 num_examples: 2868498 download_size: 239896657 dataset_size: 496779235 - config_name: ro-tl splits: - name: train num_bytes: 1022700 num_examples: 8610 download_size: 608659 dataset_size: 1022700 - config_name: ro-tr splits: - name: train num_bytes: 4388637765 num_examples: 36441374 download_size: 2507484480 dataset_size: 4388637765 - config_name: ro-uk splits: - name: train num_bytes: 101048576 num_examples: 731321 download_size: 54982055 dataset_size: 101048576 - config_name: ro-ur splits: - name: train num_bytes: 3344973 num_examples: 23619 download_size: 1822105 dataset_size: 3344973 - config_name: ro-vi splits: - name: train num_bytes: 409312002 num_examples: 3207734 download_size: 226033059 dataset_size: 409312002 - config_name: ru-af splits: - name: train num_bytes: 2213299 num_examples: 15405 download_size: 1235679 dataset_size: 2213299 - config_name: ru-ar splits: - name: train num_bytes: 2433884060 num_examples: 14885701 download_size: 1248164093 dataset_size: 2433884060 - config_name: ru-bg splits: - name: train num_bytes: 2879531980 num_examples: 17340821 download_size: 1442195418 dataset_size: 2879531980 - config_name: ru-bn splits: - name: train num_bytes: 46991914 num_examples: 264225 download_size: 22207315 dataset_size: 46991914 - config_name: ru-bs splits: - name: train num_bytes: 846376890 num_examples: 5997006 download_size: 462746912 dataset_size: 846376890 - config_name: ru-ca splits: - name: train num_bytes: 46515636 num_examples: 333735 download_size: 24970370 dataset_size: 46515636 - config_name: ru-cs splits: - name: train num_bytes: 2775775666 num_examples: 19402678 download_size: 1508345822 dataset_size: 2775775666 - config_name: ru-da splits: - name: train num_bytes: 1082634005 num_examples: 7543012 download_size: 578802280 dataset_size: 1082634005 - config_name: 
ru-de splits: - name: train num_bytes: 1491789685 num_examples: 10191086 download_size: 798589305 dataset_size: 1491789685 - config_name: ru-el splits: - name: train num_bytes: 3356430222 num_examples: 17008094 download_size: 1481156139 dataset_size: 3356430222 - config_name: ru-en splits: - name: train num_bytes: 3621900455 num_examples: 25910105 download_size: 1912229277 dataset_size: 3621900455 - config_name: ru-es splits: - name: train num_bytes: 3193894825 num_examples: 22084962 download_size: 1703183259 dataset_size: 3193894825 - config_name: ru-et splits: - name: train num_bytes: 920209470 num_examples: 6571496 download_size: 499716413 dataset_size: 920209470 - config_name: ru-eu splits: - name: train num_bytes: 61034749 num_examples: 435117 download_size: 33483197 dataset_size: 61034749 - config_name: ru-fa splits: - name: train num_bytes: 590935582 num_examples: 3605885 download_size: 299648868 dataset_size: 590935582 - config_name: ru-fi splits: - name: train num_bytes: 1825427582 num_examples: 12321319 download_size: 981648021 dataset_size: 1825427582 - config_name: ru-fr splits: - name: train num_bytes: 2457580238 num_examples: 16752259 download_size: 1307520392 dataset_size: 2457580238 - config_name: ru-gl splits: - name: train num_bytes: 9703710 num_examples: 66369 download_size: 5336708 dataset_size: 9703710 - config_name: ru-he splits: - name: train num_bytes: 2360433687 num_examples: 14873399 download_size: 1192068865 dataset_size: 2360433687 - config_name: ru-hi splits: - name: train num_bytes: 7424144 num_examples: 42212 download_size: 3483720 dataset_size: 7424144 - config_name: ru-hr splits: - name: train num_bytes: 2177709494 num_examples: 15288049 download_size: 1182200301 dataset_size: 2177709494 - config_name: ru-hu splits: - name: train num_bytes: 2758725843 num_examples: 19139907 download_size: 1493407470 dataset_size: 2758725843 - config_name: ru-hy splits: - name: train num_bytes: 173828 num_examples: 780 download_size: 98673 
dataset_size: 173828 - config_name: ru-id splits: - name: train num_bytes: 680572668 num_examples: 4802176 download_size: 355555825 dataset_size: 680572668 - config_name: ru-is splits: - name: train num_bytes: 109923106 num_examples: 796822 download_size: 60185582 dataset_size: 109923106 - config_name: ru-it splits: - name: train num_bytes: 2575734602 num_examples: 17490692 download_size: 1384159600 dataset_size: 2575734602 - config_name: ru-ja splits: - name: train num_bytes: 249810274 num_examples: 1441087 download_size: 113437671 dataset_size: 249810274 - config_name: ru-ka splits: - name: train num_bytes: 21524490 num_examples: 104823 download_size: 8862581 dataset_size: 21524490 - config_name: ru-kk splits: - name: train num_bytes: 367693 num_examples: 2223 download_size: 168183 dataset_size: 367693 - config_name: ru-ko splits: - name: train num_bytes: 158429051 num_examples: 907004 download_size: 74193775 dataset_size: 158429051 - config_name: ru-lt splits: - name: train num_bytes: 148567538 num_examples: 947235 download_size: 70252792 dataset_size: 148567538 - config_name: ru-lv splits: - name: train num_bytes: 67503971 num_examples: 430002 download_size: 31696579 dataset_size: 67503971 - config_name: ru-mk splits: - name: train num_bytes: 371160788 num_examples: 2007204 download_size: 162675569 dataset_size: 371160788 - config_name: ru-ml splits: - name: train num_bytes: 49199021 num_examples: 210812 download_size: 19642858 dataset_size: 49199021 - config_name: ru-ms splits: - name: train num_bytes: 182484421 num_examples: 1140751 download_size: 81789248 dataset_size: 182484421 - config_name: ru-nl splits: - name: train num_bytes: 2820479927 num_examples: 16326144 download_size: 1293052874 dataset_size: 2820479927 - config_name: ru-no splits: - name: train num_bytes: 760739190 num_examples: 4566927 download_size: 348678963 dataset_size: 760739190 - config_name: ru-pl splits: - name: train num_bytes: 3212296477 num_examples: 18820442 download_size: 
1492064462 dataset_size: 3212296477 - config_name: ru-pt splits: - name: train num_bytes: 2461766450 num_examples: 14571176 download_size: 1128306975 dataset_size: 2461766450 - config_name: ru-ro splits: - name: train num_bytes: 3277408338 num_examples: 19483553 download_size: 1504469733 dataset_size: 3277408338 - config_name: ru-si splits: - name: train num_bytes: 64120258 num_examples: 340105 download_size: 30017442 dataset_size: 64120258 - config_name: ru-sk splits: - name: train num_bytes: 606429181 num_examples: 4320499 download_size: 331817964 dataset_size: 606429181 - config_name: ru-sl splits: - name: train num_bytes: 1351654761 num_examples: 9614740 download_size: 736165265 dataset_size: 1351654761 - config_name: ru-sq splits: - name: train num_bytes: 146189175 num_examples: 1056444 download_size: 79776110 dataset_size: 146189175 - config_name: ru-sr splits: - name: train num_bytes: 2273395571 num_examples: 15749685 download_size: 1228103655 dataset_size: 2273395571 - config_name: ru-sv splits: - name: train num_bytes: 1239094683 num_examples: 8498769 download_size: 661216655 dataset_size: 1239094683 - config_name: ru-ta splits: - name: train num_bytes: 2243661 num_examples: 11610 download_size: 995921 dataset_size: 2243661 - config_name: ru-te splits: - name: train num_bytes: 1961703 num_examples: 10802 download_size: 921812 dataset_size: 1961703 - config_name: ru-th splits: - name: train num_bytes: 422276412 num_examples: 2145025 download_size: 196150739 dataset_size: 422276412 - config_name: ru-tl splits: - name: train num_bytes: 1800615 num_examples: 13428 download_size: 943261 dataset_size: 1800615 - config_name: ru-tr splits: - name: train num_bytes: 2680480536 num_examples: 18608237 download_size: 1428483121 dataset_size: 2680480536 - config_name: ru-uk splits: - name: train num_bytes: 100180347 num_examples: 653020 download_size: 51265874 dataset_size: 100180347 - config_name: ru-ur splits: - name: train num_bytes: 2727936 num_examples: 16822 
download_size: 1418314 dataset_size: 2727936 - config_name: ru-vi splits: - name: train num_bytes: 342610320 num_examples: 2289721 download_size: 178238738 dataset_size: 342610320 - config_name: si-af splits: - name: train num_bytes: 169031 num_examples: 977 download_size: 88372 dataset_size: 169031 - config_name: si-ar splits: - name: train num_bytes: 85928225 num_examples: 483959 download_size: 40260579 dataset_size: 85928225 - config_name: si-bg splits: - name: train num_bytes: 86234943 num_examples: 460504 download_size: 39669021 dataset_size: 86234943 - config_name: si-bn splits: - name: train num_bytes: 9399788 num_examples: 47488 download_size: 4113239 dataset_size: 9399788 - config_name: si-bs splits: - name: train num_bytes: 36193901 num_examples: 219197 download_size: 17777566 dataset_size: 36193901 - config_name: si-ca splits: - name: train num_bytes: 1346430 num_examples: 8488 download_size: 669721 dataset_size: 1346430 - config_name: si-cs splits: - name: train num_bytes: 74604851 num_examples: 454257 download_size: 36670813 dataset_size: 74604851 - config_name: si-da splits: - name: train num_bytes: 59345052 num_examples: 358142 download_size: 28476367 dataset_size: 59345052 - config_name: si-de splits: - name: train num_bytes: 55220250 num_examples: 324862 download_size: 26729654 dataset_size: 55220250 - config_name: si-el splits: - name: train num_bytes: 101008167 num_examples: 466440 download_size: 41444085 dataset_size: 101008167 - config_name: si-en splits: - name: train num_bytes: 95095311 num_examples: 601164 download_size: 45540052 dataset_size: 95095311 - config_name: si-es splits: - name: train num_bytes: 83786864 num_examples: 512221 download_size: 40580333 dataset_size: 83786864 - config_name: si-et splits: - name: train num_bytes: 54413124 num_examples: 331221 download_size: 26587114 dataset_size: 54413124 - config_name: si-eu splits: - name: train num_bytes: 5612825 num_examples: 34559 download_size: 2768890 dataset_size: 5612825 - 
config_name: si-fa splits: - name: train num_bytes: 51605023 num_examples: 292777 download_size: 23948500 dataset_size: 51605023 - config_name: si-fi splits: - name: train num_bytes: 66518418 num_examples: 391991 download_size: 32070537 dataset_size: 66518418 - config_name: si-fr splits: - name: train num_bytes: 66445847 num_examples: 393481 download_size: 32060790 dataset_size: 66445847 - config_name: si-gl splits: - name: train num_bytes: 662297 num_examples: 4162 download_size: 338642 dataset_size: 662297 - config_name: si-he splits: - name: train num_bytes: 77642623 num_examples: 435865 download_size: 35869186 dataset_size: 77642623 - config_name: si-hi splits: - name: train num_bytes: 2405564 num_examples: 11405 download_size: 1057541 dataset_size: 2405564 - config_name: si-hr splits: - name: train num_bytes: 69526977 num_examples: 428365 download_size: 34003251 dataset_size: 69526977 - config_name: si-hu splits: - name: train num_bytes: 76230892 num_examples: 460994 download_size: 37382220 dataset_size: 76230892 - config_name: si-id splits: - name: train num_bytes: 59489616 num_examples: 366000 download_size: 28222675 dataset_size: 59489616 - config_name: si-is splits: - name: train num_bytes: 6861993 num_examples: 41951 download_size: 3403076 dataset_size: 6861993 - config_name: si-it splits: - name: train num_bytes: 61080429 num_examples: 366973 download_size: 29735399 dataset_size: 61080429 - config_name: si-ja splits: - name: train num_bytes: 31421638 num_examples: 162957 download_size: 13120285 dataset_size: 31421638 - config_name: si-ka splits: - name: train num_bytes: 1784023 num_examples: 7962 download_size: 678723 dataset_size: 1784023 - config_name: si-ko splits: - name: train num_bytes: 11281220 num_examples: 58662 download_size: 4864805 dataset_size: 11281220 - config_name: si-lt splits: - name: train num_bytes: 19758460 num_examples: 106533 download_size: 8561344 dataset_size: 19758460 - config_name: si-lv splits: - name: train num_bytes: 6460487 
num_examples: 34416 download_size: 2809860 dataset_size: 6460487 - config_name: si-mk splits: - name: train num_bytes: 37126002 num_examples: 172595 download_size: 15221060 dataset_size: 37126002 - config_name: si-ml splits: - name: train num_bytes: 7333430 num_examples: 28013 download_size: 2791684 dataset_size: 7333430 - config_name: si-ms splits: - name: train num_bytes: 37899575 num_examples: 204061 download_size: 15665534 dataset_size: 37899575 - config_name: si-nl splits: - name: train num_bytes: 80378326 num_examples: 410919 download_size: 33830860 dataset_size: 80378326 - config_name: si-no splits: - name: train num_bytes: 49172140 num_examples: 261198 download_size: 20692726 dataset_size: 49172140 - config_name: si-pl splits: - name: train num_bytes: 87324110 num_examples: 448798 download_size: 37246728 dataset_size: 87324110 - config_name: si-pt splits: - name: train num_bytes: 85746826 num_examples: 450398 download_size: 36114478 dataset_size: 85746826 - config_name: si-ro splits: - name: train num_bytes: 82673216 num_examples: 504236 download_size: 40042131 dataset_size: 82673216 - config_name: si-ru splits: - name: train num_bytes: 64120258 num_examples: 340105 download_size: 30017442 dataset_size: 64120258 - config_name: si-sk splits: - name: train num_bytes: 23066288 num_examples: 143855 download_size: 11469088 dataset_size: 23066288 - config_name: si-sl splits: - name: train num_bytes: 61867122 num_examples: 376756 download_size: 30115709 dataset_size: 61867122 - config_name: si-sq splits: - name: train num_bytes: 16869827 num_examples: 103115 download_size: 8307150 dataset_size: 16869827 - config_name: si-sr splits: - name: train num_bytes: 74979965 num_examples: 448397 download_size: 36404278 dataset_size: 74979965 - config_name: si-sv splits: - name: train num_bytes: 64274630 num_examples: 378086 download_size: 30744860 dataset_size: 64274630 - config_name: si-ta splits: - name: train num_bytes: 1385229 num_examples: 6331 download_size: 579247 
dataset_size: 1385229 - config_name: si-te splits: - name: train num_bytes: 392313 num_examples: 1847 download_size: 173312 dataset_size: 392313 - config_name: si-th splits: - name: train num_bytes: 22625759 num_examples: 109375 download_size: 9837006 dataset_size: 22625759 - config_name: si-tl splits: - name: train num_bytes: 517937 num_examples: 3021 download_size: 264752 dataset_size: 517937 - config_name: si-tr splits: - name: train num_bytes: 80830433 num_examples: 492120 download_size: 39111144 dataset_size: 80830433 - config_name: si-uk splits: - name: train num_bytes: 3660670 num_examples: 20037 download_size: 1734956 dataset_size: 3660670 - config_name: si-ur splits: - name: train num_bytes: 881167 num_examples: 4949 download_size: 408476 dataset_size: 881167 - config_name: si-vi splits: - name: train num_bytes: 36256714 num_examples: 210151 download_size: 17211993 dataset_size: 36256714 - config_name: sk-af splits: - name: train num_bytes: 596140 num_examples: 4713 download_size: 368093 dataset_size: 596140 - config_name: sk-ar splits: - name: train num_bytes: 808043498 num_examples: 5914026 download_size: 449390568 dataset_size: 808043498 - config_name: sk-bg splits: - name: train num_bytes: 955986971 num_examples: 6861749 download_size: 517649170 dataset_size: 955986971 - config_name: sk-bn splits: - name: train num_bytes: 19230434 num_examples: 123258 download_size: 9605086 dataset_size: 19230434 - config_name: sk-bs splits: - name: train num_bytes: 296138066 num_examples: 2581382 download_size: 178448740 dataset_size: 296138066 - config_name: sk-ca splits: - name: train num_bytes: 11843684 num_examples: 103505 download_size: 7086512 dataset_size: 11843684 - config_name: sk-cs splits: - name: train num_bytes: 809217151 num_examples: 7060065 download_size: 482587712 dataset_size: 809217151 - config_name: sk-da splits: - name: train num_bytes: 404552954 num_examples: 3464136 download_size: 237985927 dataset_size: 404552954 - config_name: sk-de splits: - 
name: train num_bytes: 456501829 num_examples: 3764288 download_size: 268030078 dataset_size: 456501829 - config_name: sk-el splits: - name: train num_bytes: 1119152328 num_examples: 6600893 download_size: 519574339 dataset_size: 1119152328 - config_name: sk-en splits: - name: train num_bytes: 1016735266 num_examples: 8850871 download_size: 590210362 dataset_size: 1016735266 - config_name: sk-es splits: - name: train num_bytes: 934981285 num_examples: 7895925 download_size: 549355348 dataset_size: 934981285 - config_name: sk-et splits: - name: train num_bytes: 382375735 num_examples: 3309749 download_size: 227783019 dataset_size: 382375735 - config_name: sk-eu splits: - name: train num_bytes: 24486208 num_examples: 215375 download_size: 14796872 dataset_size: 24486208 - config_name: sk-fa splits: - name: train num_bytes: 209629756 num_examples: 1518563 download_size: 115176817 dataset_size: 209629756 - config_name: sk-fi splits: - name: train num_bytes: 608740847 num_examples: 5114203 download_size: 361318790 dataset_size: 608740847 - config_name: sk-fr splits: - name: train num_bytes: 712758434 num_examples: 5922885 download_size: 418014848 dataset_size: 712758434 - config_name: sk-gl splits: - name: train num_bytes: 2582050 num_examples: 22737 download_size: 1591134 dataset_size: 2582050 - config_name: sk-he splits: - name: train num_bytes: 737560614 num_examples: 5616165 download_size: 404611922 dataset_size: 737560614 - config_name: sk-hi splits: - name: train num_bytes: 4893412 num_examples: 31179 download_size: 2437751 dataset_size: 4893412 - config_name: sk-hr splits: - name: train num_bytes: 695516533 num_examples: 6019369 download_size: 416873699 dataset_size: 695516533 - config_name: sk-hu splits: - name: train num_bytes: 819006347 num_examples: 6978464 download_size: 488927989 dataset_size: 819006347 - config_name: sk-hy splits: - name: train num_bytes: 6489 num_examples: 48 download_size: 8659 dataset_size: 6489 - config_name: sk-id splits: - name: 
train num_bytes: 277169625 num_examples: 2366850 download_size: 158339131 dataset_size: 277169625 - config_name: sk-is splits: - name: train num_bytes: 50563162 num_examples: 436582 download_size: 30286130 dataset_size: 50563162 - config_name: sk-it splits: - name: train num_bytes: 670523336 num_examples: 5596374 download_size: 395868066 dataset_size: 670523336 - config_name: sk-ja splits: - name: train num_bytes: 85369404 num_examples: 579888 download_size: 41335203 dataset_size: 85369404 - config_name: sk-ka splits: - name: train num_bytes: 7190107 num_examples: 39971 download_size: 3085666 dataset_size: 7190107 - config_name: sk-kk splits: - name: train num_bytes: 142109 num_examples: 981 download_size: 70307 dataset_size: 142109 - config_name: sk-ko splits: - name: train num_bytes: 44661401 num_examples: 306616 download_size: 22306050 dataset_size: 44661401 - config_name: sk-lt splits: - name: train num_bytes: 77516074 num_examples: 566875 download_size: 39102225 dataset_size: 77516074 - config_name: sk-lv splits: - name: train num_bytes: 30412290 num_examples: 224153 download_size: 15280371 dataset_size: 30412290 - config_name: sk-mk splits: - name: train num_bytes: 172487559 num_examples: 1067661 download_size: 79786706 dataset_size: 172487559 - config_name: sk-ml splits: - name: train num_bytes: 19677520 num_examples: 92685 download_size: 8173584 dataset_size: 19677520 - config_name: sk-ms splits: - name: train num_bytes: 87399132 num_examples: 631114 download_size: 41690887 dataset_size: 87399132 - config_name: sk-nl splits: - name: train num_bytes: 896652608 num_examples: 6196568 download_size: 438561307 dataset_size: 896652608 - config_name: sk-no splits: - name: train num_bytes: 250701379 num_examples: 1787610 download_size: 122783218 dataset_size: 250701379 - config_name: sk-pl splits: - name: train num_bytes: 1008831523 num_examples: 7041596 download_size: 501078066 dataset_size: 1008831523 - config_name: sk-pt splits: - name: train num_bytes: 
837169151 num_examples: 5864940 download_size: 409375516 dataset_size: 837169151 - config_name: sk-ro splits: - name: train num_bytes: 884528028 num_examples: 7527752 download_size: 521073099 dataset_size: 884528028 - config_name: sk-ru splits: - name: train num_bytes: 606429181 num_examples: 4320499 download_size: 331817964 dataset_size: 606429181 - config_name: sk-si splits: - name: train num_bytes: 23066288 num_examples: 143855 download_size: 11469088 dataset_size: 23066288 - config_name: sk-sl splits: - name: train num_bytes: 505988669 num_examples: 4433599 download_size: 304286405 dataset_size: 505988669 - config_name: sk-sq splits: - name: train num_bytes: 74677972 num_examples: 643261 download_size: 44499142 dataset_size: 74677972 - config_name: sk-sr splits: - name: train num_bytes: 728506659 num_examples: 6218597 download_size: 433916105 dataset_size: 728506659 - config_name: sk-sv splits: - name: train num_bytes: 429043159 num_examples: 3628761 download_size: 251947641 dataset_size: 429043159 - config_name: sk-ta splits: - name: train num_bytes: 1702138 num_examples: 9699 download_size: 793238 dataset_size: 1702138 - config_name: sk-te splits: - name: train num_bytes: 1420681 num_examples: 8215 download_size: 706234 dataset_size: 1420681 - config_name: sk-th splits: - name: train num_bytes: 166452065 num_examples: 977971 download_size: 81730102 dataset_size: 166452065 - config_name: sk-tl splits: - name: train num_bytes: 126393 num_examples: 1183 download_size: 76443 dataset_size: 126393 - config_name: sk-tr splits: - name: train num_bytes: 843834394 num_examples: 7149752 download_size: 494693796 dataset_size: 843834394 - config_name: sk-uk splits: - name: train num_bytes: 29189314 num_examples: 215353 download_size: 16219782 dataset_size: 29189314 - config_name: sk-ur splits: - name: train num_bytes: 1415304 num_examples: 9966 download_size: 787648 dataset_size: 1415304 - config_name: sk-vi splits: - name: train num_bytes: 122553287 num_examples: 972827 
download_size: 69716226 dataset_size: 122553287 - config_name: sl-af splits: - name: train num_bytes: 1262930 num_examples: 11073 download_size: 772741 dataset_size: 1262930 - config_name: sl-ar splits: - name: train num_bytes: 1956994640 num_examples: 14469640 download_size: 1079805655 dataset_size: 1956994640 - config_name: sl-bg splits: - name: train num_bytes: 2288297404 num_examples: 16597568 download_size: 1229293152 dataset_size: 2288297404 - config_name: sl-bn splits: - name: train num_bytes: 43206384 num_examples: 278003 download_size: 21297300 dataset_size: 43206384 - config_name: sl-bs splits: - name: train num_bytes: 774023484 num_examples: 6947809 download_size: 463618907 dataset_size: 774023484 - config_name: sl-ca splits: - name: train num_bytes: 33857371 num_examples: 290837 download_size: 20012268 dataset_size: 33857371 - config_name: sl-cs splits: - name: train num_bytes: 1908319335 num_examples: 16605949 download_size: 1140671701 dataset_size: 1908319335 - config_name: sl-da splits: - name: train num_bytes: 1062503824 num_examples: 9368478 download_size: 621255310 dataset_size: 1062503824 - config_name: sl-de splits: - name: train num_bytes: 1161043365 num_examples: 9827875 download_size: 678336472 dataset_size: 1161043365 - config_name: sl-el splits: - name: train num_bytes: 2641161518 num_examples: 15731333 download_size: 1214813209 dataset_size: 2641161518 - config_name: sl-en splits: - name: train num_bytes: 2244809408 num_examples: 19641457 download_size: 1298201375 dataset_size: 2244809408 - config_name: sl-es splits: - name: train num_bytes: 2106853891 num_examples: 18058017 download_size: 1232811547 dataset_size: 2106853891 - config_name: sl-et splits: - name: train num_bytes: 975198798 num_examples: 8677449 download_size: 578467438 dataset_size: 975198798 - config_name: sl-eu splits: - name: train num_bytes: 55101632 num_examples: 495622 download_size: 32969200 dataset_size: 55101632 - config_name: sl-fa splits: - name: train num_bytes: 
541297797 num_examples: 3932093 download_size: 293957151 dataset_size: 541297797 - config_name: sl-fi splits: - name: train num_bytes: 1565930924 num_examples: 13576268 download_size: 922410965 dataset_size: 1565930924 - config_name: sl-fr splits: - name: train num_bytes: 1711482681 num_examples: 14517180 download_size: 997076832 dataset_size: 1711482681 - config_name: sl-gl splits: - name: train num_bytes: 6629311 num_examples: 57054 download_size: 4036786 dataset_size: 6629311 - config_name: sl-he splits: - name: train num_bytes: 1878776461 num_examples: 14422346 download_size: 1021548451 dataset_size: 1878776461 - config_name: sl-hi splits: - name: train num_bytes: 6810276 num_examples: 43385 download_size: 3338906 dataset_size: 6810276 - config_name: sl-hr splits: - name: train num_bytes: 1747642664 num_examples: 15636933 download_size: 1038713163 dataset_size: 1747642664 - config_name: sl-hu splits: - name: train num_bytes: 1850844233 num_examples: 16006005 download_size: 1101304863 dataset_size: 1850844233 - config_name: sl-hy splits: - name: train num_bytes: 22697 num_examples: 166 download_size: 17625 dataset_size: 22697 - config_name: sl-id splits: - name: train num_bytes: 624128623 num_examples: 5398476 download_size: 354674891 dataset_size: 624128623 - config_name: sl-is splits: - name: train num_bytes: 144379426 num_examples: 1297198 download_size: 85505628 dataset_size: 144379426 - config_name: sl-it splits: - name: train num_bytes: 1469188163 num_examples: 12452119 download_size: 865250270 dataset_size: 1469188163 - config_name: sl-ja splits: - name: train num_bytes: 209268379 num_examples: 1430110 download_size: 99921885 dataset_size: 209268379 - config_name: sl-ka splits: - name: train num_bytes: 25105183 num_examples: 138944 download_size: 10561598 dataset_size: 25105183 - config_name: sl-kk splits: - name: train num_bytes: 315809 num_examples: 2157 download_size: 151509 dataset_size: 315809 - config_name: sl-ko splits: - name: train num_bytes: 
111726172 num_examples: 759146 download_size: 55313166 dataset_size: 111726172 - config_name: sl-lt splits: - name: train num_bytes: 163274512 num_examples: 1222145 download_size: 81350181 dataset_size: 163274512 - config_name: sl-lv splits: - name: train num_bytes: 53642020 num_examples: 405728 download_size: 26677047 dataset_size: 53642020 - config_name: sl-mk splits: - name: train num_bytes: 444808579 num_examples: 2791500 download_size: 202154202 dataset_size: 444808579 - config_name: sl-ml splits: - name: train num_bytes: 46421042 num_examples: 220558 download_size: 18940801 dataset_size: 46421042 - config_name: sl-ms splits: - name: train num_bytes: 175110545 num_examples: 1275879 download_size: 82506421 dataset_size: 175110545 - config_name: sl-nl splits: - name: train num_bytes: 2204849714 num_examples: 15568474 download_size: 1065859748 dataset_size: 2204849714 - config_name: sl-no splits: - name: train num_bytes: 708474521 num_examples: 5166742 download_size: 342326878 dataset_size: 708474521 - config_name: sl-pl splits: - name: train num_bytes: 2369220483 num_examples: 16744288 download_size: 1166575859 dataset_size: 2369220483 - config_name: sl-pt splits: - name: train num_bytes: 2204993463 num_examples: 15691731 download_size: 1065122731 dataset_size: 2204993463 - config_name: sl-ro splits: - name: train num_bytes: 2108760709 num_examples: 18166994 download_size: 1235868699 dataset_size: 2108760709 - config_name: sl-ru splits: - name: train num_bytes: 1351654761 num_examples: 9614740 download_size: 736165265 dataset_size: 1351654761 - config_name: sl-si splits: - name: train num_bytes: 61867122 num_examples: 376756 download_size: 30115709 dataset_size: 61867122 - config_name: sl-sk splits: - name: train num_bytes: 505988669 num_examples: 4433599 download_size: 304286405 dataset_size: 505988669 - config_name: sl-sq splits: - name: train num_bytes: 170526923 num_examples: 1514293 download_size: 100478051 dataset_size: 170526923 - config_name: sl-sr 
splits: - name: train num_bytes: 1872558548 num_examples: 16426054 download_size: 1104115316 dataset_size: 1872558548 - config_name: sl-sv splits: - name: train num_bytes: 1169482639 num_examples: 10176971 download_size: 682588446 dataset_size: 1169482639 - config_name: sl-ta splits: - name: train num_bytes: 2677277 num_examples: 15696 download_size: 1237533 dataset_size: 2677277 - config_name: sl-te splits: - name: train num_bytes: 2434343 num_examples: 15162 download_size: 1191128 dataset_size: 2434343 - config_name: sl-th splits: - name: train num_bytes: 347863035 num_examples: 2041322 download_size: 168958118 dataset_size: 347863035 - config_name: sl-tl splits: - name: train num_bytes: 752083 num_examples: 6441 download_size: 454996 dataset_size: 752083 - config_name: sl-tr splits: - name: train num_bytes: 2006776523 num_examples: 17243310 download_size: 1169481674 dataset_size: 2006776523 - config_name: sl-uk splits: - name: train num_bytes: 66543184 num_examples: 489577 download_size: 36722509 dataset_size: 66543184 - config_name: sl-ur splits: - name: train num_bytes: 2610336 num_examples: 18444 download_size: 1445821 dataset_size: 2610336 - config_name: sl-vi splits: - name: train num_bytes: 304962075 num_examples: 2439417 download_size: 170809635 dataset_size: 304962075 - config_name: sq-af splits: - name: train num_bytes: 120404 num_examples: 1078 download_size: 77871 dataset_size: 120404 - config_name: sq-ar splits: - name: train num_bytes: 209503571 num_examples: 1548085 download_size: 115066482 dataset_size: 209503571 - config_name: sq-bg splits: - name: train num_bytes: 227359878 num_examples: 1653774 download_size: 121756041 dataset_size: 227359878 - config_name: sq-bn splits: - name: train num_bytes: 16728900 num_examples: 103831 download_size: 8197369 dataset_size: 16728900 - config_name: sq-bs splits: - name: train num_bytes: 113752846 num_examples: 994688 download_size: 67461324 dataset_size: 113752846 - config_name: sq-ca splits: - name: train 
num_bytes: 4616422 num_examples: 38801 download_size: 2711281 dataset_size: 4616422 - config_name: sq-cs splits: - name: train num_bytes: 192672998 num_examples: 1668326 download_size: 113932942 dataset_size: 192672998 - config_name: sq-da splits: - name: train num_bytes: 154650977 num_examples: 1328813 download_size: 89388178 dataset_size: 154650977 - config_name: sq-de splits: - name: train num_bytes: 129442137 num_examples: 1073832 download_size: 75337053 dataset_size: 129442137 - config_name: sq-el splits: - name: train num_bytes: 281729405 num_examples: 1689873 download_size: 128591842 dataset_size: 281729405 - config_name: sq-en splits: - name: train num_bytes: 215347638 num_examples: 1903990 download_size: 122671967 dataset_size: 215347638 - config_name: sq-es splits: - name: train num_bytes: 206237301 num_examples: 1774185 download_size: 119322615 dataset_size: 206237301 - config_name: sq-et splits: - name: train num_bytes: 161621933 num_examples: 1394116 download_size: 94899545 dataset_size: 161621933 - config_name: sq-eu splits: - name: train num_bytes: 6695153 num_examples: 57894 download_size: 3939135 dataset_size: 6695153 - config_name: sq-fa splits: - name: train num_bytes: 112670101 num_examples: 811976 download_size: 60925128 dataset_size: 112670101 - config_name: sq-fi splits: - name: train num_bytes: 175036983 num_examples: 1480967 download_size: 102174254 dataset_size: 175036983 - config_name: sq-fr splits: - name: train num_bytes: 175302865 num_examples: 1474957 download_size: 101403285 dataset_size: 175302865 - config_name: sq-gl splits: - name: train num_bytes: 1431243 num_examples: 12458 download_size: 866314 dataset_size: 1431243 - config_name: sq-he splits: - name: train num_bytes: 202278128 num_examples: 1557714 download_size: 109561454 dataset_size: 202278128 - config_name: sq-hi splits: - name: train num_bytes: 4423950 num_examples: 27301 download_size: 2149717 dataset_size: 4423950 - config_name: sq-hr splits: - name: train num_bytes: 
182156405 num_examples: 1607729 download_size: 107067565 dataset_size: 182156405 - config_name: sq-hu splits: - name: train num_bytes: 191707585 num_examples: 1656527 download_size: 112931859 dataset_size: 191707585 - config_name: sq-hy splits: - name: train num_bytes: 17233 num_examples: 120 download_size: 14588 dataset_size: 17233 - config_name: sq-id splits: - name: train num_bytes: 127530097 num_examples: 1085165 download_size: 71944675 dataset_size: 127530097 - config_name: sq-is splits: - name: train num_bytes: 41881250 num_examples: 346875 download_size: 24437562 dataset_size: 41881250 - config_name: sq-it splits: - name: train num_bytes: 154767933 num_examples: 1317228 download_size: 90237983 dataset_size: 154767933 - config_name: sq-ja splits: - name: train num_bytes: 41730136 num_examples: 280216 download_size: 19959571 dataset_size: 41730136 - config_name: sq-ka splits: - name: train num_bytes: 8307069 num_examples: 44751 download_size: 3516832 dataset_size: 8307069 - config_name: sq-ko splits: - name: train num_bytes: 13862949 num_examples: 93617 download_size: 6773153 dataset_size: 13862949 - config_name: sq-lt splits: - name: train num_bytes: 56487293 num_examples: 400606 download_size: 27863354 dataset_size: 56487293 - config_name: sq-lv splits: - name: train num_bytes: 21952741 num_examples: 157229 download_size: 10835312 dataset_size: 21952741 - config_name: sq-mk splits: - name: train num_bytes: 114430301 num_examples: 696310 download_size: 52059527 dataset_size: 114430301 - config_name: sq-ml splits: - name: train num_bytes: 14645597 num_examples: 67813 download_size: 5991789 dataset_size: 14645597 - config_name: sq-ms splits: - name: train num_bytes: 57324616 num_examples: 403811 download_size: 26873094 dataset_size: 57324616 - config_name: sq-nl splits: - name: train num_bytes: 229813924 num_examples: 1601437 download_size: 110360527 dataset_size: 229813924 - config_name: sq-no splits: - name: train num_bytes: 117701159 num_examples: 835802 
download_size: 56766057 dataset_size: 117701159 - config_name: sq-pl splits: - name: train num_bytes: 236555537 num_examples: 1660405 download_size: 115604306 dataset_size: 236555537 - config_name: sq-pt splits: - name: train num_bytes: 240482490 num_examples: 1703896 download_size: 115154737 dataset_size: 240482490 - config_name: sq-ro splits: - name: train num_bytes: 208814677 num_examples: 1796074 download_size: 120900455 dataset_size: 208814677 - config_name: sq-ru splits: - name: train num_bytes: 146189175 num_examples: 1056444 download_size: 79776110 dataset_size: 146189175 - config_name: sq-si splits: - name: train num_bytes: 16869827 num_examples: 103115 download_size: 8307150 dataset_size: 16869827 - config_name: sq-sk splits: - name: train num_bytes: 74677972 num_examples: 643261 download_size: 44499142 dataset_size: 74677972 - config_name: sq-sl splits: - name: train num_bytes: 170526923 num_examples: 1514293 download_size: 100478051 dataset_size: 170526923 - config_name: sq-sr splits: - name: train num_bytes: 198190793 num_examples: 1687576 download_size: 114741291 dataset_size: 198190793 - config_name: sq-sv splits: - name: train num_bytes: 150105163 num_examples: 1265210 download_size: 86829915 dataset_size: 150105163 - config_name: sq-ta splits: - name: train num_bytes: 2102272 num_examples: 12068 download_size: 965286 dataset_size: 2102272 - config_name: sq-te splits: - name: train num_bytes: 1637018 num_examples: 9924 download_size: 791994 dataset_size: 1637018 - config_name: sq-th splits: - name: train num_bytes: 65222651 num_examples: 386006 download_size: 31987177 dataset_size: 65222651 - config_name: sq-tl splits: - name: train num_bytes: 569505 num_examples: 4449 download_size: 336296 dataset_size: 569505 - config_name: sq-tr splits: - name: train num_bytes: 207308159 num_examples: 1775111 download_size: 119625759 dataset_size: 207308159 - config_name: sq-uk splits: - name: train num_bytes: 11979355 num_examples: 87397 download_size: 6673091 
dataset_size: 11979355 - config_name: sq-ur splits: - name: train num_bytes: 1202975 num_examples: 8138 download_size: 654119 dataset_size: 1202975 - config_name: sq-vi splits: - name: train num_bytes: 77378954 num_examples: 606767 download_size: 43137798 dataset_size: 77378954 - config_name: sr-af splits: - name: train num_bytes: 1964264 num_examples: 17396 download_size: 1198323 dataset_size: 1964264 - config_name: sr-ar splits: - name: train num_bytes: 2919647936 num_examples: 21116415 download_size: 1600128717 dataset_size: 2919647936 - config_name: sr-bg splits: - name: train num_bytes: 4005554245 num_examples: 28306263 download_size: 2137616652 dataset_size: 4005554245 - config_name: sr-bn splits: - name: train num_bytes: 47351885 num_examples: 288856 download_size: 22999739 dataset_size: 47351885 - config_name: sr-bs splits: - name: train num_bytes: 1240461792 num_examples: 11207177 download_size: 725112274 dataset_size: 1240461792 - config_name: sr-ca splits: - name: train num_bytes: 44348997 num_examples: 372630 download_size: 25902854 dataset_size: 44348997 - config_name: sr-cs splits: - name: train num_bytes: 3357068079 num_examples: 28272036 download_size: 1988355456 dataset_size: 3357068079 - config_name: sr-da splits: - name: train num_bytes: 1348248860 num_examples: 11444039 download_size: 781595266 dataset_size: 1348248860 - config_name: sr-de splits: - name: train num_bytes: 1805114230 num_examples: 14843597 download_size: 1047418207 dataset_size: 1805114230 - config_name: sr-el splits: - name: train num_bytes: 4773576405 num_examples: 27873774 download_size: 2191070533 dataset_size: 4773576405 - config_name: sr-en splits: - name: train num_bytes: 4986633944 num_examples: 42635098 download_size: 2859258243 dataset_size: 4986633944 - config_name: sr-es splits: - name: train num_bytes: 4420284368 num_examples: 36740123 download_size: 2566679673 dataset_size: 4420284368 - config_name: sr-et splits: - name: train num_bytes: 1209801207 num_examples: 
10368386 download_size: 711087537 dataset_size: 1209801207 - config_name: sr-eu splits: - name: train num_bytes: 71309377 num_examples: 615814 download_size: 42056475 dataset_size: 71309377 - config_name: sr-fa splits: - name: train num_bytes: 655341255 num_examples: 4628679 download_size: 352768566 dataset_size: 655341255 - config_name: sr-fi splits: - name: train num_bytes: 2425281902 num_examples: 20217691 download_size: 1418788373 dataset_size: 2425281902 - config_name: sr-fr splits: - name: train num_bytes: 3310744904 num_examples: 27256897 download_size: 1913557283 dataset_size: 3310744904 - config_name: sr-gl splits: - name: train num_bytes: 12473896 num_examples: 104409 download_size: 7458752 dataset_size: 12473896 - config_name: sr-he splits: - name: train num_bytes: 2901939549 num_examples: 21620148 download_size: 1567275465 dataset_size: 2901939549 - config_name: sr-hi splits: - name: train num_bytes: 10554867 num_examples: 63706 download_size: 5108540 dataset_size: 10554867 - config_name: sr-hr splits: - name: train num_bytes: 2955309013 num_examples: 26643127 download_size: 1718353413 dataset_size: 2955309013 - config_name: sr-hu splits: - name: train num_bytes: 3556699160 num_examples: 29841465 download_size: 2096524852 dataset_size: 3556699160 - config_name: sr-hy splits: - name: train num_bytes: 516507 num_examples: 3118 download_size: 301729 dataset_size: 516507 - config_name: sr-id splits: - name: train num_bytes: 792744886 num_examples: 6631037 download_size: 445349843 dataset_size: 792744886 - config_name: sr-is splits: - name: train num_bytes: 156311618 num_examples: 1350101 download_size: 92187916 dataset_size: 156311618 - config_name: sr-it splits: - name: train num_bytes: 2662719247 num_examples: 21905809 download_size: 1554998886 dataset_size: 2662719247 - config_name: sr-ja splits: - name: train num_bytes: 240386033 num_examples: 1597196 download_size: 114056604 dataset_size: 240386033 - config_name: sr-ka splits: - name: train num_bytes: 
30950809 num_examples: 167048 download_size: 13008013 dataset_size: 30950809 - config_name: sr-kk splits: - name: train num_bytes: 535995 num_examples: 3424 download_size: 250723 dataset_size: 535995 - config_name: sr-ko splits: - name: train num_bytes: 147633153 num_examples: 975262 download_size: 72716732 dataset_size: 147633153 - config_name: sr-lt splits: - name: train num_bytes: 174067870 num_examples: 1250593 download_size: 86121552 dataset_size: 174067870 - config_name: sr-lv splits: - name: train num_bytes: 63117048 num_examples: 454845 download_size: 31181641 dataset_size: 63117048 - config_name: sr-mk splits: - name: train num_bytes: 522671884 num_examples: 3223444 download_size: 235263670 dataset_size: 522671884 - config_name: sr-ml splits: - name: train num_bytes: 60325315 num_examples: 282168 download_size: 24547545 dataset_size: 60325315 - config_name: sr-ms splits: - name: train num_bytes: 205703294 num_examples: 1444162 download_size: 96137539 dataset_size: 205703294 - config_name: sr-nl splits: - name: train num_bytes: 3841706813 num_examples: 26302268 download_size: 1853141017 dataset_size: 3841706813 - config_name: sr-no splits: - name: train num_bytes: 927894774 num_examples: 6553801 download_size: 447123478 dataset_size: 927894774 - config_name: sr-pl splits: - name: train num_bytes: 4256902775 num_examples: 29228682 download_size: 2091199963 dataset_size: 4256902775 - config_name: sr-pt splits: - name: train num_bytes: 3503444935 num_examples: 24285032 download_size: 1688529380 dataset_size: 3503444935 - config_name: sr-ro splits: - name: train num_bytes: 4058923584 num_examples: 34011083 download_size: 2357548703 dataset_size: 4058923584 - config_name: sr-ru splits: - name: train num_bytes: 2273395571 num_examples: 15749685 download_size: 1228103655 dataset_size: 2273395571 - config_name: sr-si splits: - name: train num_bytes: 74979965 num_examples: 448397 download_size: 36404278 dataset_size: 74979965 - config_name: sr-sk splits: - name: 
train num_bytes: 728506659 num_examples: 6218597 download_size: 433916105 dataset_size: 728506659 - config_name: sr-sl splits: - name: train num_bytes: 1872558548 num_examples: 16426054 download_size: 1104115316 dataset_size: 1872558548 - config_name: sr-sq splits: - name: train num_bytes: 198190793 num_examples: 1687576 download_size: 114741291 dataset_size: 198190793 - config_name: sr-sv splits: - name: train num_bytes: 1571982290 num_examples: 13191137 download_size: 910091826 dataset_size: 1571982290 - config_name: sr-ta splits: - name: train num_bytes: 3854254 num_examples: 21250 download_size: 1755349 dataset_size: 3854254 - config_name: sr-te splits: - name: train num_bytes: 3196147 num_examples: 18434 download_size: 1527240 dataset_size: 3196147 - config_name: sr-th splits: - name: train num_bytes: 424637895 num_examples: 2430041 download_size: 205780284 dataset_size: 424637895 - config_name: sr-tl splits: - name: train num_bytes: 712750 num_examples: 5513 download_size: 424892 dataset_size: 712750 - config_name: sr-tr splits: - name: train num_bytes: 3713226546 num_examples: 31001496 download_size: 2146832048 dataset_size: 3713226546 - config_name: sr-uk splits: - name: train num_bytes: 86742051 num_examples: 620082 download_size: 47400768 dataset_size: 86742051 - config_name: sr-ur splits: - name: train num_bytes: 3080116 num_examples: 21253 download_size: 1688348 dataset_size: 3080116 - config_name: sr-vi splits: - name: train num_bytes: 357549026 num_examples: 2765729 download_size: 198225584 dataset_size: 357549026 - config_name: sv-af splits: - name: train num_bytes: 860918 num_examples: 7704 download_size: 520719 dataset_size: 860918 - config_name: sv-ar splits: - name: train num_bytes: 1710374910 num_examples: 12276924 download_size: 922290818 dataset_size: 1710374910 - config_name: sv-bg splits: - name: train num_bytes: 1952701548 num_examples: 13699784 download_size: 1027999551 dataset_size: 1952701548 - config_name: sv-bn splits: - name: train 
num_bytes: 44280487 num_examples: 275980 download_size: 21415654 dataset_size: 44280487 - config_name: sv-bs splits: - name: train num_bytes: 658528949 num_examples: 5637922 download_size: 386326813 dataset_size: 658528949 - config_name: sv-ca splits: - name: train num_bytes: 30067859 num_examples: 249711 download_size: 17325163 dataset_size: 30067859 - config_name: sv-cs splits: - name: train num_bytes: 1772898290 num_examples: 14880023 download_size: 1035154954 dataset_size: 1772898290 - config_name: sv-da splits: - name: train num_bytes: 1146106544 num_examples: 10048583 download_size: 648566400 dataset_size: 1146106544 - config_name: sv-de splits: - name: train num_bytes: 1171363604 num_examples: 9657407 download_size: 667069408 dataset_size: 1171363604 - config_name: sv-el splits: - name: train num_bytes: 2369270401 num_examples: 13735943 download_size: 1072737721 dataset_size: 2369270401 - config_name: sv-en splits: - name: train num_bytes: 2093174228 num_examples: 17660152 download_size: 1183519631 dataset_size: 2093174228 - config_name: sv-es splits: - name: train num_bytes: 1930096122 num_examples: 16028440 download_size: 1104241656 dataset_size: 1930096122 - config_name: sv-et splits: - name: train num_bytes: 861307088 num_examples: 7365540 download_size: 499940673 dataset_size: 861307088 - config_name: sv-eu splits: - name: train num_bytes: 60282463 num_examples: 528236 download_size: 35055766 dataset_size: 60282463 - config_name: sv-fa splits: - name: train num_bytes: 535000989 num_examples: 3784463 download_size: 283997947 dataset_size: 535000989 - config_name: sv-fi splits: - name: train num_bytes: 1672821349 num_examples: 14356833 download_size: 955354120 dataset_size: 1672821349 - config_name: sv-fr splits: - name: train num_bytes: 1543030020 num_examples: 12708915 download_size: 877896446 dataset_size: 1543030020 - config_name: sv-gl splits: - name: train num_bytes: 5435889 num_examples: 46351 download_size: 3235424 dataset_size: 5435889 - 
config_name: sv-he splits: - name: train num_bytes: 1661562661 num_examples: 12304434 download_size: 884094810 dataset_size: 1661562661 - config_name: sv-hi splits: - name: train num_bytes: 8370311 num_examples: 52116 download_size: 4030313 dataset_size: 8370311 - config_name: sv-hr splits: - name: train num_bytes: 1517568386 num_examples: 12981919 download_size: 884182004 dataset_size: 1517568386 - config_name: sv-hu splits: - name: train num_bytes: 1652873404 num_examples: 13811059 download_size: 961782112 dataset_size: 1652873404 - config_name: sv-hy splits: - name: train num_bytes: 176116 num_examples: 995 download_size: 104059 dataset_size: 176116 - config_name: sv-id splits: - name: train num_bytes: 578895375 num_examples: 4819662 download_size: 321728791 dataset_size: 578895375 - config_name: sv-is splits: - name: train num_bytes: 140723499 num_examples: 1206627 download_size: 81632497 dataset_size: 140723499 - config_name: sv-it splits: - name: train num_bytes: 1385767185 num_examples: 11395212 download_size: 796539907 dataset_size: 1385767185 - config_name: sv-ja splits: - name: train num_bytes: 207842670 num_examples: 1395574 download_size: 97089521 dataset_size: 207842670 - config_name: sv-ka splits: - name: train num_bytes: 24400376 num_examples: 131075 download_size: 10105756 dataset_size: 24400376 - config_name: sv-kk splits: - name: train num_bytes: 474672 num_examples: 3039 download_size: 222379 dataset_size: 474672 - config_name: sv-ko splits: - name: train num_bytes: 114621994 num_examples: 769024 download_size: 55346953 dataset_size: 114621994 - config_name: sv-lt splits: - name: train num_bytes: 149493427 num_examples: 1076857 download_size: 73451729 dataset_size: 149493427 - config_name: sv-lv splits: - name: train num_bytes: 50065656 num_examples: 367455 download_size: 24417311 dataset_size: 50065656 - config_name: sv-mk splits: - name: train num_bytes: 379728038 num_examples: 2286077 download_size: 170579304 dataset_size: 379728038 - 
config_name: sv-ml splits: - name: train num_bytes: 49660019 num_examples: 230290 download_size: 19986072 dataset_size: 49660019 - config_name: sv-ms splits: - name: train num_bytes: 172187858 num_examples: 1212972 download_size: 79770776 dataset_size: 172187858 - config_name: sv-nl splits: - name: train num_bytes: 2029650600 num_examples: 14067933 download_size: 959529946 dataset_size: 2029650600 - config_name: sv-no splits: - name: train num_bytes: 961631314 num_examples: 7025119 download_size: 449177330 dataset_size: 961631314 - config_name: sv-pl splits: - name: train num_bytes: 2025896315 num_examples: 13991753 download_size: 977917392 dataset_size: 2025896315 - config_name: sv-pt splits: - name: train num_bytes: 1964346699 num_examples: 13618563 download_size: 932260128 dataset_size: 1964346699 - config_name: sv-ro splits: - name: train num_bytes: 1844103632 num_examples: 15334946 download_size: 1057705496 dataset_size: 1844103632 - config_name: sv-ru splits: - name: train num_bytes: 1239094683 num_examples: 8498769 download_size: 661216655 dataset_size: 1239094683 - config_name: sv-si splits: - name: train num_bytes: 64274630 num_examples: 378086 download_size: 30744860 dataset_size: 64274630 - config_name: sv-sk splits: - name: train num_bytes: 429043159 num_examples: 3628761 download_size: 251947641 dataset_size: 429043159 - config_name: sv-sl splits: - name: train num_bytes: 1169482639 num_examples: 10176971 download_size: 682588446 dataset_size: 1169482639 - config_name: sv-sq splits: - name: train num_bytes: 150105163 num_examples: 1265210 download_size: 86829915 dataset_size: 150105163 - config_name: sv-sr splits: - name: train num_bytes: 1571982290 num_examples: 13191137 download_size: 910091826 dataset_size: 1571982290 - config_name: sv-ta splits: - name: train num_bytes: 2791779 num_examples: 15799 download_size: 1263303 dataset_size: 2791779 - config_name: sv-te splits: - name: train num_bytes: 2397044 num_examples: 14531 download_size: 1155765 
dataset_size: 2397044 - config_name: sv-th splits: - name: train num_bytes: 300238974 num_examples: 1710024 download_size: 143028915 dataset_size: 300238974 - config_name: sv-tl splits: - name: train num_bytes: 743336 num_examples: 6232 download_size: 439804 dataset_size: 743336 - config_name: sv-tr splits: - name: train num_bytes: 1785542570 num_examples: 14825288 download_size: 1017894694 dataset_size: 1785542570 - config_name: sv-uk splits: - name: train num_bytes: 55961790 num_examples: 399792 download_size: 30298198 dataset_size: 55961790 - config_name: sv-ur splits: - name: train num_bytes: 2897178 num_examples: 20107 download_size: 1571273 dataset_size: 2897178 - config_name: sv-vi splits: - name: train num_bytes: 296596399 num_examples: 2281627 download_size: 162431046 dataset_size: 296596399 - config_name: ta-af splits: - name: train num_bytes: 202773 num_examples: 1125 download_size: 97111 dataset_size: 202773 - config_name: ta-ar splits: - name: train num_bytes: 4642330 num_examples: 24676 download_size: 2057283 dataset_size: 4642330 - config_name: ta-bg splits: - name: train num_bytes: 4192067 num_examples: 20905 download_size: 1832897 dataset_size: 4192067 - config_name: ta-bn splits: - name: train num_bytes: 623419 num_examples: 3195 download_size: 261936 dataset_size: 623419 - config_name: ta-bs splits: - name: train num_bytes: 1980224 num_examples: 11433 download_size: 922998 dataset_size: 1980224 - config_name: ta-cs splits: - name: train num_bytes: 3432879 num_examples: 20340 download_size: 1589205 dataset_size: 3432879 - config_name: ta-da splits: - name: train num_bytes: 3180684 num_examples: 18369 download_size: 1445542 dataset_size: 3180684 - config_name: ta-de splits: - name: train num_bytes: 3009325 num_examples: 17376 download_size: 1374679 dataset_size: 3009325 - config_name: ta-el splits: - name: train num_bytes: 4592198 num_examples: 20440 download_size: 1801465 dataset_size: 4592198 - config_name: ta-en splits: - name: train num_bytes: 
5218326 num_examples: 32417 download_size: 2350665 dataset_size: 5218326 - config_name: ta-es splits: - name: train num_bytes: 4263517 num_examples: 24956 download_size: 1947242 dataset_size: 4263517 - config_name: ta-et splits: - name: train num_bytes: 2416007 num_examples: 14339 download_size: 1112144 dataset_size: 2416007 - config_name: ta-eu splits: - name: train num_bytes: 550584 num_examples: 3353 download_size: 257139 dataset_size: 550584 - config_name: ta-fa splits: - name: train num_bytes: 2728002 num_examples: 14703 download_size: 1208945 dataset_size: 2728002 - config_name: ta-fi splits: - name: train num_bytes: 3686065 num_examples: 20077 download_size: 1674299 dataset_size: 3686065 - config_name: ta-fr splits: - name: train num_bytes: 3225678 num_examples: 17791 download_size: 1476658 dataset_size: 3225678 - config_name: ta-he splits: - name: train num_bytes: 4547167 num_examples: 23994 download_size: 1983254 dataset_size: 4547167 - config_name: ta-hi splits: - name: train num_bytes: 5488358 num_examples: 28531 download_size: 2229363 dataset_size: 5488358 - config_name: ta-hr splits: - name: train num_bytes: 3638670 num_examples: 21298 download_size: 1684087 dataset_size: 3638670 - config_name: ta-hu splits: - name: train num_bytes: 3572000 num_examples: 20629 download_size: 1646241 dataset_size: 3572000 - config_name: ta-id splits: - name: train num_bytes: 2849256 num_examples: 16659 download_size: 1271639 dataset_size: 2849256 - config_name: ta-is splits: - name: train num_bytes: 352088 num_examples: 1982 download_size: 167329 dataset_size: 352088 - config_name: ta-it splits: - name: train num_bytes: 1927308 num_examples: 11149 download_size: 898123 dataset_size: 1927308 - config_name: ta-ja splits: - name: train num_bytes: 2554541 num_examples: 12683 download_size: 1013357 dataset_size: 2554541 - config_name: ta-ko splits: - name: train num_bytes: 1495117 num_examples: 7501 download_size: 614733 dataset_size: 1495117 - config_name: ta-lt splits: - 
name: train num_bytes: 2568758 num_examples: 13042 download_size: 1045380 dataset_size: 2568758 - config_name: ta-lv splits: - name: train num_bytes: 761304 num_examples: 4102 download_size: 308674 dataset_size: 761304 - config_name: ta-mk splits: - name: train num_bytes: 1145367 num_examples: 5185 download_size: 454573 dataset_size: 1145367 - config_name: ta-ml splits: - name: train num_bytes: 875894 num_examples: 3431 download_size: 329149 dataset_size: 875894 - config_name: ta-ms splits: - name: train num_bytes: 1746602 num_examples: 9008 download_size: 690305 dataset_size: 1746602 - config_name: ta-nl splits: - name: train num_bytes: 4280539 num_examples: 20504 download_size: 1716607 dataset_size: 4280539 - config_name: ta-no splits: - name: train num_bytes: 2413147 num_examples: 12325 download_size: 954112 dataset_size: 2413147 - config_name: ta-pl splits: - name: train num_bytes: 4172848 num_examples: 20849 download_size: 1676094 dataset_size: 4172848 - config_name: ta-pt splits: - name: train num_bytes: 3918915 num_examples: 20131 download_size: 1571464 dataset_size: 3918915 - config_name: ta-ro splits: - name: train num_bytes: 4687022 num_examples: 27266 download_size: 2153715 dataset_size: 4687022 - config_name: ta-ru splits: - name: train num_bytes: 2243661 num_examples: 11610 download_size: 995921 dataset_size: 2243661 - config_name: ta-si splits: - name: train num_bytes: 1385229 num_examples: 6331 download_size: 579247 dataset_size: 1385229 - config_name: ta-sk splits: - name: train num_bytes: 1702138 num_examples: 9699 download_size: 793238 dataset_size: 1702138 - config_name: ta-sl splits: - name: train num_bytes: 2677277 num_examples: 15696 download_size: 1237533 dataset_size: 2677277 - config_name: ta-sq splits: - name: train num_bytes: 2102272 num_examples: 12068 download_size: 965286 dataset_size: 2102272 - config_name: ta-sr splits: - name: train num_bytes: 3854254 num_examples: 21250 download_size: 1755349 dataset_size: 3854254 - config_name: 
ta-sv splits: - name: train num_bytes: 2791779 num_examples: 15799 download_size: 1263303 dataset_size: 2791779 - config_name: ta-te splits: - name: train num_bytes: 4020522 num_examples: 21155 download_size: 1631680 dataset_size: 4020522 - config_name: ta-th splits: - name: train num_bytes: 1967827 num_examples: 7911 download_size: 798400 dataset_size: 1967827 - config_name: ta-tr splits: - name: train num_bytes: 3806126 num_examples: 21634 download_size: 1719064 dataset_size: 3806126 - config_name: ta-vi splits: - name: train num_bytes: 2277492 num_examples: 12646 download_size: 1023904 dataset_size: 2277492 - config_name: te-ar splits: - name: train num_bytes: 3452430 num_examples: 19326 download_size: 1618921 dataset_size: 3452430 - config_name: te-bg splits: - name: train num_bytes: 3270138 num_examples: 17143 download_size: 1519653 dataset_size: 3270138 - config_name: te-bs splits: - name: train num_bytes: 1834877 num_examples: 11262 download_size: 906589 dataset_size: 1834877 - config_name: te-cs splits: - name: train num_bytes: 2963070 num_examples: 18718 download_size: 1450631 dataset_size: 2963070 - config_name: te-da splits: - name: train num_bytes: 2847730 num_examples: 17496 download_size: 1373002 dataset_size: 2847730 - config_name: te-de splits: - name: train num_bytes: 2004450 num_examples: 12811 download_size: 972183 dataset_size: 2004450 - config_name: te-el splits: - name: train num_bytes: 3883126 num_examples: 18096 download_size: 1603179 dataset_size: 3883126 - config_name: te-en splits: - name: train num_bytes: 3995120 num_examples: 27222 download_size: 1902804 dataset_size: 3995120 - config_name: te-es splits: - name: train num_bytes: 3438665 num_examples: 21762 download_size: 1664204 dataset_size: 3438665 - config_name: te-et splits: - name: train num_bytes: 2303022 num_examples: 14442 download_size: 1123437 dataset_size: 2303022 - config_name: te-eu splits: - name: train num_bytes: 101918 num_examples: 734 download_size: 54838 dataset_size: 
101918 - config_name: te-fa splits: - name: train num_bytes: 1776988 num_examples: 10112 download_size: 829989 dataset_size: 1776988 - config_name: te-fi splits: - name: train num_bytes: 2903233 num_examples: 17129 download_size: 1410151 dataset_size: 2903233 - config_name: te-fr splits: - name: train num_bytes: 1987575 num_examples: 11929 download_size: 960722 dataset_size: 1987575 - config_name: te-he splits: - name: train num_bytes: 3254416 num_examples: 18651 download_size: 1502025 dataset_size: 3254416 - config_name: te-hi splits: - name: train num_bytes: 4125248 num_examples: 23181 download_size: 1737544 dataset_size: 4125248 - config_name: te-hr splits: - name: train num_bytes: 3072107 num_examples: 19063 download_size: 1501516 dataset_size: 3072107 - config_name: te-hu splits: - name: train num_bytes: 2840063 num_examples: 17572 download_size: 1394510 dataset_size: 2840063 - config_name: te-id splits: - name: train num_bytes: 2138400 num_examples: 13304 download_size: 1018809 dataset_size: 2138400 - config_name: te-it splits: - name: train num_bytes: 1623218 num_examples: 9926 download_size: 800137 dataset_size: 1623218 - config_name: te-ja splits: - name: train num_bytes: 1053991 num_examples: 5676 download_size: 441927 dataset_size: 1053991 - config_name: te-ko splits: - name: train num_bytes: 164520 num_examples: 931 download_size: 73173 dataset_size: 164520 - config_name: te-lt splits: - name: train num_bytes: 1864996 num_examples: 9706 download_size: 799236 dataset_size: 1864996 - config_name: te-lv splits: - name: train num_bytes: 723302 num_examples: 4007 download_size: 310051 dataset_size: 723302 - config_name: te-mk splits: - name: train num_bytes: 484852 num_examples: 2374 download_size: 199484 dataset_size: 484852 - config_name: te-ms splits: - name: train num_bytes: 896707 num_examples: 4700 download_size: 379124 dataset_size: 896707 - config_name: te-nl splits: - name: train num_bytes: 3200739 num_examples: 16073 download_size: 1350913 
dataset_size: 3200739 - config_name: te-no splits: - name: train num_bytes: 2468110 num_examples: 13042 download_size: 1031507 dataset_size: 2468110 - config_name: te-pl splits: - name: train num_bytes: 3415987 num_examples: 18176 download_size: 1453243 dataset_size: 3415987 - config_name: te-pt splits: - name: train num_bytes: 3547355 num_examples: 19319 download_size: 1493732 dataset_size: 3547355 - config_name: te-ro splits: - name: train num_bytes: 3811997 num_examples: 24442 download_size: 1841560 dataset_size: 3811997 - config_name: te-ru splits: - name: train num_bytes: 1961703 num_examples: 10802 download_size: 921812 dataset_size: 1961703 - config_name: te-si splits: - name: train num_bytes: 392313 num_examples: 1847 download_size: 173312 dataset_size: 392313 - config_name: te-sk splits: - name: train num_bytes: 1420681 num_examples: 8215 download_size: 706234 dataset_size: 1420681 - config_name: te-sl splits: - name: train num_bytes: 2434343 num_examples: 15162 download_size: 1191128 dataset_size: 2434343 - config_name: te-sq splits: - name: train num_bytes: 1637018 num_examples: 9924 download_size: 791994 dataset_size: 1637018 - config_name: te-sr splits: - name: train num_bytes: 3196147 num_examples: 18434 download_size: 1527240 dataset_size: 3196147 - config_name: te-sv splits: - name: train num_bytes: 2397044 num_examples: 14531 download_size: 1155765 dataset_size: 2397044 - config_name: te-ta splits: - name: train num_bytes: 4020522 num_examples: 21155 download_size: 1631680 dataset_size: 4020522 - config_name: te-th splits: - name: train num_bytes: 314660 num_examples: 955 download_size: 111400 dataset_size: 314660 - config_name: te-tr splits: - name: train num_bytes: 3102120 num_examples: 18843 download_size: 1493712 dataset_size: 3102120 - config_name: te-vi splits: - name: train num_bytes: 1598326 num_examples: 9335 download_size: 760539 dataset_size: 1598326 - config_name: th-af splits: - name: train num_bytes: 334232 num_examples: 2081 
download_size: 168423 dataset_size: 334232 - config_name: th-ar splits: - name: train num_bytes: 535419296 num_examples: 2947486 download_size: 249322971 dataset_size: 535419296 - config_name: th-bg splits: - name: train num_bytes: 536535872 num_examples: 2727606 download_size: 244997814 dataset_size: 536535872 - config_name: th-bn splits: - name: train num_bytes: 23714054 num_examples: 117126 download_size: 10402379 dataset_size: 23714054 - config_name: th-bs splits: - name: train num_bytes: 202140440 num_examples: 1167505 download_size: 99087333 dataset_size: 202140440 - config_name: th-ca splits: - name: train num_bytes: 19332023 num_examples: 109978 download_size: 9376721 dataset_size: 19332023 - config_name: th-cs splits: - name: train num_bytes: 490630442 num_examples: 2846060 download_size: 240534009 dataset_size: 490630442 - config_name: th-da splits: - name: train num_bytes: 310445820 num_examples: 1794942 download_size: 148396298 dataset_size: 310445820 - config_name: th-de splits: - name: train num_bytes: 296889985 num_examples: 1670586 download_size: 143164534 dataset_size: 296889985 - config_name: th-el splits: - name: train num_bytes: 573575076 num_examples: 2505706 download_size: 235809496 dataset_size: 573575076 - config_name: th-en splits: - name: train num_bytes: 546907138 num_examples: 3281533 download_size: 261622137 dataset_size: 546907138 - config_name: th-es splits: - name: train num_bytes: 513655458 num_examples: 2970255 download_size: 247633561 dataset_size: 513655458 - config_name: th-et splits: - name: train num_bytes: 299145165 num_examples: 1746500 download_size: 145311018 dataset_size: 299145165 - config_name: th-eu splits: - name: train num_bytes: 13267277 num_examples: 80754 download_size: 6556528 dataset_size: 13267277 - config_name: th-fa splits: - name: train num_bytes: 204207449 num_examples: 1132448 download_size: 94563745 dataset_size: 204207449 - config_name: th-fi splits: - name: train num_bytes: 405574379 num_examples: 
2288653 download_size: 194766529 dataset_size: 405574379 - config_name: th-fr splits: - name: train num_bytes: 406222789 num_examples: 2318980 download_size: 194945994 dataset_size: 406222789 - config_name: th-gl splits: - name: train num_bytes: 3720097 num_examples: 21253 download_size: 1826270 dataset_size: 3720097 - config_name: th-he splits: - name: train num_bytes: 502279597 num_examples: 2665997 download_size: 230468545 dataset_size: 502279597 - config_name: th-hi splits: - name: train num_bytes: 5351584 num_examples: 25057 download_size: 2295480 dataset_size: 5351584 - config_name: th-hr splits: - name: train num_bytes: 412557577 num_examples: 2404014 download_size: 201065178 dataset_size: 412557577 - config_name: th-hu splits: - name: train num_bytes: 495340888 num_examples: 2867234 download_size: 241515546 dataset_size: 495340888 - config_name: th-id splits: - name: train num_bytes: 261981204 num_examples: 1544231 download_size: 123447658 dataset_size: 261981204 - config_name: th-is splits: - name: train num_bytes: 25164485 num_examples: 143848 download_size: 12433501 dataset_size: 25164485 - config_name: th-it splits: - name: train num_bytes: 431366483 num_examples: 2441314 download_size: 208348638 dataset_size: 431366483 - config_name: th-ja splits: - name: train num_bytes: 120373283 num_examples: 626022 download_size: 49751127 dataset_size: 120373283 - config_name: th-ka splits: - name: train num_bytes: 10196337 num_examples: 43371 download_size: 3967045 dataset_size: 10196337 - config_name: th-kk splits: - name: train num_bytes: 173102 num_examples: 927 download_size: 74704 dataset_size: 173102 - config_name: th-ko splits: - name: train num_bytes: 42607354 num_examples: 224600 download_size: 18223559 dataset_size: 42607354 - config_name: th-lt splits: - name: train num_bytes: 50408749 num_examples: 263891 download_size: 21947444 dataset_size: 50408749 - config_name: th-lv splits: - name: train num_bytes: 20592098 num_examples: 108918 download_size: 
9011460 dataset_size: 20592098 - config_name: th-mk splits: - name: train num_bytes: 149942166 num_examples: 688925 download_size: 61004617 dataset_size: 149942166 - config_name: th-ml splits: - name: train num_bytes: 17647798 num_examples: 68559 download_size: 6701881 dataset_size: 17647798 - config_name: th-ms splits: - name: train num_bytes: 77586418 num_examples: 406935 download_size: 32247478 dataset_size: 77586418 - config_name: th-nl splits: - name: train num_bytes: 514663017 num_examples: 2529429 download_size: 216398115 dataset_size: 514663017 - config_name: th-no splits: - name: train num_bytes: 178887137 num_examples: 912368 download_size: 75346945 dataset_size: 178887137 - config_name: th-pl splits: - name: train num_bytes: 534530815 num_examples: 2650840 download_size: 228202097 dataset_size: 534530815 - config_name: th-pt splits: - name: train num_bytes: 507773214 num_examples: 2554853 download_size: 214327782 dataset_size: 507773214 - config_name: th-ro splits: - name: train num_bytes: 496779235 num_examples: 2868498 download_size: 239896657 dataset_size: 496779235 - config_name: th-ru splits: - name: train num_bytes: 422276412 num_examples: 2145025 download_size: 196150739 dataset_size: 422276412 - config_name: th-si splits: - name: train num_bytes: 22625759 num_examples: 109375 download_size: 9837006 dataset_size: 22625759 - config_name: th-sk splits: - name: train num_bytes: 166452065 num_examples: 977971 download_size: 81730102 dataset_size: 166452065 - config_name: th-sl splits: - name: train num_bytes: 347863035 num_examples: 2041322 download_size: 168958118 dataset_size: 347863035 - config_name: th-sq splits: - name: train num_bytes: 65222651 num_examples: 386006 download_size: 31987177 dataset_size: 65222651 - config_name: th-sr splits: - name: train num_bytes: 424637895 num_examples: 2430041 download_size: 205780284 dataset_size: 424637895 - config_name: th-sv splits: - name: train num_bytes: 300238974 num_examples: 1710024 download_size: 
143028915 dataset_size: 300238974 - config_name: th-ta splits: - name: train num_bytes: 1967827 num_examples: 7911 download_size: 798400 dataset_size: 1967827 - config_name: th-te splits: - name: train num_bytes: 314660 num_examples: 955 download_size: 111400 dataset_size: 314660 - config_name: th-tl splits: - name: train num_bytes: 1224642 num_examples: 7278 download_size: 570669 dataset_size: 1224642 - config_name: th-tr splits: - name: train num_bytes: 518049019 num_examples: 2997947 download_size: 248877220 dataset_size: 518049019 - config_name: th-uk splits: - name: train num_bytes: 24770264 num_examples: 125887 download_size: 11622241 dataset_size: 24770264 - config_name: th-ur splits: - name: train num_bytes: 1655053 num_examples: 8765 download_size: 774958 dataset_size: 1655053 - config_name: th-vi splits: - name: train num_bytes: 122163351 num_examples: 672817 download_size: 57750176 dataset_size: 122163351 - config_name: tl-ar splits: - name: train num_bytes: 1013684 num_examples: 7578 download_size: 561191 dataset_size: 1013684 - config_name: tl-bg splits: - name: train num_bytes: 993947 num_examples: 6685 download_size: 544467 dataset_size: 993947 - config_name: tl-bn splits: - name: train num_bytes: 296962 num_examples: 2051 download_size: 150590 dataset_size: 296962 - config_name: tl-bs splits: - name: train num_bytes: 226647 num_examples: 1648 download_size: 138980 dataset_size: 226647 - config_name: tl-cs splits: - name: train num_bytes: 873459 num_examples: 7439 download_size: 526885 dataset_size: 873459 - config_name: tl-da splits: - name: train num_bytes: 328178 num_examples: 3059 download_size: 194393 dataset_size: 328178 - config_name: tl-de splits: - name: train num_bytes: 1550113 num_examples: 12913 download_size: 854519 dataset_size: 1550113 - config_name: tl-el splits: - name: train num_bytes: 1711021 num_examples: 10131 download_size: 772593 dataset_size: 1711021 - config_name: tl-en splits: - name: train num_bytes: 1779985 num_examples: 
16032 download_size: 976424 dataset_size: 1779985 - config_name: tl-es splits: - name: train num_bytes: 1607847 num_examples: 13547 download_size: 893545 dataset_size: 1607847 - config_name: tl-et splits: - name: train num_bytes: 330894 num_examples: 3089 download_size: 198020 dataset_size: 330894 - config_name: tl-eu splits: - name: train num_bytes: 274996 num_examples: 2597 download_size: 161625 dataset_size: 274996 - config_name: tl-fa splits: - name: train num_bytes: 955565 num_examples: 7018 download_size: 526730 dataset_size: 955565 - config_name: tl-fi splits: - name: train num_bytes: 720568 num_examples: 5912 download_size: 429850 dataset_size: 720568 - config_name: tl-fr splits: - name: train num_bytes: 1596520 num_examples: 13176 download_size: 880547 dataset_size: 1596520 - config_name: tl-he splits: - name: train num_bytes: 930470 num_examples: 6583 download_size: 522892 dataset_size: 930470 - config_name: tl-hi splits: - name: train num_bytes: 77691 num_examples: 506 download_size: 40618 dataset_size: 77691 - config_name: tl-hr splits: - name: train num_bytes: 862761 num_examples: 7386 download_size: 523492 dataset_size: 862761 - config_name: tl-hu splits: - name: train num_bytes: 1259364 num_examples: 10790 download_size: 719603 dataset_size: 1259364 - config_name: tl-id splits: - name: train num_bytes: 857904 num_examples: 7800 download_size: 487404 dataset_size: 857904 - config_name: tl-is splits: - name: train num_bytes: 143624 num_examples: 1244 download_size: 89792 dataset_size: 143624 - config_name: tl-it splits: - name: train num_bytes: 1576367 num_examples: 13299 download_size: 876367 dataset_size: 1576367 - config_name: tl-ja splits: - name: train num_bytes: 1530657 num_examples: 10064 download_size: 691756 dataset_size: 1530657 - config_name: tl-ka splits: - name: train num_bytes: 221269 num_examples: 1273 download_size: 97913 dataset_size: 221269 - config_name: tl-ko splits: - name: train num_bytes: 167951 num_examples: 1213 download_size: 
86502 dataset_size: 167951 - config_name: tl-lt splits: - name: train num_bytes: 177583 num_examples: 1359 download_size: 92731 dataset_size: 177583 - config_name: tl-mk splits: - name: train num_bytes: 635154 num_examples: 3644 download_size: 293613 dataset_size: 635154 - config_name: tl-ml splits: - name: train num_bytes: 731992 num_examples: 3295 download_size: 295285 dataset_size: 731992 - config_name: tl-ms splits: - name: train num_bytes: 982254 num_examples: 7258 download_size: 459124 dataset_size: 982254 - config_name: tl-nl splits: - name: train num_bytes: 1159161 num_examples: 8182 download_size: 561650 dataset_size: 1159161 - config_name: tl-no splits: - name: train num_bytes: 403664 num_examples: 3034 download_size: 199818 dataset_size: 403664 - config_name: tl-pl splits: - name: train num_bytes: 1128842 num_examples: 8032 download_size: 555957 dataset_size: 1128842 - config_name: tl-pt splits: - name: train num_bytes: 1479476 num_examples: 10348 download_size: 707357 dataset_size: 1479476 - config_name: tl-ro splits: - name: train num_bytes: 1022700 num_examples: 8610 download_size: 608659 dataset_size: 1022700 - config_name: tl-ru splits: - name: train num_bytes: 1800615 num_examples: 13428 download_size: 943261 dataset_size: 1800615 - config_name: tl-si splits: - name: train num_bytes: 517937 num_examples: 3021 download_size: 264752 dataset_size: 517937 - config_name: tl-sk splits: - name: train num_bytes: 126393 num_examples: 1183 download_size: 76443 dataset_size: 126393 - config_name: tl-sl splits: - name: train num_bytes: 752083 num_examples: 6441 download_size: 454996 dataset_size: 752083 - config_name: tl-sq splits: - name: train num_bytes: 569505 num_examples: 4449 download_size: 336296 dataset_size: 569505 - config_name: tl-sr splits: - name: train num_bytes: 712750 num_examples: 5513 download_size: 424892 dataset_size: 712750 - config_name: tl-sv splits: - name: train num_bytes: 743336 num_examples: 6232 download_size: 439804 dataset_size: 
743336 - config_name: tl-th splits: - name: train num_bytes: 1224642 num_examples: 7278 download_size: 570669 dataset_size: 1224642 - config_name: tl-tr splits: - name: train num_bytes: 1694442 num_examples: 14507 download_size: 943010 dataset_size: 1694442 - config_name: tl-uk splits: - name: train num_bytes: 171962 num_examples: 1341 download_size: 101468 dataset_size: 171962 - config_name: tl-vi splits: - name: train num_bytes: 787804 num_examples: 5856 download_size: 443661 dataset_size: 787804 - config_name: tr-af splits: - name: train num_bytes: 2884265 num_examples: 24218 download_size: 1722691 dataset_size: 2884265 - config_name: tr-ar splits: - name: train num_bytes: 3690474501 num_examples: 26528738 download_size: 1994176882 dataset_size: 3690474501 - config_name: tr-bg splits: - name: train num_bytes: 4424357819 num_examples: 31075655 download_size: 2331361333 dataset_size: 4424357819 - config_name: tr-bn splits: - name: train num_bytes: 51290410 num_examples: 327016 download_size: 25048848 dataset_size: 51290410 - config_name: tr-bs splits: - name: train num_bytes: 1333752741 num_examples: 11354396 download_size: 780913167 dataset_size: 1333752741 - config_name: tr-ca splits: - name: train num_bytes: 51233240 num_examples: 430642 download_size: 29547526 dataset_size: 51233240 - config_name: tr-cs splits: - name: train num_bytes: 3905507788 num_examples: 32632447 download_size: 2272977883 dataset_size: 3905507788 - config_name: tr-da splits: - name: train num_bytes: 1531776455 num_examples: 12888418 download_size: 875526783 dataset_size: 1531776455 - config_name: tr-de splits: - name: train num_bytes: 2112572636 num_examples: 17205364 download_size: 1204247178 dataset_size: 2112572636 - config_name: tr-el splits: - name: train num_bytes: 5329964195 num_examples: 31032881 download_size: 2416706054 dataset_size: 5329964195 - config_name: tr-en splits: - name: train num_bytes: 5293763367 num_examples: 44986121 download_size: 2981747525 dataset_size: 
5293763367 - config_name: tr-es splits: - name: train num_bytes: 4805023547 num_examples: 39643276 download_size: 2740139443 dataset_size: 4805023547 - config_name: tr-et splits: - name: train num_bytes: 1327335381 num_examples: 11350121 download_size: 769538468 dataset_size: 1327335381 - config_name: tr-eu splits: - name: train num_bytes: 83598165 num_examples: 722770 download_size: 48860202 dataset_size: 83598165 - config_name: tr-fa splits: - name: train num_bytes: 777929872 num_examples: 5556914 download_size: 415510437 dataset_size: 777929872 - config_name: tr-fi splits: - name: train num_bytes: 2738401163 num_examples: 22492040 download_size: 1574691928 dataset_size: 2738401163 - config_name: tr-fr splits: - name: train num_bytes: 3574951864 num_examples: 29089409 download_size: 2031550045 dataset_size: 3574951864 - config_name: tr-gl splits: - name: train num_bytes: 15482246 num_examples: 125932 download_size: 9209958 dataset_size: 15482246 - config_name: tr-he splits: - name: train num_bytes: 3394858605 num_examples: 25179227 download_size: 1808232687 dataset_size: 3394858605 - config_name: tr-hi splits: - name: train num_bytes: 10265044 num_examples: 64085 download_size: 4968851 dataset_size: 10265044 - config_name: tr-hr splits: - name: train num_bytes: 3173563090 num_examples: 26805517 download_size: 1844359679 dataset_size: 3173563090 - config_name: tr-hu splits: - name: train num_bytes: 3908387371 num_examples: 32479149 download_size: 2262726187 dataset_size: 3908387371 - config_name: tr-hy splits: - name: train num_bytes: 552121 num_examples: 3271 download_size: 317775 dataset_size: 552121 - config_name: tr-id splits: - name: train num_bytes: 925131692 num_examples: 7764058 download_size: 514333569 dataset_size: 925131692 - config_name: tr-is splits: - name: train num_bytes: 173191926 num_examples: 1503742 download_size: 100393884 dataset_size: 173191926 - config_name: tr-it splits: - name: train num_bytes: 3149802522 num_examples: 25666917 
download_size: 1806781258 dataset_size: 3149802522 - config_name: tr-ja splits: - name: train num_bytes: 276196852 num_examples: 1840439 download_size: 129931022 dataset_size: 276196852 - config_name: tr-ka splits: - name: train num_bytes: 32816587 num_examples: 178791 download_size: 13764367 dataset_size: 32816587 - config_name: tr-kk splits: - name: train num_bytes: 380385 num_examples: 2594 download_size: 177915 dataset_size: 380385 - config_name: tr-ko splits: - name: train num_bytes: 179546869 num_examples: 1190399 download_size: 87398515 dataset_size: 179546869 - config_name: tr-lt splits: - name: train num_bytes: 186280937 num_examples: 1360008 download_size: 91221893 dataset_size: 186280937 - config_name: tr-lv splits: - name: train num_bytes: 68438127 num_examples: 498299 download_size: 33507937 dataset_size: 68438127 - config_name: tr-mk splits: - name: train num_bytes: 509038938 num_examples: 3118787 download_size: 229400797 dataset_size: 509038938 - config_name: tr-ml splits: - name: train num_bytes: 68300214 num_examples: 327000 download_size: 27659798 dataset_size: 68300214 - config_name: tr-ms splits: - name: train num_bytes: 235915355 num_examples: 1677177 download_size: 109357496 dataset_size: 235915355 - config_name: tr-nl splits: - name: train num_bytes: 4213929782 num_examples: 28559615 download_size: 2005730992 dataset_size: 4213929782 - config_name: tr-no splits: - name: train num_bytes: 1041876617 num_examples: 7313912 download_size: 496547963 dataset_size: 1041876617 - config_name: tr-pl splits: - name: train num_bytes: 4844891904 num_examples: 33005803 download_size: 2344017268 dataset_size: 4844891904 - config_name: tr-pt splits: - name: train num_bytes: 3963515939 num_examples: 27334710 download_size: 1883668900 dataset_size: 3963515939 - config_name: tr-ro splits: - name: train num_bytes: 4388637765 num_examples: 36441374 download_size: 2507484480 dataset_size: 4388637765 - config_name: tr-ru splits: - name: train num_bytes: 2310607163 
num_examples: 18608237 download_size: 1428483121 dataset_size: 2310607163 - config_name: tr-si splits: - name: train num_bytes: 71297346 num_examples: 492120 download_size: 39111144 dataset_size: 71297346 - config_name: tr-sk splits: - name: train num_bytes: 705450676 num_examples: 7149752 download_size: 494693796 dataset_size: 705450676 - config_name: tr-sl splits: - name: train num_bytes: 1673989873 num_examples: 17243310 download_size: 1169481674 dataset_size: 1673989873 - config_name: tr-sq splits: - name: train num_bytes: 174309195 num_examples: 1775111 download_size: 119625759 dataset_size: 174309195 - config_name: tr-sr splits: - name: train num_bytes: 3113334513 num_examples: 31001496 download_size: 2146832048 dataset_size: 3113334513 - config_name: tr-sv splits: - name: train num_bytes: 1500836570 num_examples: 14825288 download_size: 1017894694 dataset_size: 1500836570 - config_name: tr-ta splits: - name: train num_bytes: 3380606 num_examples: 21634 download_size: 1719064 dataset_size: 3380606 - config_name: tr-te splits: - name: train num_bytes: 2744403 num_examples: 18843 download_size: 1493712 dataset_size: 2744403 - config_name: tr-th splits: - name: train num_bytes: 459052945 num_examples: 2997947 download_size: 248877220 dataset_size: 459052945 - config_name: tr-tl splits: - name: train num_bytes: 1382423 num_examples: 14507 download_size: 943010 dataset_size: 1382423 - config_name: tr-uk splits: - name: train num_bytes: 99640269 num_examples: 718219 download_size: 53949611 dataset_size: 99640269 - config_name: tr-ur splits: - name: train num_bytes: 3450199 num_examples: 24048 download_size: 1856228 dataset_size: 3450199 - config_name: tr-vi splits: - name: train num_bytes: 407313281 num_examples: 3178031 download_size: 224062018 dataset_size: 407313281 - config_name: uk-af splits: - name: train num_bytes: 284460 num_examples: 2488 download_size: 191782 dataset_size: 284460 - config_name: uk-ar splits: - name: train num_bytes: 93216778 num_examples: 
591338 download_size: 48444982 dataset_size: 93216778 - config_name: uk-bg splits: - name: train num_bytes: 105467853 num_examples: 657579 download_size: 53539970 dataset_size: 105467853 - config_name: uk-bn splits: - name: train num_bytes: 4884703 num_examples: 30784 download_size: 2621900 dataset_size: 4884703 - config_name: uk-bs splits: - name: train num_bytes: 35389877 num_examples: 257809 download_size: 19644518 dataset_size: 35389877 - config_name: uk-ca splits: - name: train num_bytes: 5231757 num_examples: 43827 download_size: 3329240 dataset_size: 5231757 - config_name: uk-cs splits: - name: train num_bytes: 100339267 num_examples: 730804 download_size: 55410782 dataset_size: 100339267 - config_name: uk-da splits: - name: train num_bytes: 54998950 num_examples: 396040 download_size: 29855367 dataset_size: 54998950 - config_name: uk-de splits: - name: train num_bytes: 64032415 num_examples: 454610 download_size: 34715937 dataset_size: 64032415 - config_name: uk-el splits: - name: train num_bytes: 125784250 num_examples: 658201 download_size: 55910575 dataset_size: 125784250 - config_name: uk-en splits: - name: train num_bytes: 117067969 num_examples: 877780 download_size: 62752356 dataset_size: 117067969 - config_name: uk-es splits: - name: train num_bytes: 108104017 num_examples: 780116 download_size: 58650002 dataset_size: 108104017 - config_name: uk-et splits: - name: train num_bytes: 45052714 num_examples: 384837 download_size: 28929877 dataset_size: 45052714 - config_name: uk-eu splits: - name: train num_bytes: 4760740 num_examples: 37731 download_size: 3032188 dataset_size: 4760740 - config_name: uk-fa splits: - name: train num_bytes: 33270932 num_examples: 238453 download_size: 19472177 dataset_size: 33270932 - config_name: uk-fi splits: - name: train num_bytes: 75185052 num_examples: 527306 download_size: 41030733 dataset_size: 75185052 - config_name: uk-fr splits: - name: train num_bytes: 88514049 num_examples: 632491 download_size: 47765131 
dataset_size: 88514049 - config_name: uk-gl splits: - name: train num_bytes: 1046239 num_examples: 7572 download_size: 587049 dataset_size: 1046239 - config_name: uk-he splits: - name: train num_bytes: 90878453 num_examples: 593074 download_size: 46534583 dataset_size: 90878453 - config_name: uk-hi splits: - name: train num_bytes: 956188 num_examples: 5724 download_size: 455597 dataset_size: 956188 - config_name: uk-hr splits: - name: train num_bytes: 80629274 num_examples: 589286 download_size: 44423103 dataset_size: 80629274 - config_name: uk-hu splits: - name: train num_bytes: 94475716 num_examples: 685291 download_size: 52028384 dataset_size: 94475716 - config_name: uk-id splits: - name: train num_bytes: 36770942 num_examples: 266344 download_size: 19522918 dataset_size: 36770942 - config_name: uk-is splits: - name: train num_bytes: 5013720 num_examples: 36356 download_size: 2801714 dataset_size: 5013720 - config_name: uk-it splits: - name: train num_bytes: 88451469 num_examples: 629710 download_size: 48268530 dataset_size: 88451469 - config_name: uk-ja splits: - name: train num_bytes: 22345382 num_examples: 132404 download_size: 10219814 dataset_size: 22345382 - config_name: uk-ka splits: - name: train num_bytes: 2106429 num_examples: 10491 download_size: 872568 dataset_size: 2106429 - config_name: uk-kk splits: - name: train num_bytes: 183520 num_examples: 1300 download_size: 97660 dataset_size: 183520 - config_name: uk-ko splits: - name: train num_bytes: 7354116 num_examples: 51313 download_size: 4075603 dataset_size: 7354116 - config_name: uk-lt splits: - name: train num_bytes: 11654448 num_examples: 87824 download_size: 6600052 dataset_size: 11654448 - config_name: uk-lv splits: - name: train num_bytes: 4221207 num_examples: 39240 download_size: 2790990 dataset_size: 4221207 - config_name: uk-mk splits: - name: train num_bytes: 25586675 num_examples: 161932 download_size: 13013873 dataset_size: 25586675 - config_name: uk-ml splits: - name: train num_bytes: 
2867827 num_examples: 13464 download_size: 1302429 dataset_size: 2867827 - config_name: uk-ms splits: - name: train num_bytes: 12988887 num_examples: 98951 download_size: 6998791 dataset_size: 12988887 - config_name: uk-nl splits: - name: train num_bytes: 87269176 num_examples: 613782 download_size: 47172139 dataset_size: 87269176 - config_name: uk-no splits: - name: train num_bytes: 35993696 num_examples: 264774 download_size: 19627171 dataset_size: 35993696 - config_name: uk-pl splits: - name: train num_bytes: 95268355 num_examples: 682988 download_size: 52446037 dataset_size: 95268355 - config_name: uk-pt splits: - name: train num_bytes: 82582463 num_examples: 595055 download_size: 44838897 dataset_size: 82582463 - config_name: uk-ro splits: - name: train num_bytes: 86632069 num_examples: 731321 download_size: 54982055 dataset_size: 86632069 - config_name: uk-ru splits: - name: train num_bytes: 86960977 num_examples: 653020 download_size: 51265874 dataset_size: 86960977 - config_name: uk-si splits: - name: train num_bytes: 3271786 num_examples: 20037 download_size: 1734956 dataset_size: 3271786 - config_name: uk-sk splits: - name: train num_bytes: 25005015 num_examples: 215353 download_size: 16219782 dataset_size: 25005015 - config_name: uk-sl splits: - name: train num_bytes: 56958570 num_examples: 489577 download_size: 36722509 dataset_size: 56958570 - config_name: uk-sq splits: - name: train num_bytes: 10382955 num_examples: 87397 download_size: 6673091 dataset_size: 10382955 - config_name: uk-sr splits: - name: train num_bytes: 74598874 num_examples: 620082 download_size: 47400768 dataset_size: 74598874 - config_name: uk-sv splits: - name: train num_bytes: 48174213 num_examples: 399792 download_size: 30298198 dataset_size: 48174213 - config_name: uk-th splits: - name: train num_bytes: 22317183 num_examples: 125887 download_size: 11622241 dataset_size: 22317183 - config_name: uk-tl splits: - name: train num_bytes: 148937 num_examples: 1341 download_size: 
101468 dataset_size: 148937 - config_name: uk-tr splits: - name: train num_bytes: 85383477 num_examples: 718219 download_size: 53949611 dataset_size: 85383477 - config_name: uk-ur splits: - name: train num_bytes: 231348 num_examples: 1668 download_size: 133915 dataset_size: 231348 - config_name: uk-vi splits: - name: train num_bytes: 24167320 num_examples: 162327 download_size: 12821194 dataset_size: 24167320 - config_name: ur-ar splits: - name: train num_bytes: 3875112 num_examples: 25650 download_size: 1997751 dataset_size: 3875112 - config_name: ur-bg splits: - name: train num_bytes: 3564972 num_examples: 21660 download_size: 1815192 dataset_size: 3564972 - config_name: ur-bn splits: - name: train num_bytes: 1158849 num_examples: 6656 download_size: 545013 dataset_size: 1158849 - config_name: ur-bs splits: - name: train num_bytes: 1385143 num_examples: 9693 download_size: 764586 dataset_size: 1385143 - config_name: ur-cs splits: - name: train num_bytes: 3228588 num_examples: 23035 download_size: 1788810 dataset_size: 3228588 - config_name: ur-da splits: - name: train num_bytes: 2628484 num_examples: 18276 download_size: 1416881 dataset_size: 2628484 - config_name: ur-de splits: - name: train num_bytes: 2715028 num_examples: 18678 download_size: 1472368 dataset_size: 2715028 - config_name: ur-el splits: - name: train num_bytes: 3816110 num_examples: 23072 download_size: 1945516 dataset_size: 3816110 - config_name: ur-en splits: - name: train num_bytes: 3860232 num_examples: 29074 download_size: 2058990 dataset_size: 3860232 - config_name: ur-es splits: - name: train num_bytes: 3446488 num_examples: 24994 download_size: 1865029 dataset_size: 3446488 - config_name: ur-et splits: - name: train num_bytes: 2735577 num_examples: 19522 download_size: 1510384 dataset_size: 2735577 - config_name: ur-eu splits: - name: train num_bytes: 240549 num_examples: 2010 download_size: 142255 dataset_size: 240549 - config_name: ur-fa splits: - name: train num_bytes: 2982033 
num_examples: 19034 download_size: 1526952 dataset_size: 2982033 - config_name: ur-fi splits: - name: train num_bytes: 2810381 num_examples: 19434 download_size: 1534592 dataset_size: 2810381 - config_name: ur-fr splits: - name: train num_bytes: 3017683 num_examples: 21346 download_size: 1636608 dataset_size: 3017683 - config_name: ur-gl splits: - name: train num_bytes: 170522 num_examples: 1291 download_size: 98971 dataset_size: 170522 - config_name: ur-he splits: - name: train num_bytes: 3206973 num_examples: 20559 download_size: 1646292 dataset_size: 3206973 - config_name: ur-hi splits: - name: train num_bytes: 227573 num_examples: 1259 download_size: 110769 dataset_size: 227573 - config_name: ur-hr splits: - name: train num_bytes: 3037110 num_examples: 21869 download_size: 1672479 dataset_size: 3037110 - config_name: ur-hu splits: - name: train num_bytes: 3153133 num_examples: 23064 download_size: 1733934 dataset_size: 3153133 - config_name: ur-id splits: - name: train num_bytes: 3135190 num_examples: 22830 download_size: 1657125 dataset_size: 3135190 - config_name: ur-is splits: - name: train num_bytes: 162324 num_examples: 1161 download_size: 93494 dataset_size: 162324 - config_name: ur-it splits: - name: train num_bytes: 2604652 num_examples: 18354 download_size: 1428782 dataset_size: 2604652 - config_name: ur-ja splits: - name: train num_bytes: 1555332 num_examples: 11517 download_size: 826093 dataset_size: 1555332 - config_name: ur-ka splits: - name: train num_bytes: 415236 num_examples: 1983 download_size: 195889 dataset_size: 415236 - config_name: ur-ko splits: - name: train num_bytes: 1001455 num_examples: 7094 download_size: 549507 dataset_size: 1001455 - config_name: ur-lt splits: - name: train num_bytes: 601064 num_examples: 4473 download_size: 334141 dataset_size: 601064 - config_name: ur-lv splits: - name: train num_bytes: 131865 num_examples: 1080 download_size: 78991 dataset_size: 131865 - config_name: ur-mk splits: - name: train num_bytes: 
1793710 num_examples: 10853 download_size: 908669 dataset_size: 1793710 - config_name: ur-ml splits: - name: train num_bytes: 123564 num_examples: 748 download_size: 63127 dataset_size: 123564 - config_name: ur-ms splits: - name: train num_bytes: 2634582 num_examples: 19054 download_size: 1384509 dataset_size: 2634582 - config_name: ur-nl splits: - name: train num_bytes: 3131880 num_examples: 21485 download_size: 1700786 dataset_size: 3131880 - config_name: ur-no splits: - name: train num_bytes: 2427469 num_examples: 17247 download_size: 1316323 dataset_size: 2427469 - config_name: ur-pl splits: - name: train num_bytes: 3174791 num_examples: 22642 download_size: 1750054 dataset_size: 3174791 - config_name: ur-pt splits: - name: train num_bytes: 3181007 num_examples: 22694 download_size: 1723612 dataset_size: 3181007 - config_name: ur-ro splits: - name: train num_bytes: 2914638 num_examples: 23619 download_size: 1822105 dataset_size: 2914638 - config_name: ur-ru splits: - name: train num_bytes: 2420814 num_examples: 16822 download_size: 1418314 dataset_size: 2420814 - config_name: ur-si splits: - name: train num_bytes: 787503 num_examples: 4949 download_size: 408476 dataset_size: 787503 - config_name: ur-sk splits: - name: train num_bytes: 1234632 num_examples: 9966 download_size: 787648 dataset_size: 1234632 - config_name: ur-sl splits: - name: train num_bytes: 2280748 num_examples: 18444 download_size: 1445821 dataset_size: 2280748 - config_name: ur-sq splits: - name: train num_bytes: 1053774 num_examples: 8138 download_size: 654119 dataset_size: 1053774 - config_name: ur-sr splits: - name: train num_bytes: 2700498 num_examples: 21253 download_size: 1688348 dataset_size: 2700498 - config_name: ur-sv splits: - name: train num_bytes: 2537919 num_examples: 20107 download_size: 1571273 dataset_size: 2537919 - config_name: ur-th splits: - name: train num_bytes: 1495760 num_examples: 8765 download_size: 774958 dataset_size: 1495760 - config_name: ur-tr splits: - name: 
train num_bytes: 3008954 num_examples: 24048 download_size: 1856228 dataset_size: 3008954 - config_name: ur-uk splits: - name: train num_bytes: 201687 num_examples: 1668 download_size: 133915 dataset_size: 201687 - config_name: ur-vi splits: - name: train num_bytes: 1879621 num_examples: 12522 download_size: 983020 dataset_size: 1879621 - config_name: vi-af splits: - name: train num_bytes: 349446 num_examples: 3304 download_size: 235099 dataset_size: 349446 - config_name: vi-ar splits: - name: train num_bytes: 421228815 num_examples: 2875003 download_size: 221286242 dataset_size: 421228815 - config_name: vi-bg splits: - name: train num_bytes: 429035515 num_examples: 2855128 download_size: 220304596 dataset_size: 429035515 - config_name: vi-bn splits: - name: train num_bytes: 36639339 num_examples: 219569 download_size: 17409638 dataset_size: 36639339 - config_name: vi-bs splits: - name: train num_bytes: 182410345 num_examples: 1449367 download_size: 102878289 dataset_size: 182410345 - config_name: vi-ca splits: - name: train num_bytes: 18251372 num_examples: 139798 download_size: 10118589 dataset_size: 18251372 - config_name: vi-cs splits: - name: train num_bytes: 385622743 num_examples: 3040137 download_size: 216864357 dataset_size: 385622743 - config_name: vi-da splits: - name: train num_bytes: 284588156 num_examples: 2223616 download_size: 156155759 dataset_size: 284588156 - config_name: vi-de splits: - name: train num_bytes: 299917788 num_examples: 2284697 download_size: 165073287 dataset_size: 299917788 - config_name: vi-el splits: - name: train num_bytes: 458881731 num_examples: 2966137 download_size: 235713513 dataset_size: 458881731 - config_name: vi-en splits: - name: train num_bytes: 431690759 num_examples: 3505276 download_size: 234779741 dataset_size: 431690759 - config_name: vi-es splits: - name: train num_bytes: 409738525 num_examples: 3215157 download_size: 225723989 dataset_size: 409738525 - config_name: vi-et splits: - name: train num_bytes: 
258530352 num_examples: 2048374 download_size: 144458871 dataset_size: 258530352 - config_name: vi-eu splits: - name: train num_bytes: 24896791 num_examples: 201280 download_size: 13893245 dataset_size: 24896791 - config_name: vi-fa splits: - name: train num_bytes: 225790751 num_examples: 1514036 download_size: 116902627 dataset_size: 225790751 - config_name: vi-fi splits: - name: train num_bytes: 327130397 num_examples: 2517078 download_size: 181127385 dataset_size: 327130397 - config_name: vi-fr splits: - name: train num_bytes: 358238205 num_examples: 2752315 download_size: 196637011 dataset_size: 358238205 - config_name: vi-gl splits: - name: train num_bytes: 3463570 num_examples: 26267 download_size: 1955774 dataset_size: 3463570 - config_name: vi-he splits: - name: train num_bytes: 399289605 num_examples: 2813730 download_size: 206730057 dataset_size: 399289605 - config_name: vi-hi splits: - name: train num_bytes: 5497555 num_examples: 32991 download_size: 2609815 dataset_size: 5497555 - config_name: vi-hr splits: - name: train num_bytes: 357164637 num_examples: 2842528 download_size: 200024269 dataset_size: 357164637 - config_name: vi-hu splits: - name: train num_bytes: 378504911 num_examples: 2974607 download_size: 212133098 dataset_size: 378504911 - config_name: vi-id splits: - name: train num_bytes: 264798782 num_examples: 2081700 download_size: 141956684 dataset_size: 264798782 - config_name: vi-is splits: - name: train num_bytes: 43505715 num_examples: 336571 download_size: 24327280 dataset_size: 43505715 - config_name: vi-it splits: - name: train num_bytes: 329570848 num_examples: 2542407 download_size: 182792757 dataset_size: 329570848 - config_name: vi-ja splits: - name: train num_bytes: 78378868 num_examples: 679305 download_size: 49452044 dataset_size: 78378868 - config_name: vi-ka splits: - name: train num_bytes: 9098633 num_examples: 53575 download_size: 4299661 dataset_size: 9098633 - config_name: vi-kk splits: - name: train num_bytes: 142116 
num_examples: 1178 download_size: 81276 dataset_size: 142116 - config_name: vi-ko splits: - name: train num_bytes: 46223328 num_examples: 345791 download_size: 25870504 dataset_size: 46223328 - config_name: vi-lt splits: - name: train num_bytes: 60762149 num_examples: 486839 download_size: 34736116 dataset_size: 60762149 - config_name: vi-lv splits: - name: train num_bytes: 25632280 num_examples: 209398 download_size: 14659253 dataset_size: 25632280 - config_name: vi-mk splits: - name: train num_bytes: 136198215 num_examples: 910783 download_size: 69998350 dataset_size: 136198215 - config_name: vi-ml splits: - name: train num_bytes: 24159902 num_examples: 124298 download_size: 10892819 dataset_size: 24159902 - config_name: vi-ms splits: - name: train num_bytes: 106338229 num_examples: 851694 download_size: 57256701 dataset_size: 106338229 - config_name: vi-nl splits: - name: train num_bytes: 357575974 num_examples: 2748283 download_size: 196341160 dataset_size: 357575974 - config_name: vi-no splits: - name: train num_bytes: 195473815 num_examples: 1541764 download_size: 107911512 dataset_size: 195473815 - config_name: vi-pl splits: - name: train num_bytes: 376945252 num_examples: 2929816 download_size: 210882427 dataset_size: 376945252 - config_name: vi-pt splits: - name: train num_bytes: 378714048 num_examples: 2963834 download_size: 208401056 dataset_size: 378714048 - config_name: vi-ro splits: - name: train num_bytes: 348277135 num_examples: 3207734 download_size: 226033059 dataset_size: 348277135 - config_name: vi-ru splits: - name: train num_bytes: 298591863 num_examples: 2289721 download_size: 178238738 dataset_size: 298591863 - config_name: vi-si splits: - name: train num_bytes: 32346009 num_examples: 210151 download_size: 17211993 dataset_size: 32346009 - config_name: vi-sk splits: - name: train num_bytes: 104471637 num_examples: 972827 download_size: 69716226 dataset_size: 104471637 - config_name: vi-sl splits: - name: train num_bytes: 258911839 
num_examples: 2439417 download_size: 170809635 dataset_size: 258911839 - config_name: vi-sq splits: - name: train num_bytes: 66352862 num_examples: 606767 download_size: 43137798 dataset_size: 66352862 - config_name: vi-sr splits: - name: train num_bytes: 305410707 num_examples: 2765729 download_size: 198225584 dataset_size: 305410707 - config_name: vi-sv splits: - name: train num_bytes: 253714584 num_examples: 2281627 download_size: 162431046 dataset_size: 253714584 - config_name: vi-ta splits: - name: train num_bytes: 2040848 num_examples: 12646 download_size: 1023904 dataset_size: 2040848 - config_name: vi-te splits: - name: train num_bytes: 1424758 num_examples: 9335 download_size: 760539 dataset_size: 1424758 - config_name: vi-th splits: - name: train num_bytes: 109392888 num_examples: 672817 download_size: 57750176 dataset_size: 109392888 - config_name: vi-tl splits: - name: train num_bytes: 677319 num_examples: 5856 download_size: 443661 dataset_size: 677319 - config_name: vi-tr splits: - name: train num_bytes: 346768965 num_examples: 3178031 download_size: 224062018 dataset_size: 346768965 - config_name: vi-uk splits: - name: train num_bytes: 21078212 num_examples: 162327 download_size: 12821194 dataset_size: 21078212 - config_name: vi-ur splits: - name: train num_bytes: 1653243 num_examples: 12522 download_size: 983020 dataset_size: 1653243 configs: - config_name: af-ar data_files: - split: train path: af-ar/train-* - config_name: af-bg data_files: - split: train path: af-bg/train-* - config_name: af-bn data_files: - split: train path: af-bn/train-* - config_name: af-bs data_files: - split: train path: af-bs/train-* - config_name: af-cs data_files: - split: train path: af-cs/train-* - config_name: af-da data_files: - split: train path: af-da/train-* - config_name: af-de data_files: - split: train path: af-de/train-* - config_name: af-el data_files: - split: train path: af-el/train-* - config_name: af-en data_files: - split: train path: af-en/train-* - 
config_name: af-es data_files: - split: train path: af-es/train-* - config_name: af-et data_files: - split: train path: af-et/train-* - config_name: af-fa data_files: - split: train path: af-fa/train-* - config_name: af-fi data_files: - split: train path: af-fi/train-* - config_name: af-fr data_files: - split: train path: af-fr/train-* - config_name: af-he data_files: - split: train path: af-he/train-* - config_name: af-hi data_files: - split: train path: af-hi/train-* - config_name: af-hr data_files: - split: train path: af-hr/train-* - config_name: af-hu data_files: - split: train path: af-hu/train-* - config_name: af-id data_files: - split: train path: af-id/train-* - config_name: af-it data_files: - split: train path: af-it/train-* - config_name: af-ja data_files: - split: train path: af-ja/train-* - config_name: af-lt data_files: - split: train path: af-lt/train-* - config_name: af-lv data_files: - split: train path: af-lv/train-* - config_name: af-mk data_files: - split: train path: af-mk/train-* - config_name: af-ml data_files: - split: train path: af-ml/train-* - config_name: af-ms data_files: - split: train path: af-ms/train-* - config_name: af-nl data_files: - split: train path: af-nl/train-* - config_name: af-no data_files: - split: train path: af-no/train-* - config_name: af-pl data_files: - split: train path: af-pl/train-* - config_name: af-pt data_files: - split: train path: af-pt/train-* - config_name: af-ro data_files: - split: train path: af-ro/train-* - config_name: af-ru data_files: - split: train path: af-ru/train-* - config_name: af-si data_files: - split: train path: af-si/train-* - config_name: af-sk data_files: - split: train path: af-sk/train-* - config_name: af-sl data_files: - split: train path: af-sl/train-* - config_name: af-sq data_files: - split: train path: af-sq/train-* - config_name: af-sr data_files: - split: train path: af-sr/train-* - config_name: af-sv data_files: - split: train path: af-sv/train-* - config_name: af-ta 
data_files: - split: train path: af-ta/train-* - config_name: af-th data_files: - split: train path: af-th/train-* - config_name: af-tr data_files: - split: train path: af-tr/train-* - config_name: af-uk data_files: - split: train path: af-uk/train-* - config_name: af-vi data_files: - split: train path: af-vi/train-* - config_name: ar-af data_files: - split: train path: ar-af/train-* - config_name: ar-bg data_files: - split: train path: ar-bg/train-* - config_name: ar-bn data_files: - split: train path: ar-bn/train-* - config_name: ar-bs data_files: - split: train path: ar-bs/train-* - config_name: ar-ca data_files: - split: train path: ar-ca/train-* - config_name: ar-cs data_files: - split: train path: ar-cs/train-* - config_name: ar-da data_files: - split: train path: ar-da/train-* - config_name: ar-de data_files: - split: train path: ar-de/train-* - config_name: ar-el data_files: - split: train path: ar-el/train-* - config_name: ar-en data_files: - split: train path: ar-en/train-* - config_name: ar-es data_files: - split: train path: ar-es/train-* - config_name: ar-et data_files: - split: train path: ar-et/train-* - config_name: ar-eu data_files: - split: train path: ar-eu/train-* - config_name: ar-fa data_files: - split: train path: ar-fa/train-* - config_name: ar-fi data_files: - split: train path: ar-fi/train-* - config_name: ar-fr data_files: - split: train path: ar-fr/train-* - config_name: ar-gl data_files: - split: train path: ar-gl/train-* - config_name: ar-he data_files: - split: train path: ar-he/train-* - config_name: ar-hi data_files: - split: train path: ar-hi/train-* - config_name: ar-hr data_files: - split: train path: ar-hr/train-* - config_name: ar-hu data_files: - split: train path: ar-hu/train-* - config_name: ar-hy data_files: - split: train path: ar-hy/train-* - config_name: ar-id data_files: - split: train path: ar-id/train-* - config_name: ar-is data_files: - split: train path: ar-is/train-* - config_name: ar-it data_files: - split: train 
path: ar-it/train-* - config_name: ar-ja data_files: - split: train path: ar-ja/train-* - config_name: ar-ka data_files: - split: train path: ar-ka/train-* - config_name: ar-kk data_files: - split: train path: ar-kk/train-* - config_name: ar-ko data_files: - split: train path: ar-ko/train-* - config_name: ar-lt data_files: - split: train path: ar-lt/train-* - config_name: ar-lv data_files: - split: train path: ar-lv/train-* - config_name: ar-mk data_files: - split: train path: ar-mk/train-* - config_name: ar-ml data_files: - split: train path: ar-ml/train-* - config_name: ar-ms data_files: - split: train path: ar-ms/train-* - config_name: ar-nl data_files: - split: train path: ar-nl/train-* - config_name: ar-no data_files: - split: train path: ar-no/train-* - config_name: ar-pl data_files: - split: train path: ar-pl/train-* - config_name: ar-pt data_files: - split: train path: ar-pt/train-* - config_name: ar-ro data_files: - split: train path: ar-ro/train-* - config_name: ar-ru data_files: - split: train path: ar-ru/train-* - config_name: ar-si data_files: - split: train path: ar-si/train-* - config_name: ar-sk data_files: - split: train path: ar-sk/train-* - config_name: ar-sl data_files: - split: train path: ar-sl/train-* - config_name: ar-sq data_files: - split: train path: ar-sq/train-* - config_name: ar-sr data_files: - split: train path: ar-sr/train-* - config_name: ar-sv data_files: - split: train path: ar-sv/train-* - config_name: ar-ta data_files: - split: train path: ar-ta/train-* - config_name: ar-te data_files: - split: train path: ar-te/train-* - config_name: ar-th data_files: - split: train path: ar-th/train-* - config_name: ar-tl data_files: - split: train path: ar-tl/train-* - config_name: ar-tr data_files: - split: train path: ar-tr/train-* - config_name: ar-uk data_files: - split: train path: ar-uk/train-* - config_name: ar-ur data_files: - split: train path: ar-ur/train-* - config_name: ar-vi data_files: - split: train path: ar-vi/train-* - 
config_name: bg-af data_files: - split: train path: bg-af/train-* - config_name: bg-ar data_files: - split: train path: bg-ar/train-* - config_name: bg-bn data_files: - split: train path: bg-bn/train-* - config_name: bg-bs data_files: - split: train path: bg-bs/train-* - config_name: bg-ca data_files: - split: train path: bg-ca/train-* - config_name: bg-cs data_files: - split: train path: bg-cs/train-* - config_name: bg-da data_files: - split: train path: bg-da/train-* - config_name: bg-de data_files: - split: train path: bg-de/train-* - config_name: bg-el data_files: - split: train path: bg-el/train-* - config_name: bg-en data_files: - split: train path: bg-en/train-* - config_name: bg-es data_files: - split: train path: bg-es/train-* - config_name: bg-et data_files: - split: train path: bg-et/train-* - config_name: bg-eu data_files: - split: train path: bg-eu/train-* - config_name: bg-fa data_files: - split: train path: bg-fa/train-* - config_name: bg-fi data_files: - split: train path: bg-fi/train-* - config_name: bg-fr data_files: - split: train path: bg-fr/train-* - config_name: bg-gl data_files: - split: train path: bg-gl/train-* - config_name: bg-he data_files: - split: train path: bg-he/train-* - config_name: bg-hi data_files: - split: train path: bg-hi/train-* - config_name: bg-hr data_files: - split: train path: bg-hr/train-* - config_name: bg-hu data_files: - split: train path: bg-hu/train-* - config_name: bg-hy data_files: - split: train path: bg-hy/train-* - config_name: bg-id data_files: - split: train path: bg-id/train-* - config_name: bg-is data_files: - split: train path: bg-is/train-* - config_name: bg-it data_files: - split: train path: bg-it/train-* - config_name: bg-ja data_files: - split: train path: bg-ja/train-* - config_name: bg-ka data_files: - split: train path: bg-ka/train-* - config_name: bg-kk data_files: - split: train path: bg-kk/train-* - config_name: bg-ko data_files: - split: train path: bg-ko/train-* - config_name: bg-lt 
data_files: - split: train path: bg-lt/train-* - config_name: bg-lv data_files: - split: train path: bg-lv/train-* - config_name: bg-mk data_files: - split: train path: bg-mk/train-* - config_name: bg-ml data_files: - split: train path: bg-ml/train-* - config_name: bg-ms data_files: - split: train path: bg-ms/train-* - config_name: bg-nl data_files: - split: train path: bg-nl/train-* - config_name: bg-no data_files: - split: train path: bg-no/train-* - config_name: bg-pl data_files: - split: train path: bg-pl/train-* - config_name: bg-pt data_files: - split: train path: bg-pt/train-* - config_name: bg-ro data_files: - split: train path: bg-ro/train-* - config_name: bg-ru data_files: - split: train path: bg-ru/train-* - config_name: bg-si data_files: - split: train path: bg-si/train-* - config_name: bg-sk data_files: - split: train path: bg-sk/train-* - config_name: bg-sl data_files: - split: train path: bg-sl/train-* - config_name: bg-sq data_files: - split: train path: bg-sq/train-* - config_name: bg-sr data_files: - split: train path: bg-sr/train-* - config_name: bg-sv data_files: - split: train path: bg-sv/train-* - config_name: bg-ta data_files: - split: train path: bg-ta/train-* - config_name: bg-te data_files: - split: train path: bg-te/train-* - config_name: bg-th data_files: - split: train path: bg-th/train-* - config_name: bg-tl data_files: - split: train path: bg-tl/train-* - config_name: bg-tr data_files: - split: train path: bg-tr/train-* - config_name: bg-uk data_files: - split: train path: bg-uk/train-* - config_name: bg-ur data_files: - split: train path: bg-ur/train-* - config_name: bg-vi data_files: - split: train path: bg-vi/train-* - config_name: bn-af data_files: - split: train path: bn-af/train-* - config_name: bn-ar data_files: - split: train path: bn-ar/train-* - config_name: bn-bg data_files: - split: train path: bn-bg/train-* - config_name: bn-bs data_files: - split: train path: bn-bs/train-* - config_name: bn-ca data_files: - split: train 
path: bn-ca/train-* - config_name: bn-cs data_files: - split: train path: bn-cs/train-* - config_name: bn-da data_files: - split: train path: bn-da/train-* - config_name: bn-de data_files: - split: train path: bn-de/train-* - config_name: bn-el data_files: - split: train path: bn-el/train-* - config_name: bn-en data_files: - split: train path: bn-en/train-* - config_name: bn-es data_files: - split: train path: bn-es/train-* - config_name: bn-et data_files: - split: train path: bn-et/train-* - config_name: bn-eu data_files: - split: train path: bn-eu/train-* - config_name: bn-fa data_files: - split: train path: bn-fa/train-* - config_name: bn-fi data_files: - split: train path: bn-fi/train-* - config_name: bn-fr data_files: - split: train path: bn-fr/train-* - config_name: bn-gl data_files: - split: train path: bn-gl/train-* - config_name: bn-he data_files: - split: train path: bn-he/train-* - config_name: bn-hi data_files: - split: train path: bn-hi/train-* - config_name: bn-hr data_files: - split: train path: bn-hr/train-* - config_name: bn-hu data_files: - split: train path: bn-hu/train-* - config_name: bn-id data_files: - split: train path: bn-id/train-* - config_name: bn-is data_files: - split: train path: bn-is/train-* - config_name: bn-it data_files: - split: train path: bn-it/train-* - config_name: bn-ja data_files: - split: train path: bn-ja/train-* - config_name: bn-ka data_files: - split: train path: bn-ka/train-* - config_name: bn-ko data_files: - split: train path: bn-ko/train-* - config_name: bn-lt data_files: - split: train path: bn-lt/train-* - config_name: bn-lv data_files: - split: train path: bn-lv/train-* - config_name: bn-mk data_files: - split: train path: bn-mk/train-* - config_name: bn-ml data_files: - split: train path: bn-ml/train-* - config_name: bn-ms data_files: - split: train path: bn-ms/train-* - config_name: bn-nl data_files: - split: train path: bn-nl/train-* - config_name: bn-no data_files: - split: train path: bn-no/train-* - 
config_name: bn-pl data_files: - split: train path: bn-pl/train-* - config_name: bn-pt data_files: - split: train path: bn-pt/train-* - config_name: bn-ro data_files: - split: train path: bn-ro/train-* - config_name: bn-ru data_files: - split: train path: bn-ru/train-* - config_name: bn-si data_files: - split: train path: bn-si/train-* - config_name: bn-sk data_files: - split: train path: bn-sk/train-* - config_name: bn-sl data_files: - split: train path: bn-sl/train-* - config_name: bn-sq data_files: - split: train path: bn-sq/train-* - config_name: bn-sr data_files: - split: train path: bn-sr/train-* - config_name: bn-sv data_files: - split: train path: bn-sv/train-* - config_name: bn-ta data_files: - split: train path: bn-ta/train-* - config_name: bn-th data_files: - split: train path: bn-th/train-* - config_name: bn-tl data_files: - split: train path: bn-tl/train-* - config_name: bn-tr data_files: - split: train path: bn-tr/train-* - config_name: bn-uk data_files: - split: train path: bn-uk/train-* - config_name: bn-ur data_files: - split: train path: bn-ur/train-* - config_name: bn-vi data_files: - split: train path: bn-vi/train-* - config_name: bs-af data_files: - split: train path: bs-af/train-* - config_name: bs-ar data_files: - split: train path: bs-ar/train-* - config_name: bs-bg data_files: - split: train path: bs-bg/train-* - config_name: bs-bn data_files: - split: train path: bs-bn/train-* - config_name: bs-ca data_files: - split: train path: bs-ca/train-* - config_name: bs-cs data_files: - split: train path: bs-cs/train-* - config_name: bs-da data_files: - split: train path: bs-da/train-* - config_name: bs-de data_files: - split: train path: bs-de/train-* - config_name: bs-el data_files: - split: train path: bs-el/train-* - config_name: bs-en data_files: - split: train path: bs-en/train-* - config_name: bs-es data_files: - split: train path: bs-es/train-* - config_name: bs-et data_files: - split: train path: bs-et/train-* - config_name: bs-eu 
data_files: - split: train path: bs-eu/train-* - config_name: bs-fa data_files: - split: train path: bs-fa/train-* - config_name: bs-fi data_files: - split: train path: bs-fi/train-* - config_name: bs-fr data_files: - split: train path: bs-fr/train-* - config_name: bs-gl data_files: - split: train path: bs-gl/train-* - config_name: bs-he data_files: - split: train path: bs-he/train-* - config_name: bs-hi data_files: - split: train path: bs-hi/train-* - config_name: bs-hr data_files: - split: train path: bs-hr/train-* - config_name: bs-hu data_files: - split: train path: bs-hu/train-* - config_name: bs-hy data_files: - split: train path: bs-hy/train-* - config_name: bs-id data_files: - split: train path: bs-id/train-* - config_name: bs-is data_files: - split: train path: bs-is/train-* - config_name: bs-it data_files: - split: train path: bs-it/train-* - config_name: bs-ja data_files: - split: train path: bs-ja/train-* - config_name: bs-ka data_files: - split: train path: bs-ka/train-* - config_name: bs-kk data_files: - split: train path: bs-kk/train-* - config_name: bs-ko data_files: - split: train path: bs-ko/train-* - config_name: bs-lt data_files: - split: train path: bs-lt/train-* - config_name: bs-lv data_files: - split: train path: bs-lv/train-* - config_name: bs-mk data_files: - split: train path: bs-mk/train-* - config_name: bs-ml data_files: - split: train path: bs-ml/train-* - config_name: bs-ms data_files: - split: train path: bs-ms/train-* - config_name: bs-nl data_files: - split: train path: bs-nl/train-* - config_name: bs-no data_files: - split: train path: bs-no/train-* - config_name: bs-pl data_files: - split: train path: bs-pl/train-* - config_name: bs-pt data_files: - split: train path: bs-pt/train-* - config_name: bs-ro data_files: - split: train path: bs-ro/train-* - config_name: bs-ru data_files: - split: train path: bs-ru/train-* - config_name: bs-si data_files: - split: train path: bs-si/train-* - config_name: bs-sk data_files: - split: train 
path: bs-sk/train-* - config_name: bs-sl data_files: - split: train path: bs-sl/train-* - config_name: bs-sq data_files: - split: train path: bs-sq/train-* - config_name: bs-sr data_files: - split: train path: bs-sr/train-* - config_name: bs-sv data_files: - split: train path: bs-sv/train-* - config_name: bs-ta data_files: - split: train path: bs-ta/train-* - config_name: bs-te data_files: - split: train path: bs-te/train-* - config_name: bs-th data_files: - split: train path: bs-th/train-* - config_name: bs-tl data_files: - split: train path: bs-tl/train-* - config_name: bs-tr data_files: - split: train path: bs-tr/train-* - config_name: bs-uk data_files: - split: train path: bs-uk/train-* - config_name: bs-ur data_files: - split: train path: bs-ur/train-* - config_name: bs-vi data_files: - split: train path: bs-vi/train-* - config_name: ca-ar data_files: - split: train path: ca-ar/train-* - config_name: ca-bg data_files: - split: train path: ca-bg/train-* - config_name: ca-bn data_files: - split: train path: ca-bn/train-* - config_name: ca-bs data_files: - split: train path: ca-bs/train-* - config_name: ca-cs data_files: - split: train path: ca-cs/train-* - config_name: ca-da data_files: - split: train path: ca-da/train-* - config_name: ca-de data_files: - split: train path: ca-de/train-* - config_name: ca-el data_files: - split: train path: ca-el/train-* - config_name: ca-en data_files: - split: train path: ca-en/train-* - config_name: ca-es data_files: - split: train path: ca-es/train-* - config_name: ca-et data_files: - split: train path: ca-et/train-* - config_name: ca-eu data_files: - split: train path: ca-eu/train-* - config_name: ca-fa data_files: - split: train path: ca-fa/train-* - config_name: ca-fi data_files: - split: train path: ca-fi/train-* - config_name: ca-fr data_files: - split: train path: ca-fr/train-* - config_name: ca-gl data_files: - split: train path: ca-gl/train-* - config_name: ca-he data_files: - split: train path: ca-he/train-* - 
config_name: ca-hi data_files: - split: train path: ca-hi/train-* - config_name: ca-hr data_files: - split: train path: ca-hr/train-* - config_name: ca-hu data_files: - split: train path: ca-hu/train-* - config_name: ca-id data_files: - split: train path: ca-id/train-* - config_name: ca-is data_files: - split: train path: ca-is/train-* - config_name: ca-it data_files: - split: train path: ca-it/train-* - config_name: ca-ja data_files: - split: train path: ca-ja/train-* - config_name: ca-ka data_files: - split: train path: ca-ka/train-* - config_name: ca-ko data_files: - split: train path: ca-ko/train-* - config_name: ca-lt data_files: - split: train path: ca-lt/train-* - config_name: ca-lv data_files: - split: train path: ca-lv/train-* - config_name: ca-mk data_files: - split: train path: ca-mk/train-* - config_name: ca-ml data_files: - split: train path: ca-ml/train-* - config_name: ca-ms data_files: - split: train path: ca-ms/train-* - config_name: ca-nl data_files: - split: train path: ca-nl/train-* - config_name: ca-no data_files: - split: train path: ca-no/train-* - config_name: ca-pl data_files: - split: train path: ca-pl/train-* - config_name: ca-pt data_files: - split: train path: ca-pt/train-* - config_name: ca-ro data_files: - split: train path: ca-ro/train-* - config_name: ca-ru data_files: - split: train path: ca-ru/train-* - config_name: ca-si data_files: - split: train path: ca-si/train-* - config_name: ca-sk data_files: - split: train path: ca-sk/train-* - config_name: ca-sl data_files: - split: train path: ca-sl/train-* - config_name: ca-sq data_files: - split: train path: ca-sq/train-* - config_name: ca-sr data_files: - split: train path: ca-sr/train-* - config_name: ca-sv data_files: - split: train path: ca-sv/train-* - config_name: ca-th data_files: - split: train path: ca-th/train-* - config_name: ca-tr data_files: - split: train path: ca-tr/train-* - config_name: ca-uk data_files: - split: train path: ca-uk/train-* - config_name: ca-vi 
data_files: - split: train path: ca-vi/train-* - config_name: cs-af data_files: - split: train path: cs-af/train-* - config_name: cs-ar data_files: - split: train path: cs-ar/train-* - config_name: cs-bg data_files: - split: train path: cs-bg/train-* - config_name: cs-bn data_files: - split: train path: cs-bn/train-* - config_name: cs-bs data_files: - split: train path: cs-bs/train-* - config_name: cs-ca data_files: - split: train path: cs-ca/train-* - config_name: cs-da data_files: - split: train path: cs-da/train-* - config_name: cs-de data_files: - split: train path: cs-de/train-* - config_name: cs-el data_files: - split: train path: cs-el/train-* - config_name: cs-en data_files: - split: train path: cs-en/train-* - config_name: cs-es data_files: - split: train path: cs-es/train-* - config_name: cs-et data_files: - split: train path: cs-et/train-* - config_name: cs-eu data_files: - split: train path: cs-eu/train-* - config_name: cs-fa data_files: - split: train path: cs-fa/train-* - config_name: cs-fi data_files: - split: train path: cs-fi/train-* - config_name: cs-fr data_files: - split: train path: cs-fr/train-* - config_name: cs-gl data_files: - split: train path: cs-gl/train-* - config_name: cs-he data_files: - split: train path: cs-he/train-* - config_name: cs-hi data_files: - split: train path: cs-hi/train-* - config_name: cs-hr data_files: - split: train path: cs-hr/train-* - config_name: cs-hu data_files: - split: train path: cs-hu/train-* - config_name: cs-hy data_files: - split: train path: cs-hy/train-* - config_name: cs-id data_files: - split: train path: cs-id/train-* - config_name: cs-is data_files: - split: train path: cs-is/train-* - config_name: cs-it data_files: - split: train path: cs-it/train-* - config_name: cs-ja data_files: - split: train path: cs-ja/train-* - config_name: cs-ka data_files: - split: train path: cs-ka/train-* - config_name: cs-kk data_files: - split: train path: cs-kk/train-* - config_name: cs-ko data_files: - split: train 
path: cs-ko/train-* - config_name: cs-lt data_files: - split: train path: cs-lt/train-* - config_name: cs-lv data_files: - split: train path: cs-lv/train-* - config_name: cs-mk data_files: - split: train path: cs-mk/train-* - config_name: cs-ml data_files: - split: train path: cs-ml/train-* - config_name: cs-ms data_files: - split: train path: cs-ms/train-* - config_name: cs-nl data_files: - split: train path: cs-nl/train-* - config_name: cs-no data_files: - split: train path: cs-no/train-* - config_name: cs-pl data_files: - split: train path: cs-pl/train-* - config_name: cs-pt data_files: - split: train path: cs-pt/train-* - config_name: cs-ro data_files: - split: train path: cs-ro/train-* - config_name: cs-ru data_files: - split: train path: cs-ru/train-* - config_name: cs-si data_files: - split: train path: cs-si/train-* - config_name: cs-sk data_files: - split: train path: cs-sk/train-* - config_name: cs-sl data_files: - split: train path: cs-sl/train-* - config_name: cs-sq data_files: - split: train path: cs-sq/train-* - config_name: cs-sr data_files: - split: train path: cs-sr/train-* - config_name: cs-sv data_files: - split: train path: cs-sv/train-* - config_name: cs-ta data_files: - split: train path: cs-ta/train-* - config_name: cs-te data_files: - split: train path: cs-te/train-* - config_name: cs-th data_files: - split: train path: cs-th/train-* - config_name: cs-tl data_files: - split: train path: cs-tl/train-* - config_name: cs-tr data_files: - split: train path: cs-tr/train-* - config_name: cs-uk data_files: - split: train path: cs-uk/train-* - config_name: cs-ur data_files: - split: train path: cs-ur/train-* - config_name: cs-vi data_files: - split: train path: cs-vi/train-* - config_name: da-af data_files: - split: train path: da-af/train-* - config_name: da-ar data_files: - split: train path: da-ar/train-* - config_name: da-bg data_files: - split: train path: da-bg/train-* - config_name: da-bn data_files: - split: train path: da-bn/train-* - 
config_name: da-bs data_files: - split: train path: da-bs/train-* - config_name: da-ca data_files: - split: train path: da-ca/train-* - config_name: da-cs data_files: - split: train path: da-cs/train-* - config_name: da-de data_files: - split: train path: da-de/train-* - config_name: da-el data_files: - split: train path: da-el/train-* - config_name: da-en data_files: - split: train path: da-en/train-* - config_name: da-es data_files: - split: train path: da-es/train-* - config_name: da-et data_files: - split: train path: da-et/train-* - config_name: da-eu data_files: - split: train path: da-eu/train-* - config_name: da-fa data_files: - split: train path: da-fa/train-* - config_name: da-fi data_files: - split: train path: da-fi/train-* - config_name: da-fr data_files: - split: train path: da-fr/train-* - config_name: da-gl data_files: - split: train path: da-gl/train-* - config_name: da-he data_files: - split: train path: da-he/train-* - config_name: da-hi data_files: - split: train path: da-hi/train-* - config_name: da-hr data_files: - split: train path: da-hr/train-* - config_name: da-hu data_files: - split: train path: da-hu/train-* - config_name: da-id data_files: - split: train path: da-id/train-* - config_name: da-is data_files: - split: train path: da-is/train-* - config_name: da-it data_files: - split: train path: da-it/train-* - config_name: da-ja data_files: - split: train path: da-ja/train-* - config_name: da-ka data_files: - split: train path: da-ka/train-* - config_name: da-kk data_files: - split: train path: da-kk/train-* - config_name: da-ko data_files: - split: train path: da-ko/train-* - config_name: da-lt data_files: - split: train path: da-lt/train-* - config_name: da-lv data_files: - split: train path: da-lv/train-* - config_name: da-mk data_files: - split: train path: da-mk/train-* - config_name: da-ml data_files: - split: train path: da-ml/train-* - config_name: da-ms data_files: - split: train path: da-ms/train-* - config_name: da-nl 
data_files: - split: train path: da-nl/train-* - config_name: da-no data_files: - split: train path: da-no/train-* - config_name: da-pl data_files: - split: train path: da-pl/train-* - config_name: da-pt data_files: - split: train path: da-pt/train-* - config_name: da-ro data_files: - split: train path: da-ro/train-* - config_name: da-ru data_files: - split: train path: da-ru/train-* - config_name: da-si data_files: - split: train path: da-si/train-* - config_name: da-sk data_files: - split: train path: da-sk/train-* - config_name: da-sl data_files: - split: train path: da-sl/train-* - config_name: da-sq data_files: - split: train path: da-sq/train-* - config_name: da-sr data_files: - split: train path: da-sr/train-* - config_name: da-sv data_files: - split: train path: da-sv/train-* - config_name: da-ta data_files: - split: train path: da-ta/train-* - config_name: da-te data_files: - split: train path: da-te/train-* - config_name: da-th data_files: - split: train path: da-th/train-* - config_name: da-tl data_files: - split: train path: da-tl/train-* - config_name: da-tr data_files: - split: train path: da-tr/train-* - config_name: da-uk data_files: - split: train path: da-uk/train-* - config_name: da-ur data_files: - split: train path: da-ur/train-* - config_name: da-vi data_files: - split: train path: da-vi/train-* - config_name: de-af data_files: - split: train path: de-af/train-* - config_name: de-ar data_files: - split: train path: de-ar/train-* - config_name: de-bg data_files: - split: train path: de-bg/train-* - config_name: de-bn data_files: - split: train path: de-bn/train-* - config_name: de-bs data_files: - split: train path: de-bs/train-* - config_name: de-ca data_files: - split: train path: de-ca/train-* - config_name: de-cs data_files: - split: train path: de-cs/train-* - config_name: de-da data_files: - split: train path: de-da/train-* - config_name: de-el data_files: - split: train path: de-el/train-* - config_name: de-en data_files: - split: train 
path: de-en/train-* - config_name: de-es data_files: - split: train path: de-es/train-* - config_name: de-et data_files: - split: train path: de-et/train-* - config_name: de-eu data_files: - split: train path: de-eu/train-* - config_name: de-fa data_files: - split: train path: de-fa/train-* - config_name: de-fi data_files: - split: train path: de-fi/train-* - config_name: de-fr data_files: - split: train path: de-fr/train-* - config_name: de-gl data_files: - split: train path: de-gl/train-* - config_name: de-he data_files: - split: train path: de-he/train-* - config_name: de-hi data_files: - split: train path: de-hi/train-* - config_name: de-hr data_files: - split: train path: de-hr/train-* - config_name: de-hu data_files: - split: train path: de-hu/train-* - config_name: de-hy data_files: - split: train path: de-hy/train-* - config_name: de-id data_files: - split: train path: de-id/train-* - config_name: de-is data_files: - split: train path: de-is/train-* - config_name: de-it data_files: - split: train path: de-it/train-* - config_name: de-ja data_files: - split: train path: de-ja/train-* - config_name: de-ka data_files: - split: train path: de-ka/train-* - config_name: de-kk data_files: - split: train path: de-kk/train-* - config_name: de-ko data_files: - split: train path: de-ko/train-* - config_name: de-lt data_files: - split: train path: de-lt/train-* - config_name: de-lv data_files: - split: train path: de-lv/train-* - config_name: de-mk data_files: - split: train path: de-mk/train-* - config_name: de-ml data_files: - split: train path: de-ml/train-* - config_name: de-ms data_files: - split: train path: de-ms/train-* - config_name: de-nl data_files: - split: train path: de-nl/train-* - config_name: de-no data_files: - split: train path: de-no/train-* - config_name: de-pl data_files: - split: train path: de-pl/train-* - config_name: de-pt data_files: - split: train path: de-pt/train-* - config_name: de-ro data_files: - split: train path: de-ro/train-* - 
config_name: de-ru data_files: - split: train path: de-ru/train-* - config_name: de-si data_files: - split: train path: de-si/train-* - config_name: de-sk data_files: - split: train path: de-sk/train-* - config_name: de-sl data_files: - split: train path: de-sl/train-* - config_name: de-sq data_files: - split: train path: de-sq/train-* - config_name: de-sr data_files: - split: train path: de-sr/train-* - config_name: de-sv data_files: - split: train path: de-sv/train-* - config_name: de-ta data_files: - split: train path: de-ta/train-* - config_name: de-te data_files: - split: train path: de-te/train-* - config_name: de-th data_files: - split: train path: de-th/train-* - config_name: de-tl data_files: - split: train path: de-tl/train-* - config_name: de-tr data_files: - split: train path: de-tr/train-* - config_name: de-uk data_files: - split: train path: de-uk/train-* - config_name: de-ur data_files: - split: train path: de-ur/train-* - config_name: de-vi data_files: - split: train path: de-vi/train-* - config_name: el-af data_files: - split: train path: el-af/train-* - config_name: el-ar data_files: - split: train path: el-ar/train-* - config_name: el-bg data_files: - split: train path: el-bg/train-* - config_name: el-bn data_files: - split: train path: el-bn/train-* - config_name: el-bs data_files: - split: train path: el-bs/train-* - config_name: el-ca data_files: - split: train path: el-ca/train-* - config_name: el-cs data_files: - split: train path: el-cs/train-* - config_name: el-da data_files: - split: train path: el-da/train-* - config_name: el-de data_files: - split: train path: el-de/train-* - config_name: el-en data_files: - split: train path: el-en/train-* - config_name: el-es data_files: - split: train path: el-es/train-* - config_name: el-et data_files: - split: train path: el-et/train-* - config_name: el-eu data_files: - split: train path: el-eu/train-* - config_name: el-fa data_files: - split: train path: el-fa/train-* - config_name: el-fi 
data_files: - split: train path: el-fi/train-* - config_name: el-fr data_files: - split: train path: el-fr/train-* - config_name: el-gl data_files: - split: train path: el-gl/train-* - config_name: el-he data_files: - split: train path: el-he/train-* - config_name: el-hi data_files: - split: train path: el-hi/train-* - config_name: el-hr data_files: - split: train path: el-hr/train-* - config_name: el-hu data_files: - split: train path: el-hu/train-* - config_name: el-hy data_files: - split: train path: el-hy/train-* - config_name: el-id data_files: - split: train path: el-id/train-* - config_name: el-is data_files: - split: train path: el-is/train-* - config_name: el-it data_files: - split: train path: el-it/train-* - config_name: el-ja data_files: - split: train path: el-ja/train-* - config_name: el-ka data_files: - split: train path: el-ka/train-* - config_name: el-kk data_files: - split: train path: el-kk/train-* - config_name: el-ko data_files: - split: train path: el-ko/train-* - config_name: el-lt data_files: - split: train path: el-lt/train-* - config_name: el-lv data_files: - split: train path: el-lv/train-* - config_name: el-mk data_files: - split: train path: el-mk/train-* - config_name: el-ml data_files: - split: train path: el-ml/train-* - config_name: el-ms data_files: - split: train path: el-ms/train-* - config_name: el-nl data_files: - split: train path: el-nl/train-* - config_name: el-no data_files: - split: train path: el-no/train-* - config_name: el-pl data_files: - split: train path: el-pl/train-* - config_name: el-pt data_files: - split: train path: el-pt/train-* - config_name: el-ro data_files: - split: train path: el-ro/train-* - config_name: el-ru data_files: - split: train path: el-ru/train-* - config_name: el-si data_files: - split: train path: el-si/train-* - config_name: el-sk data_files: - split: train path: el-sk/train-* - config_name: el-sl data_files: - split: train path: el-sl/train-* - config_name: el-sq data_files: - split: train 
path: el-sq/train-* - config_name: el-sr data_files: - split: train path: el-sr/train-* - config_name: el-sv data_files: - split: train path: el-sv/train-* - config_name: el-ta data_files: - split: train path: el-ta/train-* - config_name: el-te data_files: - split: train path: el-te/train-* - config_name: el-th data_files: - split: train path: el-th/train-* - config_name: el-tl data_files: - split: train path: el-tl/train-* - config_name: el-tr data_files: - split: train path: el-tr/train-* - config_name: el-uk data_files: - split: train path: el-uk/train-* - config_name: el-ur data_files: - split: train path: el-ur/train-* - config_name: el-vi data_files: - split: train path: el-vi/train-* - config_name: en-af data_files: - split: train path: en-af/train-* - config_name: en-ar data_files: - split: train path: en-ar/train-* - config_name: en-bg data_files: - split: train path: en-bg/train-* - config_name: en-bn data_files: - split: train path: en-bn/train-* - config_name: en-bs data_files: - split: train path: en-bs/train-* - config_name: en-ca data_files: - split: train path: en-ca/train-* - config_name: en-cs data_files: - split: train path: en-cs/train-* - config_name: en-da data_files: - split: train path: en-da/train-* - config_name: en-de data_files: - split: train path: en-de/train-* - config_name: en-el data_files: - split: train path: en-el/train-* - config_name: en-es data_files: - split: train path: en-es/train-* - config_name: en-et data_files: - split: train path: en-et/train-* - config_name: en-eu data_files: - split: train path: en-eu/train-* - config_name: en-fa data_files: - split: train path: en-fa/train-* - config_name: en-fi data_files: - split: train path: en-fi/train-* - config_name: en-fr data_files: - split: train path: en-fr/train-* - config_name: en-gl data_files: - split: train path: en-gl/train-* - config_name: en-he data_files: - split: train path: en-he/train-* - config_name: en-hi data_files: - split: train path: en-hi/train-* - 
config_name: en-hr data_files: - split: train path: en-hr/train-* - config_name: en-hu data_files: - split: train path: en-hu/train-* - config_name: en-hy data_files: - split: train path: en-hy/train-* - config_name: en-id data_files: - split: train path: en-id/train-* - config_name: en-is data_files: - split: train path: en-is/train-* - config_name: en-it data_files: - split: train path: en-it/train-* - config_name: en-ja data_files: - split: train path: en-ja/train-* - config_name: en-ka data_files: - split: train path: en-ka/train-* - config_name: en-kk data_files: - split: train path: en-kk/train-* - config_name: en-ko data_files: - split: train path: en-ko/train-* - config_name: en-lt data_files: - split: train path: en-lt/train-* - config_name: en-lv data_files: - split: train path: en-lv/train-* - config_name: en-mk data_files: - split: train path: en-mk/train-* - config_name: en-ml data_files: - split: train path: en-ml/train-* - config_name: en-ms data_files: - split: train path: en-ms/train-* - config_name: en-nl data_files: - split: train path: en-nl/train-* - config_name: en-no data_files: - split: train path: en-no/train-* - config_name: en-pl data_files: - split: train path: en-pl/train-* - config_name: en-pt data_files: - split: train path: en-pt/train-* - config_name: en-ro data_files: - split: train path: en-ro/train-* - config_name: en-ru data_files: - split: train path: en-ru/train-* - config_name: en-si data_files: - split: train path: en-si/train-* - config_name: en-sk data_files: - split: train path: en-sk/train-* - config_name: en-sl data_files: - split: train path: en-sl/train-* - config_name: en-sq data_files: - split: train path: en-sq/train-* - config_name: en-sr data_files: - split: train path: en-sr/train-* - config_name: en-sv data_files: - split: train path: en-sv/train-* - config_name: en-ta data_files: - split: train path: en-ta/train-* - config_name: en-te data_files: - split: train path: en-te/train-* - config_name: en-th 
data_files: - split: train path: en-th/train-* - config_name: en-tl data_files: - split: train path: en-tl/train-* - config_name: en-tr data_files: - split: train path: en-tr/train-* - config_name: en-uk data_files: - split: train path: en-uk/train-* - config_name: en-ur data_files: - split: train path: en-ur/train-* - config_name: en-vi data_files: - split: train path: en-vi/train-* - config_name: es-af data_files: - split: train path: es-af/train-* - config_name: es-ar data_files: - split: train path: es-ar/train-* - config_name: es-bg data_files: - split: train path: es-bg/train-* - config_name: es-bn data_files: - split: train path: es-bn/train-* - config_name: es-bs data_files: - split: train path: es-bs/train-* - config_name: es-ca data_files: - split: train path: es-ca/train-* - config_name: es-cs data_files: - split: train path: es-cs/train-* - config_name: es-da data_files: - split: train path: es-da/train-* - config_name: es-de data_files: - split: train path: es-de/train-* - config_name: es-el data_files: - split: train path: es-el/train-* - config_name: es-en data_files: - split: train path: es-en/train-* - config_name: es-et data_files: - split: train path: es-et/train-* - config_name: es-eu data_files: - split: train path: es-eu/train-* - config_name: es-fa data_files: - split: train path: es-fa/train-* - config_name: es-fi data_files: - split: train path: es-fi/train-* - config_name: es-fr data_files: - split: train path: es-fr/train-* - config_name: es-gl data_files: - split: train path: es-gl/train-* - config_name: es-he data_files: - split: train path: es-he/train-* - config_name: es-hi data_files: - split: train path: es-hi/train-* - config_name: es-hr data_files: - split: train path: es-hr/train-* - config_name: es-hu data_files: - split: train path: es-hu/train-* - config_name: es-hy data_files: - split: train path: es-hy/train-* - config_name: es-id data_files: - split: train path: es-id/train-* - config_name: es-is data_files: - split: train 
path: es-is/train-* - config_name: es-it data_files: - split: train path: es-it/train-* - config_name: es-ja data_files: - split: train path: es-ja/train-* - config_name: es-ka data_files: - split: train path: es-ka/train-* - config_name: es-kk data_files: - split: train path: es-kk/train-* - config_name: es-ko data_files: - split: train path: es-ko/train-* - config_name: es-lt data_files: - split: train path: es-lt/train-* - config_name: es-lv data_files: - split: train path: es-lv/train-* - config_name: es-mk data_files: - split: train path: es-mk/train-* - config_name: es-ml data_files: - split: train path: es-ml/train-* - config_name: es-ms data_files: - split: train path: es-ms/train-* - config_name: es-nl data_files: - split: train path: es-nl/train-* - config_name: es-no data_files: - split: train path: es-no/train-* - config_name: es-pl data_files: - split: train path: es-pl/train-* - config_name: es-pt data_files: - split: train path: es-pt/train-* - config_name: es-ro data_files: - split: train path: es-ro/train-* - config_name: es-ru data_files: - split: train path: es-ru/train-* - config_name: es-si data_files: - split: train path: es-si/train-* - config_name: es-sk data_files: - split: train path: es-sk/train-* - config_name: es-sl data_files: - split: train path: es-sl/train-* - config_name: es-sq data_files: - split: train path: es-sq/train-* - config_name: es-sr data_files: - split: train path: es-sr/train-* - config_name: es-sv data_files: - split: train path: es-sv/train-* - config_name: es-ta data_files: - split: train path: es-ta/train-* - config_name: es-te data_files: - split: train path: es-te/train-* - config_name: es-th data_files: - split: train path: es-th/train-* - config_name: es-tl data_files: - split: train path: es-tl/train-* - config_name: es-tr data_files: - split: train path: es-tr/train-* - config_name: es-uk data_files: - split: train path: es-uk/train-* - config_name: es-ur data_files: - split: train path: es-ur/train-* - 
config_name: es-vi data_files: - split: train path: es-vi/train-* - config_name: et-af data_files: - split: train path: et-af/train-* - config_name: et-ar data_files: - split: train path: et-ar/train-* - config_name: et-bg data_files: - split: train path: et-bg/train-* - config_name: et-bn data_files: - split: train path: et-bn/train-* - config_name: et-bs data_files: - split: train path: et-bs/train-* - config_name: et-ca data_files: - split: train path: et-ca/train-* - config_name: et-cs data_files: - split: train path: et-cs/train-* - config_name: et-da data_files: - split: train path: et-da/train-* - config_name: et-de data_files: - split: train path: et-de/train-* - config_name: et-el data_files: - split: train path: et-el/train-* - config_name: et-en data_files: - split: train path: et-en/train-* - config_name: et-es data_files: - split: train path: et-es/train-* - config_name: et-eu data_files: - split: train path: et-eu/train-* - config_name: et-fa data_files: - split: train path: et-fa/train-* - config_name: et-fi data_files: - split: train path: et-fi/train-* - config_name: et-fr data_files: - split: train path: et-fr/train-* - config_name: et-gl data_files: - split: train path: et-gl/train-* - config_name: et-he data_files: - split: train path: et-he/train-* - config_name: et-hi data_files: - split: train path: et-hi/train-* - config_name: et-hr data_files: - split: train path: et-hr/train-* - config_name: et-hu data_files: - split: train path: et-hu/train-* - config_name: et-hy data_files: - split: train path: et-hy/train-* - config_name: et-id data_files: - split: train path: et-id/train-* - config_name: et-is data_files: - split: train path: et-is/train-* - config_name: et-it data_files: - split: train path: et-it/train-* - config_name: et-ja data_files: - split: train path: et-ja/train-* - config_name: et-ka data_files: - split: train path: et-ka/train-* - config_name: et-kk data_files: - split: train path: et-kk/train-* - config_name: et-ko 
data_files: - split: train path: et-ko/train-* - config_name: et-lt data_files: - split: train path: et-lt/train-* - config_name: et-lv data_files: - split: train path: et-lv/train-* - config_name: et-mk data_files: - split: train path: et-mk/train-* - config_name: et-ml data_files: - split: train path: et-ml/train-* - config_name: et-ms data_files: - split: train path: et-ms/train-* - config_name: et-nl data_files: - split: train path: et-nl/train-* - config_name: et-no data_files: - split: train path: et-no/train-* - config_name: et-pl data_files: - split: train path: et-pl/train-* - config_name: et-pt data_files: - split: train path: et-pt/train-* - config_name: et-ro data_files: - split: train path: et-ro/train-* - config_name: et-ru data_files: - split: train path: et-ru/train-* - config_name: et-si data_files: - split: train path: et-si/train-* - config_name: et-sk data_files: - split: train path: et-sk/train-* - config_name: et-sl data_files: - split: train path: et-sl/train-* - config_name: et-sq data_files: - split: train path: et-sq/train-* - config_name: et-sr data_files: - split: train path: et-sr/train-* - config_name: et-sv data_files: - split: train path: et-sv/train-* - config_name: et-ta data_files: - split: train path: et-ta/train-* - config_name: et-te data_files: - split: train path: et-te/train-* - config_name: et-th data_files: - split: train path: et-th/train-* - config_name: et-tl data_files: - split: train path: et-tl/train-* - config_name: et-tr data_files: - split: train path: et-tr/train-* - config_name: et-uk data_files: - split: train path: et-uk/train-* - config_name: et-ur data_files: - split: train path: et-ur/train-* - config_name: et-vi data_files: - split: train path: et-vi/train-* - config_name: eu-ar data_files: - split: train path: eu-ar/train-* - config_name: eu-bg data_files: - split: train path: eu-bg/train-* - config_name: eu-bn data_files: - split: train path: eu-bn/train-* - config_name: eu-bs data_files: - split: train 
path: eu-bs/train-* - config_name: eu-ca data_files: - split: train path: eu-ca/train-* - config_name: eu-cs data_files: - split: train path: eu-cs/train-* - config_name: eu-da data_files: - split: train path: eu-da/train-* - config_name: eu-de data_files: - split: train path: eu-de/train-* - config_name: eu-el data_files: - split: train path: eu-el/train-* - config_name: eu-en data_files: - split: train path: eu-en/train-* - config_name: eu-es data_files: - split: train path: eu-es/train-* - config_name: eu-et data_files: - split: train path: eu-et/train-* - config_name: eu-fa data_files: - split: train path: eu-fa/train-* - config_name: eu-fi data_files: - split: train path: eu-fi/train-* - config_name: eu-fr data_files: - split: train path: eu-fr/train-* - config_name: eu-gl data_files: - split: train path: eu-gl/train-* - config_name: eu-he data_files: - split: train path: eu-he/train-* - config_name: eu-hi data_files: - split: train path: eu-hi/train-* - config_name: eu-hr data_files: - split: train path: eu-hr/train-* - config_name: eu-hu data_files: - split: train path: eu-hu/train-* - config_name: eu-id data_files: - split: train path: eu-id/train-* - config_name: eu-is data_files: - split: train path: eu-is/train-* - config_name: eu-it data_files: - split: train path: eu-it/train-* - config_name: eu-ja data_files: - split: train path: eu-ja/train-* - config_name: eu-ka data_files: - split: train path: eu-ka/train-* - config_name: eu-ko data_files: - split: train path: eu-ko/train-* - config_name: eu-lt data_files: - split: train path: eu-lt/train-* - config_name: eu-lv data_files: - split: train path: eu-lv/train-* - config_name: eu-mk data_files: - split: train path: eu-mk/train-* - config_name: eu-ml data_files: - split: train path: eu-ml/train-* - config_name: eu-ms data_files: - split: train path: eu-ms/train-* - config_name: eu-nl data_files: - split: train path: eu-nl/train-* - config_name: eu-no data_files: - split: train path: eu-no/train-* - 
config_name: eu-pl data_files: - split: train path: eu-pl/train-* - config_name: eu-pt data_files: - split: train path: eu-pt/train-* - config_name: eu-ro data_files: - split: train path: eu-ro/train-* - config_name: eu-ru data_files: - split: train path: eu-ru/train-* - config_name: eu-si data_files: - split: train path: eu-si/train-* - config_name: eu-sk data_files: - split: train path: eu-sk/train-* - config_name: eu-sl data_files: - split: train path: eu-sl/train-* - config_name: eu-sq data_files: - split: train path: eu-sq/train-* - config_name: eu-sr data_files: - split: train path: eu-sr/train-* - config_name: eu-sv data_files: - split: train path: eu-sv/train-* - config_name: eu-ta data_files: - split: train path: eu-ta/train-* - config_name: eu-te data_files: - split: train path: eu-te/train-* - config_name: eu-th data_files: - split: train path: eu-th/train-* - config_name: eu-tl data_files: - split: train path: eu-tl/train-* - config_name: eu-tr data_files: - split: train path: eu-tr/train-* - config_name: eu-uk data_files: - split: train path: eu-uk/train-* - config_name: eu-ur data_files: - split: train path: eu-ur/train-* - config_name: eu-vi data_files: - split: train path: eu-vi/train-* - config_name: fa-af data_files: - split: train path: fa-af/train-* - config_name: fa-ar data_files: - split: train path: fa-ar/train-* - config_name: fa-bg data_files: - split: train path: fa-bg/train-* - config_name: fa-bn data_files: - split: train path: fa-bn/train-* - config_name: fa-bs data_files: - split: train path: fa-bs/train-* - config_name: fa-ca data_files: - split: train path: fa-ca/train-* - config_name: fa-cs data_files: - split: train path: fa-cs/train-* - config_name: fa-da data_files: - split: train path: fa-da/train-* - config_name: fa-de data_files: - split: train path: fa-de/train-* - config_name: fa-el data_files: - split: train path: fa-el/train-* - config_name: fa-en data_files: - split: train path: fa-en/train-* - config_name: fa-es 
data_files: - split: train path: fa-es/train-* - config_name: fa-et data_files: - split: train path: fa-et/train-* - config_name: fa-eu data_files: - split: train path: fa-eu/train-* - config_name: fa-fi data_files: - split: train path: fa-fi/train-* - config_name: fa-fr data_files: - split: train path: fa-fr/train-* - config_name: fa-gl data_files: - split: train path: fa-gl/train-* - config_name: fa-he data_files: - split: train path: fa-he/train-* - config_name: fa-hi data_files: - split: train path: fa-hi/train-* - config_name: fa-hr data_files: - split: train path: fa-hr/train-* - config_name: fa-hu data_files: - split: train path: fa-hu/train-* - config_name: fa-id data_files: - split: train path: fa-id/train-* - config_name: fa-is data_files: - split: train path: fa-is/train-* - config_name: fa-it data_files: - split: train path: fa-it/train-* - config_name: fa-ja data_files: - split: train path: fa-ja/train-* - config_name: fa-ka data_files: - split: train path: fa-ka/train-* - config_name: fa-kk data_files: - split: train path: fa-kk/train-* - config_name: fa-ko data_files: - split: train path: fa-ko/train-* - config_name: fa-lt data_files: - split: train path: fa-lt/train-* - config_name: fa-lv data_files: - split: train path: fa-lv/train-* - config_name: fa-mk data_files: - split: train path: fa-mk/train-* - config_name: fa-ml data_files: - split: train path: fa-ml/train-* - config_name: fa-ms data_files: - split: train path: fa-ms/train-* - config_name: fa-nl data_files: - split: train path: fa-nl/train-* - config_name: fa-no data_files: - split: train path: fa-no/train-* - config_name: fa-pl data_files: - split: train path: fa-pl/train-* - config_name: fa-pt data_files: - split: train path: fa-pt/train-* - config_name: fa-ro data_files: - split: train path: fa-ro/train-* - config_name: fa-ru data_files: - split: train path: fa-ru/train-* - config_name: fa-si data_files: - split: train path: fa-si/train-* - config_name: fa-sk data_files: - split: train 
path: fa-sk/train-* - config_name: fa-sl data_files: - split: train path: fa-sl/train-* - config_name: fa-sq data_files: - split: train path: fa-sq/train-* - config_name: fa-sr data_files: - split: train path: fa-sr/train-* - config_name: fa-sv data_files: - split: train path: fa-sv/train-* - config_name: fa-ta data_files: - split: train path: fa-ta/train-* - config_name: fa-te data_files: - split: train path: fa-te/train-* - config_name: fa-th data_files: - split: train path: fa-th/train-* - config_name: fa-tl data_files: - split: train path: fa-tl/train-* - config_name: fa-tr data_files: - split: train path: fa-tr/train-* - config_name: fa-uk data_files: - split: train path: fa-uk/train-* - config_name: fa-ur data_files: - split: train path: fa-ur/train-* - config_name: fa-vi data_files: - split: train path: fa-vi/train-* - config_name: fi-af data_files: - split: train path: fi-af/train-* - config_name: fi-ar data_files: - split: train path: fi-ar/train-* - config_name: fi-bg data_files: - split: train path: fi-bg/train-* - config_name: fi-bn data_files: - split: train path: fi-bn/train-* - config_name: fi-bs data_files: - split: train path: fi-bs/train-* - config_name: fi-ca data_files: - split: train path: fi-ca/train-* - config_name: fi-cs data_files: - split: train path: fi-cs/train-* - config_name: fi-da data_files: - split: train path: fi-da/train-* - config_name: fi-de data_files: - split: train path: fi-de/train-* - config_name: fi-el data_files: - split: train path: fi-el/train-* - config_name: fi-en data_files: - split: train path: fi-en/train-* - config_name: fi-es data_files: - split: train path: fi-es/train-* - config_name: fi-et data_files: - split: train path: fi-et/train-* - config_name: fi-eu data_files: - split: train path: fi-eu/train-* - config_name: fi-fa data_files: - split: train path: fi-fa/train-* - config_name: fi-fr data_files: - split: train path: fi-fr/train-* - config_name: fi-gl data_files: - split: train path: fi-gl/train-* - 
config_name: fi-he data_files: - split: train path: fi-he/train-* - config_name: fi-hi data_files: - split: train path: fi-hi/train-* - config_name: fi-hr data_files: - split: train path: fi-hr/train-* - config_name: fi-hu data_files: - split: train path: fi-hu/train-* - config_name: fi-hy data_files: - split: train path: fi-hy/train-* - config_name: fi-id data_files: - split: train path: fi-id/train-* - config_name: fi-is data_files: - split: train path: fi-is/train-* - config_name: fi-it data_files: - split: train path: fi-it/train-* - config_name: fi-ja data_files: - split: train path: fi-ja/train-* - config_name: fi-ka data_files: - split: train path: fi-ka/train-* - config_name: fi-kk data_files: - split: train path: fi-kk/train-* - config_name: fi-ko data_files: - split: train path: fi-ko/train-* - config_name: fi-lt data_files: - split: train path: fi-lt/train-* - config_name: fi-lv data_files: - split: train path: fi-lv/train-* - config_name: fi-mk data_files: - split: train path: fi-mk/train-* - config_name: fi-ml data_files: - split: train path: fi-ml/train-* - config_name: fi-ms data_files: - split: train path: fi-ms/train-* - config_name: fi-nl data_files: - split: train path: fi-nl/train-* - config_name: fi-no data_files: - split: train path: fi-no/train-* - config_name: fi-pl data_files: - split: train path: fi-pl/train-* - config_name: fi-pt data_files: - split: train path: fi-pt/train-* - config_name: fi-ro data_files: - split: train path: fi-ro/train-* - config_name: fi-ru data_files: - split: train path: fi-ru/train-* - config_name: fi-si data_files: - split: train path: fi-si/train-* - config_name: fi-sk data_files: - split: train path: fi-sk/train-* - config_name: fi-sl data_files: - split: train path: fi-sl/train-* - config_name: fi-sq data_files: - split: train path: fi-sq/train-* - config_name: fi-sr data_files: - split: train path: fi-sr/train-* - config_name: fi-sv data_files: - split: train path: fi-sv/train-* - config_name: fi-ta 
data_files: - split: train path: fi-ta/train-* - config_name: fi-te data_files: - split: train path: fi-te/train-* - config_name: fi-th data_files: - split: train path: fi-th/train-* - config_name: fi-tl data_files: - split: train path: fi-tl/train-* - config_name: fi-tr data_files: - split: train path: fi-tr/train-* - config_name: fi-uk data_files: - split: train path: fi-uk/train-* - config_name: fi-ur data_files: - split: train path: fi-ur/train-* - config_name: fi-vi data_files: - split: train path: fi-vi/train-* - config_name: fr-af data_files: - split: train path: fr-af/train-* - config_name: fr-ar data_files: - split: train path: fr-ar/train-* - config_name: fr-bg data_files: - split: train path: fr-bg/train-* - config_name: fr-bn data_files: - split: train path: fr-bn/train-* - config_name: fr-bs data_files: - split: train path: fr-bs/train-* - config_name: fr-ca data_files: - split: train path: fr-ca/train-* - config_name: fr-cs data_files: - split: train path: fr-cs/train-* - config_name: fr-da data_files: - split: train path: fr-da/train-* - config_name: fr-de data_files: - split: train path: fr-de/train-* - config_name: fr-el data_files: - split: train path: fr-el/train-* - config_name: fr-en data_files: - split: train path: fr-en/train-* - config_name: fr-es data_files: - split: train path: fr-es/train-* - config_name: fr-et data_files: - split: train path: fr-et/train-* - config_name: fr-eu data_files: - split: train path: fr-eu/train-* - config_name: fr-fa data_files: - split: train path: fr-fa/train-* - config_name: fr-fi data_files: - split: train path: fr-fi/train-* - config_name: fr-gl data_files: - split: train path: fr-gl/train-* - config_name: fr-he data_files: - split: train path: fr-he/train-* - config_name: fr-hi data_files: - split: train path: fr-hi/train-* - config_name: fr-hr data_files: - split: train path: fr-hr/train-* - config_name: fr-hu data_files: - split: train path: fr-hu/train-* - config_name: fr-hy data_files: - split: train 
path: fr-hy/train-* - config_name: fr-id data_files: - split: train path: fr-id/train-* - config_name: fr-is data_files: - split: train path: fr-is/train-* - config_name: fr-it data_files: - split: train path: fr-it/train-* - config_name: fr-ja data_files: - split: train path: fr-ja/train-* - config_name: fr-ka data_files: - split: train path: fr-ka/train-* - config_name: fr-kk data_files: - split: train path: fr-kk/train-* - config_name: fr-ko data_files: - split: train path: fr-ko/train-* - config_name: fr-lt data_files: - split: train path: fr-lt/train-* - config_name: fr-lv data_files: - split: train path: fr-lv/train-* - config_name: fr-mk data_files: - split: train path: fr-mk/train-* - config_name: fr-ml data_files: - split: train path: fr-ml/train-* - config_name: fr-ms data_files: - split: train path: fr-ms/train-* - config_name: fr-nl data_files: - split: train path: fr-nl/train-* - config_name: fr-no data_files: - split: train path: fr-no/train-* - config_name: fr-pl data_files: - split: train path: fr-pl/train-* - config_name: fr-pt data_files: - split: train path: fr-pt/train-* - config_name: fr-ro data_files: - split: train path: fr-ro/train-* - config_name: fr-ru data_files: - split: train path: fr-ru/train-* - config_name: fr-si data_files: - split: train path: fr-si/train-* - config_name: fr-sk data_files: - split: train path: fr-sk/train-* - config_name: fr-sl data_files: - split: train path: fr-sl/train-* - config_name: fr-sq data_files: - split: train path: fr-sq/train-* - config_name: fr-sr data_files: - split: train path: fr-sr/train-* - config_name: fr-sv data_files: - split: train path: fr-sv/train-* - config_name: fr-ta data_files: - split: train path: fr-ta/train-* - config_name: fr-te data_files: - split: train path: fr-te/train-* - config_name: fr-th data_files: - split: train path: fr-th/train-* - config_name: fr-tl data_files: - split: train path: fr-tl/train-* - config_name: fr-tr data_files: - split: train path: fr-tr/train-* - 
config_name: fr-uk data_files: - split: train path: fr-uk/train-* - config_name: fr-ur data_files: - split: train path: fr-ur/train-* - config_name: fr-vi data_files: - split: train path: fr-vi/train-* - config_name: gl-ar data_files: - split: train path: gl-ar/train-* - config_name: gl-bg data_files: - split: train path: gl-bg/train-* - config_name: gl-bn data_files: - split: train path: gl-bn/train-* - config_name: gl-bs data_files: - split: train path: gl-bs/train-* - config_name: gl-ca data_files: - split: train path: gl-ca/train-* - config_name: gl-cs data_files: - split: train path: gl-cs/train-* - config_name: gl-da data_files: - split: train path: gl-da/train-* - config_name: gl-de data_files: - split: train path: gl-de/train-* - config_name: gl-el data_files: - split: train path: gl-el/train-* - config_name: gl-en data_files: - split: train path: gl-en/train-* - config_name: gl-es data_files: - split: train path: gl-es/train-* - config_name: gl-et data_files: - split: train path: gl-et/train-* - config_name: gl-eu data_files: - split: train path: gl-eu/train-* - config_name: gl-fa data_files: - split: train path: gl-fa/train-* - config_name: gl-fi data_files: - split: train path: gl-fi/train-* - config_name: gl-fr data_files: - split: train path: gl-fr/train-* - config_name: gl-he data_files: - split: train path: gl-he/train-* - config_name: gl-hi data_files: - split: train path: gl-hi/train-* - config_name: gl-hr data_files: - split: train path: gl-hr/train-* - config_name: gl-hu data_files: - split: train path: gl-hu/train-* - config_name: gl-id data_files: - split: train path: gl-id/train-* - config_name: gl-is data_files: - split: train path: gl-is/train-* - config_name: gl-it data_files: - split: train path: gl-it/train-* - config_name: gl-ja data_files: - split: train path: gl-ja/train-* - config_name: gl-ka data_files: - split: train path: gl-ka/train-* - config_name: gl-ko data_files: - split: train path: gl-ko/train-* - config_name: gl-lt 
data_files: - split: train path: gl-lt/train-* - config_name: gl-lv data_files: - split: train path: gl-lv/train-* - config_name: gl-mk data_files: - split: train path: gl-mk/train-* - config_name: gl-ml data_files: - split: train path: gl-ml/train-* - config_name: gl-ms data_files: - split: train path: gl-ms/train-* - config_name: gl-nl data_files: - split: train path: gl-nl/train-* - config_name: gl-no data_files: - split: train path: gl-no/train-* - config_name: gl-pl data_files: - split: train path: gl-pl/train-* - config_name: gl-pt data_files: - split: train path: gl-pt/train-* - config_name: gl-ro data_files: - split: train path: gl-ro/train-* - config_name: gl-ru data_files: - split: train path: gl-ru/train-* - config_name: gl-si data_files: - split: train path: gl-si/train-* - config_name: gl-sk data_files: - split: train path: gl-sk/train-* - config_name: gl-sl data_files: - split: train path: gl-sl/train-* - config_name: gl-sq data_files: - split: train path: gl-sq/train-* - config_name: gl-sr data_files: - split: train path: gl-sr/train-* - config_name: gl-sv data_files: - split: train path: gl-sv/train-* - config_name: gl-th data_files: - split: train path: gl-th/train-* - config_name: gl-tr data_files: - split: train path: gl-tr/train-* - config_name: gl-uk data_files: - split: train path: gl-uk/train-* - config_name: gl-ur data_files: - split: train path: gl-ur/train-* - config_name: gl-vi data_files: - split: train path: gl-vi/train-* - config_name: he-af data_files: - split: train path: he-af/train-* - config_name: he-ar data_files: - split: train path: he-ar/train-* - config_name: he-bg data_files: - split: train path: he-bg/train-* - config_name: he-bn data_files: - split: train path: he-bn/train-* - config_name: he-bs data_files: - split: train path: he-bs/train-* - config_name: he-ca data_files: - split: train path: he-ca/train-* - config_name: he-cs data_files: - split: train path: he-cs/train-* - config_name: he-da data_files: - split: train 
path: he-da/train-* - config_name: he-de data_files: - split: train path: he-de/train-* - config_name: he-el data_files: - split: train path: he-el/train-* - config_name: he-en data_files: - split: train path: he-en/train-* - config_name: he-es data_files: - split: train path: he-es/train-* - config_name: he-et data_files: - split: train path: he-et/train-* - config_name: he-eu data_files: - split: train path: he-eu/train-* - config_name: he-fa data_files: - split: train path: he-fa/train-* - config_name: he-fi data_files: - split: train path: he-fi/train-* - config_name: he-fr data_files: - split: train path: he-fr/train-* - config_name: he-gl data_files: - split: train path: he-gl/train-* - config_name: he-hi data_files: - split: train path: he-hi/train-* - config_name: he-hr data_files: - split: train path: he-hr/train-* - config_name: he-hu data_files: - split: train path: he-hu/train-* - config_name: he-hy data_files: - split: train path: he-hy/train-* - config_name: he-id data_files: - split: train path: he-id/train-* - config_name: he-is data_files: - split: train path: he-is/train-* - config_name: he-it data_files: - split: train path: he-it/train-* - config_name: he-ja data_files: - split: train path: he-ja/train-* - config_name: he-ka data_files: - split: train path: he-ka/train-* - config_name: he-kk data_files: - split: train path: he-kk/train-* - config_name: he-ko data_files: - split: train path: he-ko/train-* - config_name: he-lt data_files: - split: train path: he-lt/train-* - config_name: he-lv data_files: - split: train path: he-lv/train-* - config_name: he-mk data_files: - split: train path: he-mk/train-* - config_name: he-ml data_files: - split: train path: he-ml/train-* - config_name: he-ms data_files: - split: train path: he-ms/train-* - config_name: he-nl data_files: - split: train path: he-nl/train-* - config_name: he-no data_files: - split: train path: he-no/train-* - config_name: he-pl data_files: - split: train path: he-pl/train-* - 
config_name: he-pt data_files: - split: train path: he-pt/train-* - config_name: he-ro data_files: - split: train path: he-ro/train-* - config_name: he-ru data_files: - split: train path: he-ru/train-* - config_name: he-si data_files: - split: train path: he-si/train-* - config_name: he-sk data_files: - split: train path: he-sk/train-* - config_name: he-sl data_files: - split: train path: he-sl/train-* - config_name: he-sq data_files: - split: train path: he-sq/train-* - config_name: he-sr data_files: - split: train path: he-sr/train-* - config_name: he-sv data_files: - split: train path: he-sv/train-* - config_name: he-ta data_files: - split: train path: he-ta/train-* - config_name: he-te data_files: - split: train path: he-te/train-* - config_name: he-th data_files: - split: train path: he-th/train-* - config_name: he-tl data_files: - split: train path: he-tl/train-* - config_name: he-tr data_files: - split: train path: he-tr/train-* - config_name: he-uk data_files: - split: train path: he-uk/train-* - config_name: he-ur data_files: - split: train path: he-ur/train-* - config_name: he-vi data_files: - split: train path: he-vi/train-* - config_name: hi-af data_files: - split: train path: hi-af/train-* - config_name: hi-ar data_files: - split: train path: hi-ar/train-* - config_name: hi-bg data_files: - split: train path: hi-bg/train-* - config_name: hi-bn data_files: - split: train path: hi-bn/train-* - config_name: hi-bs data_files: - split: train path: hi-bs/train-* - config_name: hi-ca data_files: - split: train path: hi-ca/train-* - config_name: hi-cs data_files: - split: train path: hi-cs/train-* - config_name: hi-da data_files: - split: train path: hi-da/train-* - config_name: hi-de data_files: - split: train path: hi-de/train-* - config_name: hi-el data_files: - split: train path: hi-el/train-* - config_name: hi-en data_files: - split: train path: hi-en/train-* - config_name: hi-es data_files: - split: train path: hi-es/train-* - config_name: hi-et 
data_files: - split: train path: hi-et/train-* - config_name: hi-eu data_files: - split: train path: hi-eu/train-* - config_name: hi-fa data_files: - split: train path: hi-fa/train-* - config_name: hi-fi data_files: - split: train path: hi-fi/train-* - config_name: hi-fr data_files: - split: train path: hi-fr/train-* - config_name: hi-gl data_files: - split: train path: hi-gl/train-* - config_name: hi-he data_files: - split: train path: hi-he/train-* - config_name: hi-hr data_files: - split: train path: hi-hr/train-* - config_name: hi-hu data_files: - split: train path: hi-hu/train-* - config_name: hi-id data_files: - split: train path: hi-id/train-* - config_name: hi-is data_files: - split: train path: hi-is/train-* - config_name: hi-it data_files: - split: train path: hi-it/train-* - config_name: hi-ja data_files: - split: train path: hi-ja/train-* - config_name: hi-ka data_files: - split: train path: hi-ka/train-* - config_name: hi-ko data_files: - split: train path: hi-ko/train-* - config_name: hi-lt data_files: - split: train path: hi-lt/train-* - config_name: hi-lv data_files: - split: train path: hi-lv/train-* - config_name: hi-mk data_files: - split: train path: hi-mk/train-* - config_name: hi-ml data_files: - split: train path: hi-ml/train-* - config_name: hi-ms data_files: - split: train path: hi-ms/train-* - config_name: hi-nl data_files: - split: train path: hi-nl/train-* - config_name: hi-no data_files: - split: train path: hi-no/train-* - config_name: hi-pl data_files: - split: train path: hi-pl/train-* - config_name: hi-pt data_files: - split: train path: hi-pt/train-* - config_name: hi-ro data_files: - split: train path: hi-ro/train-* - config_name: hi-ru data_files: - split: train path: hi-ru/train-* - config_name: hi-si data_files: - split: train path: hi-si/train-* - config_name: hi-sk data_files: - split: train path: hi-sk/train-* - config_name: hi-sl data_files: - split: train path: hi-sl/train-* - config_name: hi-sq data_files: - split: train 
path: hi-sq/train-* - config_name: hi-sr data_files: - split: train path: hi-sr/train-* - config_name: hi-sv data_files: - split: train path: hi-sv/train-* - config_name: hi-ta data_files: - split: train path: hi-ta/train-* - config_name: hi-te data_files: - split: train path: hi-te/train-* - config_name: hi-th data_files: - split: train path: hi-th/train-* - config_name: hi-tl data_files: - split: train path: hi-tl/train-* - config_name: hi-tr data_files: - split: train path: hi-tr/train-* - config_name: hi-uk data_files: - split: train path: hi-uk/train-* - config_name: hi-ur data_files: - split: train path: hi-ur/train-* - config_name: hi-vi data_files: - split: train path: hi-vi/train-* - config_name: hr-af data_files: - split: train path: hr-af/train-* - config_name: hr-ar data_files: - split: train path: hr-ar/train-* - config_name: hr-bg data_files: - split: train path: hr-bg/train-* - config_name: hr-bn data_files: - split: train path: hr-bn/train-* - config_name: hr-bs data_files: - split: train path: hr-bs/train-* - config_name: hr-ca data_files: - split: train path: hr-ca/train-* - config_name: hr-cs data_files: - split: train path: hr-cs/train-* - config_name: hr-da data_files: - split: train path: hr-da/train-* - config_name: hr-de data_files: - split: train path: hr-de/train-* - config_name: hr-el data_files: - split: train path: hr-el/train-* - config_name: hr-en data_files: - split: train path: hr-en/train-* - config_name: hr-es data_files: - split: train path: hr-es/train-* - config_name: hr-et data_files: - split: train path: hr-et/train-* - config_name: hr-eu data_files: - split: train path: hr-eu/train-* - config_name: hr-fa data_files: - split: train path: hr-fa/train-* - config_name: hr-fi data_files: - split: train path: hr-fi/train-* - config_name: hr-fr data_files: - split: train path: hr-fr/train-* - config_name: hr-gl data_files: - split: train path: hr-gl/train-* - config_name: hr-he data_files: - split: train path: hr-he/train-* - 
config_name: hr-hi data_files: - split: train path: hr-hi/train-* - config_name: hr-hu data_files: - split: train path: hr-hu/train-* - config_name: hr-hy data_files: - split: train path: hr-hy/train-* - config_name: hr-id data_files: - split: train path: hr-id/train-* - config_name: hr-is data_files: - split: train path: hr-is/train-* - config_name: hr-it data_files: - split: train path: hr-it/train-* - config_name: hr-ja data_files: - split: train path: hr-ja/train-* - config_name: hr-ka data_files: - split: train path: hr-ka/train-* - config_name: hr-kk data_files: - split: train path: hr-kk/train-* - config_name: hr-ko data_files: - split: train path: hr-ko/train-* - config_name: hr-lt data_files: - split: train path: hr-lt/train-* - config_name: hr-lv data_files: - split: train path: hr-lv/train-* - config_name: hr-mk data_files: - split: train path: hr-mk/train-* - config_name: hr-ml data_files: - split: train path: hr-ml/train-* - config_name: hr-ms data_files: - split: train path: hr-ms/train-* - config_name: hr-nl data_files: - split: train path: hr-nl/train-* - config_name: hr-no data_files: - split: train path: hr-no/train-* - config_name: hr-pl data_files: - split: train path: hr-pl/train-* - config_name: hr-pt data_files: - split: train path: hr-pt/train-* - config_name: hr-ro data_files: - split: train path: hr-ro/train-* - config_name: hr-ru data_files: - split: train path: hr-ru/train-* - config_name: hr-si data_files: - split: train path: hr-si/train-* - config_name: hr-sk data_files: - split: train path: hr-sk/train-* - config_name: hr-sl data_files: - split: train path: hr-sl/train-* - config_name: hr-sq data_files: - split: train path: hr-sq/train-* - config_name: hr-sr data_files: - split: train path: hr-sr/train-* - config_name: hr-sv data_files: - split: train path: hr-sv/train-* - config_name: hr-ta data_files: - split: train path: hr-ta/train-* - config_name: hr-te data_files: - split: train path: hr-te/train-* - config_name: hr-th 
data_files: - split: train path: hr-th/train-* - config_name: hr-tl data_files: - split: train path: hr-tl/train-* - config_name: hr-tr data_files: - split: train path: hr-tr/train-* - config_name: hr-uk data_files: - split: train path: hr-uk/train-* - config_name: hr-ur data_files: - split: train path: hr-ur/train-* - config_name: hr-vi data_files: - split: train path: hr-vi/train-* - config_name: hu-af data_files: - split: train path: hu-af/train-* - config_name: hu-ar data_files: - split: train path: hu-ar/train-* - config_name: hu-bg data_files: - split: train path: hu-bg/train-* - config_name: hu-bn data_files: - split: train path: hu-bn/train-* - config_name: hu-bs data_files: - split: train path: hu-bs/train-* - config_name: hu-ca data_files: - split: train path: hu-ca/train-* - config_name: hu-cs data_files: - split: train path: hu-cs/train-* - config_name: hu-da data_files: - split: train path: hu-da/train-* - config_name: hu-de data_files: - split: train path: hu-de/train-* - config_name: hu-el data_files: - split: train path: hu-el/train-* - config_name: hu-en data_files: - split: train path: hu-en/train-* - config_name: hu-es data_files: - split: train path: hu-es/train-* - config_name: hu-et data_files: - split: train path: hu-et/train-* - config_name: hu-eu data_files: - split: train path: hu-eu/train-* - config_name: hu-fa data_files: - split: train path: hu-fa/train-* - config_name: hu-fi data_files: - split: train path: hu-fi/train-* - config_name: hu-fr data_files: - split: train path: hu-fr/train-* - config_name: hu-gl data_files: - split: train path: hu-gl/train-* - config_name: hu-he data_files: - split: train path: hu-he/train-* - config_name: hu-hi data_files: - split: train path: hu-hi/train-* - config_name: hu-hr data_files: - split: train path: hu-hr/train-* - config_name: hu-hy data_files: - split: train path: hu-hy/train-* - config_name: hu-id data_files: - split: train path: hu-id/train-* - config_name: hu-is data_files: - split: train 
path: hu-is/train-* - config_name: hu-it data_files: - split: train path: hu-it/train-* - config_name: hu-ja data_files: - split: train path: hu-ja/train-* - config_name: hu-ka data_files: - split: train path: hu-ka/train-* - config_name: hu-kk data_files: - split: train path: hu-kk/train-* - config_name: hu-ko data_files: - split: train path: hu-ko/train-* - config_name: hu-lt data_files: - split: train path: hu-lt/train-* - config_name: hu-lv data_files: - split: train path: hu-lv/train-* - config_name: hu-mk data_files: - split: train path: hu-mk/train-* - config_name: hu-ml data_files: - split: train path: hu-ml/train-* - config_name: hu-ms data_files: - split: train path: hu-ms/train-* - config_name: hu-nl data_files: - split: train path: hu-nl/train-* - config_name: hu-no data_files: - split: train path: hu-no/train-* - config_name: hu-pl data_files: - split: train path: hu-pl/train-* - config_name: hu-pt data_files: - split: train path: hu-pt/train-* - config_name: hu-ro data_files: - split: train path: hu-ro/train-* - config_name: hu-ru data_files: - split: train path: hu-ru/train-* - config_name: hu-si data_files: - split: train path: hu-si/train-* - config_name: hu-sk data_files: - split: train path: hu-sk/train-* - config_name: hu-sl data_files: - split: train path: hu-sl/train-* - config_name: hu-sq data_files: - split: train path: hu-sq/train-* - config_name: hu-sr data_files: - split: train path: hu-sr/train-* - config_name: hu-sv data_files: - split: train path: hu-sv/train-* - config_name: hu-ta data_files: - split: train path: hu-ta/train-* - config_name: hu-te data_files: - split: train path: hu-te/train-* - config_name: hu-th data_files: - split: train path: hu-th/train-* - config_name: hu-tl data_files: - split: train path: hu-tl/train-* - config_name: hu-tr data_files: - split: train path: hu-tr/train-* - config_name: hu-uk data_files: - split: train path: hu-uk/train-* - config_name: hu-ur data_files: - split: train path: hu-ur/train-* - 
config_name: hu-vi data_files: - split: train path: hu-vi/train-* - config_name: hy-ar data_files: - split: train path: hy-ar/train-* - config_name: hy-bg data_files: - split: train path: hy-bg/train-* - config_name: hy-bs data_files: - split: train path: hy-bs/train-* - config_name: hy-cs data_files: - split: train path: hy-cs/train-* - config_name: hy-de data_files: - split: train path: hy-de/train-* - config_name: hy-el data_files: - split: train path: hy-el/train-* - config_name: hy-en data_files: - split: train path: hy-en/train-* - config_name: hy-es data_files: - split: train path: hy-es/train-* - config_name: hy-et data_files: - split: train path: hy-et/train-* - config_name: hy-fi data_files: - split: train path: hy-fi/train-* - config_name: hy-fr data_files: - split: train path: hy-fr/train-* - config_name: hy-he data_files: - split: train path: hy-he/train-* - config_name: hy-hr data_files: - split: train path: hy-hr/train-* - config_name: hy-hu data_files: - split: train path: hy-hu/train-* - config_name: hy-id data_files: - split: train path: hy-id/train-* - config_name: hy-it data_files: - split: train path: hy-it/train-* - config_name: hy-mk data_files: - split: train path: hy-mk/train-* - config_name: hy-ml data_files: - split: train path: hy-ml/train-* - config_name: hy-nl data_files: - split: train path: hy-nl/train-* - config_name: hy-pl data_files: - split: train path: hy-pl/train-* - config_name: hy-pt data_files: - split: train path: hy-pt/train-* - config_name: hy-ro data_files: - split: train path: hy-ro/train-* - config_name: hy-ru data_files: - split: train path: hy-ru/train-* - config_name: hy-sk data_files: - split: train path: hy-sk/train-* - config_name: hy-sl data_files: - split: train path: hy-sl/train-* - config_name: hy-sq data_files: - split: train path: hy-sq/train-* - config_name: hy-sr data_files: - split: train path: hy-sr/train-* - config_name: hy-sv data_files: - split: train path: hy-sv/train-* - config_name: hy-tr 
data_files: - split: train path: hy-tr/train-* - config_name: id-af data_files: - split: train path: id-af/train-* - config_name: id-ar data_files: - split: train path: id-ar/train-* - config_name: id-bg data_files: - split: train path: id-bg/train-* - config_name: id-bn data_files: - split: train path: id-bn/train-* - config_name: id-bs data_files: - split: train path: id-bs/train-* - config_name: id-ca data_files: - split: train path: id-ca/train-* - config_name: id-cs data_files: - split: train path: id-cs/train-* - config_name: id-da data_files: - split: train path: id-da/train-* - config_name: id-de data_files: - split: train path: id-de/train-* - config_name: id-el data_files: - split: train path: id-el/train-* - config_name: id-en data_files: - split: train path: id-en/train-* - config_name: id-es data_files: - split: train path: id-es/train-* - config_name: id-et data_files: - split: train path: id-et/train-* - config_name: id-eu data_files: - split: train path: id-eu/train-* - config_name: id-fa data_files: - split: train path: id-fa/train-* - config_name: id-fi data_files: - split: train path: id-fi/train-* - config_name: id-fr data_files: - split: train path: id-fr/train-* - config_name: id-gl data_files: - split: train path: id-gl/train-* - config_name: id-he data_files: - split: train path: id-he/train-* - config_name: id-hi data_files: - split: train path: id-hi/train-* - config_name: id-hr data_files: - split: train path: id-hr/train-* - config_name: id-hu data_files: - split: train path: id-hu/train-* - config_name: id-hy data_files: - split: train path: id-hy/train-* - config_name: id-is data_files: - split: train path: id-is/train-* - config_name: id-it data_files: - split: train path: id-it/train-* - config_name: id-ja data_files: - split: train path: id-ja/train-* - config_name: id-ka data_files: - split: train path: id-ka/train-* - config_name: id-kk data_files: - split: train path: id-kk/train-* - config_name: id-ko data_files: - split: train 
path: id-ko/train-* - config_name: id-lt data_files: - split: train path: id-lt/train-* - config_name: id-lv data_files: - split: train path: id-lv/train-* - config_name: id-mk data_files: - split: train path: id-mk/train-* - config_name: id-ml data_files: - split: train path: id-ml/train-* - config_name: id-ms data_files: - split: train path: id-ms/train-* - config_name: id-nl data_files: - split: train path: id-nl/train-* - config_name: id-pl data_files: - split: train path: id-pl/train-* - config_name: id-pt data_files: - split: train path: id-pt/train-* - config_name: id-ro data_files: - split: train path: id-ro/train-* - config_name: id-ru data_files: - split: train path: id-ru/train-* - config_name: id-si data_files: - split: train path: id-si/train-* - config_name: id-sk data_files: - split: train path: id-sk/train-* - config_name: id-sl data_files: - split: train path: id-sl/train-* - config_name: id-sq data_files: - split: train path: id-sq/train-* - config_name: id-sr data_files: - split: train path: id-sr/train-* - config_name: id-sv data_files: - split: train path: id-sv/train-* - config_name: id-ta data_files: - split: train path: id-ta/train-* - config_name: id-te data_files: - split: train path: id-te/train-* - config_name: id-th data_files: - split: train path: id-th/train-* - config_name: id-tl data_files: - split: train path: id-tl/train-* - config_name: id-tr data_files: - split: train path: id-tr/train-* - config_name: id-uk data_files: - split: train path: id-uk/train-* - config_name: id-ur data_files: - split: train path: id-ur/train-* - config_name: id-vi data_files: - split: train path: id-vi/train-* - config_name: is-ar data_files: - split: train path: is-ar/train-* - config_name: is-bg data_files: - split: train path: is-bg/train-* - config_name: is-bn data_files: - split: train path: is-bn/train-* - config_name: is-bs data_files: - split: train path: is-bs/train-* - config_name: is-ca data_files: - split: train path: is-ca/train-* - 
config_name: is-cs data_files: - split: train path: is-cs/train-* - config_name: is-da data_files: - split: train path: is-da/train-* - config_name: is-de data_files: - split: train path: is-de/train-* - config_name: is-el data_files: - split: train path: is-el/train-* - config_name: is-en data_files: - split: train path: is-en/train-* - config_name: is-es data_files: - split: train path: is-es/train-* - config_name: is-et data_files: - split: train path: is-et/train-* - config_name: is-eu data_files: - split: train path: is-eu/train-* - config_name: is-fa data_files: - split: train path: is-fa/train-* - config_name: is-fi data_files: - split: train path: is-fi/train-* - config_name: is-fr data_files: - split: train path: is-fr/train-* - config_name: is-gl data_files: - split: train path: is-gl/train-* - config_name: is-he data_files: - split: train path: is-he/train-* - config_name: is-hi data_files: - split: train path: is-hi/train-* - config_name: is-hr data_files: - split: train path: is-hr/train-* - config_name: is-hu data_files: - split: train path: is-hu/train-* - config_name: is-id data_files: - split: train path: is-id/train-* - config_name: is-it data_files: - split: train path: is-it/train-* - config_name: is-ja data_files: - split: train path: is-ja/train-* - config_name: is-ka data_files: - split: train path: is-ka/train-* - config_name: is-kk data_files: - split: train path: is-kk/train-* - config_name: is-ko data_files: - split: train path: is-ko/train-* - config_name: is-lt data_files: - split: train path: is-lt/train-* - config_name: is-lv data_files: - split: train path: is-lv/train-* - config_name: is-mk data_files: - split: train path: is-mk/train-* - config_name: is-ml data_files: - split: train path: is-ml/train-* - config_name: is-ms data_files: - split: train path: is-ms/train-* - config_name: is-nl data_files: - split: train path: is-nl/train-* - config_name: is-no data_files: - split: train path: is-no/train-* - config_name: is-pl 
data_files: - split: train path: is-pl/train-* - config_name: is-pt data_files: - split: train path: is-pt/train-* - config_name: is-ro data_files: - split: train path: is-ro/train-* - config_name: is-ru data_files: - split: train path: is-ru/train-* - config_name: is-si data_files: - split: train path: is-si/train-* - config_name: is-sk data_files: - split: train path: is-sk/train-* - config_name: is-sl data_files: - split: train path: is-sl/train-* - config_name: is-sq data_files: - split: train path: is-sq/train-* - config_name: is-sr data_files: - split: train path: is-sr/train-* - config_name: is-sv data_files: - split: train path: is-sv/train-* - config_name: is-ta data_files: - split: train path: is-ta/train-* - config_name: is-th data_files: - split: train path: is-th/train-* - config_name: is-tl data_files: - split: train path: is-tl/train-* - config_name: is-tr data_files: - split: train path: is-tr/train-* - config_name: is-uk data_files: - split: train path: is-uk/train-* - config_name: is-ur data_files: - split: train path: is-ur/train-* - config_name: is-vi data_files: - split: train path: is-vi/train-* - config_name: it-af data_files: - split: train path: it-af/train-* - config_name: it-ar data_files: - split: train path: it-ar/train-* - config_name: it-bg data_files: - split: train path: it-bg/train-* - config_name: it-bn data_files: - split: train path: it-bn/train-* - config_name: it-bs data_files: - split: train path: it-bs/train-* - config_name: it-ca data_files: - split: train path: it-ca/train-* - config_name: it-cs data_files: - split: train path: it-cs/train-* - config_name: it-da data_files: - split: train path: it-da/train-* - config_name: it-de data_files: - split: train path: it-de/train-* - config_name: it-el data_files: - split: train path: it-el/train-* - config_name: it-en data_files: - split: train path: it-en/train-* - config_name: it-es data_files: - split: train path: it-es/train-* - config_name: it-et data_files: - split: train 
path: it-et/train-* - config_name: it-eu data_files: - split: train path: it-eu/train-* - config_name: it-fa data_files: - split: train path: it-fa/train-* - config_name: it-fi data_files: - split: train path: it-fi/train-* - config_name: it-fr data_files: - split: train path: it-fr/train-* - config_name: it-gl data_files: - split: train path: it-gl/train-* - config_name: it-he data_files: - split: train path: it-he/train-* - config_name: it-hi data_files: - split: train path: it-hi/train-* - config_name: it-hr data_files: - split: train path: it-hr/train-* - config_name: it-hu data_files: - split: train path: it-hu/train-* - config_name: it-hy data_files: - split: train path: it-hy/train-* - config_name: it-id data_files: - split: train path: it-id/train-* - config_name: it-is data_files: - split: train path: it-is/train-* - config_name: it-ja data_files: - split: train path: it-ja/train-* - config_name: it-ka data_files: - split: train path: it-ka/train-* - config_name: it-kk data_files: - split: train path: it-kk/train-* - config_name: it-ko data_files: - split: train path: it-ko/train-* - config_name: it-lt data_files: - split: train path: it-lt/train-* - config_name: it-lv data_files: - split: train path: it-lv/train-* - config_name: it-mk data_files: - split: train path: it-mk/train-* - config_name: it-ml data_files: - split: train path: it-ml/train-* - config_name: it-ms data_files: - split: train path: it-ms/train-* - config_name: it-nl data_files: - split: train path: it-nl/train-* - config_name: it-no data_files: - split: train path: it-no/train-* - config_name: it-pl data_files: - split: train path: it-pl/train-* - config_name: it-pt data_files: - split: train path: it-pt/train-* - config_name: it-ro data_files: - split: train path: it-ro/train-* - config_name: it-ru data_files: - split: train path: it-ru/train-* - config_name: it-si data_files: - split: train path: it-si/train-* - config_name: it-sk data_files: - split: train path: it-sk/train-* - 
config_name: it-sl data_files: - split: train path: it-sl/train-* - config_name: it-sq data_files: - split: train path: it-sq/train-* - config_name: it-sr data_files: - split: train path: it-sr/train-* - config_name: it-sv data_files: - split: train path: it-sv/train-* - config_name: it-ta data_files: - split: train path: it-ta/train-* - config_name: it-te data_files: - split: train path: it-te/train-* - config_name: it-th data_files: - split: train path: it-th/train-* - config_name: it-tl data_files: - split: train path: it-tl/train-* - config_name: it-tr data_files: - split: train path: it-tr/train-* - config_name: it-uk data_files: - split: train path: it-uk/train-* - config_name: it-ur data_files: - split: train path: it-ur/train-* - config_name: it-vi data_files: - split: train path: it-vi/train-* - config_name: ja-af data_files: - split: train path: ja-af/train-* - config_name: ja-ar data_files: - split: train path: ja-ar/train-* - config_name: ja-bg data_files: - split: train path: ja-bg/train-* - config_name: ja-bn data_files: - split: train path: ja-bn/train-* - config_name: ja-bs data_files: - split: train path: ja-bs/train-* - config_name: ja-ca data_files: - split: train path: ja-ca/train-* - config_name: ja-cs data_files: - split: train path: ja-cs/train-* - config_name: ja-da data_files: - split: train path: ja-da/train-* - config_name: ja-de data_files: - split: train path: ja-de/train-* - config_name: ja-el data_files: - split: train path: ja-el/train-* - config_name: ja-en data_files: - split: train path: ja-en/train-* - config_name: ja-es data_files: - split: train path: ja-es/train-* - config_name: ja-et data_files: - split: train path: ja-et/train-* - config_name: ja-eu data_files: - split: train path: ja-eu/train-* - config_name: ja-fa data_files: - split: train path: ja-fa/train-* - config_name: ja-fi data_files: - split: train path: ja-fi/train-* - config_name: ja-fr data_files: - split: train path: ja-fr/train-* - config_name: ja-gl 
data_files: - split: train path: ja-gl/train-* - config_name: ja-he data_files: - split: train path: ja-he/train-* - config_name: ja-hi data_files: - split: train path: ja-hi/train-* - config_name: ja-hr data_files: - split: train path: ja-hr/train-* - config_name: ja-hu data_files: - split: train path: ja-hu/train-* - config_name: ja-id data_files: - split: train path: ja-id/train-* - config_name: ja-is data_files: - split: train path: ja-is/train-* - config_name: ja-it data_files: - split: train path: ja-it/train-* - config_name: ja-ka data_files: - split: train path: ja-ka/train-* - config_name: ja-kk data_files: - split: train path: ja-kk/train-* - config_name: ja-ko data_files: - split: train path: ja-ko/train-* - config_name: ja-lt data_files: - split: train path: ja-lt/train-* - config_name: ja-lv data_files: - split: train path: ja-lv/train-* - config_name: ja-mk data_files: - split: train path: ja-mk/train-* - config_name: ja-ml data_files: - split: train path: ja-ml/train-* - config_name: ja-ms data_files: - split: train path: ja-ms/train-* - config_name: ja-nl data_files: - split: train path: ja-nl/train-* - config_name: ja-no data_files: - split: train path: ja-no/train-* - config_name: ja-pl data_files: - split: train path: ja-pl/train-* - config_name: ja-pt data_files: - split: train path: ja-pt/train-* - config_name: ja-ro data_files: - split: train path: ja-ro/train-* - config_name: ja-ru data_files: - split: train path: ja-ru/train-* - config_name: ja-si data_files: - split: train path: ja-si/train-* - config_name: ja-sk data_files: - split: train path: ja-sk/train-* - config_name: ja-sl data_files: - split: train path: ja-sl/train-* - config_name: ja-sq data_files: - split: train path: ja-sq/train-* - config_name: ja-sr data_files: - split: train path: ja-sr/train-* - config_name: ja-sv data_files: - split: train path: ja-sv/train-* - config_name: ja-ta data_files: - split: train path: ja-ta/train-* - config_name: ja-te data_files: - split: train 
path: ja-te/train-* - config_name: ja-th data_files: - split: train path: ja-th/train-* - config_name: ja-tl data_files: - split: train path: ja-tl/train-* - config_name: ja-tr data_files: - split: train path: ja-tr/train-* - config_name: ja-uk data_files: - split: train path: ja-uk/train-* - config_name: ja-ur data_files: - split: train path: ja-ur/train-* - config_name: ja-vi data_files: - split: train path: ja-vi/train-* - config_name: ka-ar data_files: - split: train path: ka-ar/train-* - config_name: ka-bg data_files: - split: train path: ka-bg/train-* - config_name: ka-bn data_files: - split: train path: ka-bn/train-* - config_name: ka-bs data_files: - split: train path: ka-bs/train-* - config_name: ka-ca data_files: - split: train path: ka-ca/train-* - config_name: ka-cs data_files: - split: train path: ka-cs/train-* - config_name: ka-da data_files: - split: train path: ka-da/train-* - config_name: ka-de data_files: - split: train path: ka-de/train-* - config_name: ka-el data_files: - split: train path: ka-el/train-* - config_name: ka-en data_files: - split: train path: ka-en/train-* - config_name: ka-es data_files: - split: train path: ka-es/train-* - config_name: ka-et data_files: - split: train path: ka-et/train-* - config_name: ka-eu data_files: - split: train path: ka-eu/train-* - config_name: ka-fa data_files: - split: train path: ka-fa/train-* - config_name: ka-fi data_files: - split: train path: ka-fi/train-* - config_name: ka-fr data_files: - split: train path: ka-fr/train-* - config_name: ka-gl data_files: - split: train path: ka-gl/train-* - config_name: ka-he data_files: - split: train path: ka-he/train-* - config_name: ka-hi data_files: - split: train path: ka-hi/train-* - config_name: ka-hr data_files: - split: train path: ka-hr/train-* - config_name: ka-hu data_files: - split: train path: ka-hu/train-* - config_name: ka-id data_files: - split: train path: ka-id/train-* - config_name: ka-is data_files: - split: train path: ka-is/train-* - 
config_name: ka-it data_files: - split: train path: ka-it/train-* - config_name: ka-ja data_files: - split: train path: ka-ja/train-* - config_name: ka-ko data_files: - split: train path: ka-ko/train-* - config_name: ka-lt data_files: - split: train path: ka-lt/train-* - config_name: ka-lv data_files: - split: train path: ka-lv/train-* - config_name: ka-mk data_files: - split: train path: ka-mk/train-* - config_name: ka-ml data_files: - split: train path: ka-ml/train-* - config_name: ka-ms data_files: - split: train path: ka-ms/train-* - config_name: ka-nl data_files: - split: train path: ka-nl/train-* - config_name: ka-no data_files: - split: train path: ka-no/train-* - config_name: ka-pl data_files: - split: train path: ka-pl/train-* - config_name: ka-pt data_files: - split: train path: ka-pt/train-* - config_name: ka-ro data_files: - split: train path: ka-ro/train-* - config_name: ka-ru data_files: - split: train path: ka-ru/train-* - config_name: ka-si data_files: - split: train path: ka-si/train-* - config_name: ka-sk data_files: - split: train path: ka-sk/train-* - config_name: ka-sl data_files: - split: train path: ka-sl/train-* - config_name: ka-sq data_files: - split: train path: ka-sq/train-* - config_name: ka-sr data_files: - split: train path: ka-sr/train-* - config_name: ka-sv data_files: - split: train path: ka-sv/train-* - config_name: ka-th data_files: - split: train path: ka-th/train-* - config_name: ka-tl data_files: - split: train path: ka-tl/train-* - config_name: ka-tr data_files: - split: train path: ka-tr/train-* - config_name: ka-uk data_files: - split: train path: ka-uk/train-* - config_name: ka-ur data_files: - split: train path: ka-ur/train-* - config_name: ka-vi data_files: - split: train path: ka-vi/train-* - config_name: kk-ar data_files: - split: train path: kk-ar/train-* - config_name: kk-bg data_files: - split: train path: kk-bg/train-* - config_name: kk-bs data_files: - split: train path: kk-bs/train-* - config_name: kk-cs 
data_files: - split: train path: kk-cs/train-* - config_name: kk-da data_files: - split: train path: kk-da/train-* - config_name: kk-de data_files: - split: train path: kk-de/train-* - config_name: kk-el data_files: - split: train path: kk-el/train-* - config_name: kk-en data_files: - split: train path: kk-en/train-* - config_name: kk-es data_files: - split: train path: kk-es/train-* - config_name: kk-et data_files: - split: train path: kk-et/train-* - config_name: kk-fa data_files: - split: train path: kk-fa/train-* - config_name: kk-fi data_files: - split: train path: kk-fi/train-* - config_name: kk-fr data_files: - split: train path: kk-fr/train-* - config_name: kk-he data_files: - split: train path: kk-he/train-* - config_name: kk-hr data_files: - split: train path: kk-hr/train-* - config_name: kk-hu data_files: - split: train path: kk-hu/train-* - config_name: kk-id data_files: - split: train path: kk-id/train-* - config_name: kk-is data_files: - split: train path: kk-is/train-* - config_name: kk-it data_files: - split: train path: kk-it/train-* - config_name: kk-ja data_files: - split: train path: kk-ja/train-* - config_name: kk-lt data_files: - split: train path: kk-lt/train-* - config_name: kk-lv data_files: - split: train path: kk-lv/train-* - config_name: kk-ms data_files: - split: train path: kk-ms/train-* - config_name: kk-nl data_files: - split: train path: kk-nl/train-* - config_name: kk-no data_files: - split: train path: kk-no/train-* - config_name: kk-pl data_files: - split: train path: kk-pl/train-* - config_name: kk-pt data_files: - split: train path: kk-pt/train-* - config_name: kk-ro data_files: - split: train path: kk-ro/train-* - config_name: kk-ru data_files: - split: train path: kk-ru/train-* - config_name: kk-sk data_files: - split: train path: kk-sk/train-* - config_name: kk-sl data_files: - split: train path: kk-sl/train-* - config_name: kk-sr data_files: - split: train path: kk-sr/train-* - config_name: kk-sv data_files: - split: train 
path: kk-sv/train-* - config_name: kk-th data_files: - split: train path: kk-th/train-* - config_name: kk-tr data_files: - split: train path: kk-tr/train-* - config_name: kk-uk data_files: - split: train path: kk-uk/train-* - config_name: kk-vi data_files: - split: train path: kk-vi/train-* - config_name: ko-ar data_files: - split: train path: ko-ar/train-* - config_name: ko-bg data_files: - split: train path: ko-bg/train-* - config_name: ko-bn data_files: - split: train path: ko-bn/train-* - config_name: ko-bs data_files: - split: train path: ko-bs/train-* - config_name: ko-ca data_files: - split: train path: ko-ca/train-* - config_name: ko-cs data_files: - split: train path: ko-cs/train-* - config_name: ko-da data_files: - split: train path: ko-da/train-* - config_name: ko-de data_files: - split: train path: ko-de/train-* - config_name: ko-el data_files: - split: train path: ko-el/train-* - config_name: ko-en data_files: - split: train path: ko-en/train-* - config_name: ko-es data_files: - split: train path: ko-es/train-* - config_name: ko-et data_files: - split: train path: ko-et/train-* - config_name: ko-eu data_files: - split: train path: ko-eu/train-* - config_name: ko-fa data_files: - split: train path: ko-fa/train-* - config_name: ko-fi data_files: - split: train path: ko-fi/train-* - config_name: ko-fr data_files: - split: train path: ko-fr/train-* - config_name: ko-gl data_files: - split: train path: ko-gl/train-* - config_name: ko-he data_files: - split: train path: ko-he/train-* - config_name: ko-hi data_files: - split: train path: ko-hi/train-* - config_name: ko-hr data_files: - split: train path: ko-hr/train-* - config_name: ko-hu data_files: - split: train path: ko-hu/train-* - config_name: ko-id data_files: - split: train path: ko-id/train-* - config_name: ko-is data_files: - split: train path: ko-is/train-* - config_name: ko-it data_files: - split: train path: ko-it/train-* - config_name: ko-ja data_files: - split: train path: ko-ja/train-* - 
config_name: ko-ka data_files: - split: train path: ko-ka/train-* - config_name: ko-lt data_files: - split: train path: ko-lt/train-* - config_name: ko-lv data_files: - split: train path: ko-lv/train-* - config_name: ko-mk data_files: - split: train path: ko-mk/train-* - config_name: ko-ml data_files: - split: train path: ko-ml/train-* - config_name: ko-ms data_files: - split: train path: ko-ms/train-* - config_name: ko-nl data_files: - split: train path: ko-nl/train-* - config_name: ko-no data_files: - split: train path: ko-no/train-* - config_name: ko-pl data_files: - split: train path: ko-pl/train-* - config_name: ko-pt data_files: - split: train path: ko-pt/train-* - config_name: ko-ro data_files: - split: train path: ko-ro/train-* - config_name: ko-ru data_files: - split: train path: ko-ru/train-* - config_name: ko-si data_files: - split: train path: ko-si/train-* - config_name: ko-sk data_files: - split: train path: ko-sk/train-* - config_name: ko-sl data_files: - split: train path: ko-sl/train-* - config_name: ko-sq data_files: - split: train path: ko-sq/train-* - config_name: ko-sr data_files: - split: train path: ko-sr/train-* - config_name: ko-sv data_files: - split: train path: ko-sv/train-* - config_name: ko-ta data_files: - split: train path: ko-ta/train-* - config_name: ko-te data_files: - split: train path: ko-te/train-* - config_name: ko-th data_files: - split: train path: ko-th/train-* - config_name: ko-tl data_files: - split: train path: ko-tl/train-* - config_name: ko-tr data_files: - split: train path: ko-tr/train-* - config_name: ko-uk data_files: - split: train path: ko-uk/train-* - config_name: ko-ur data_files: - split: train path: ko-ur/train-* - config_name: ko-vi data_files: - split: train path: ko-vi/train-* - config_name: lt-af data_files: - split: train path: lt-af/train-* - config_name: lt-ar data_files: - split: train path: lt-ar/train-* - config_name: lt-bg data_files: - split: train path: lt-bg/train-* - config_name: lt-bn 
data_files: - split: train path: lt-bn/train-* - config_name: lt-bs data_files: - split: train path: lt-bs/train-* - config_name: lt-ca data_files: - split: train path: lt-ca/train-* - config_name: lt-cs data_files: - split: train path: lt-cs/train-* - config_name: lt-da data_files: - split: train path: lt-da/train-* - config_name: lt-de data_files: - split: train path: lt-de/train-* - config_name: lt-el data_files: - split: train path: lt-el/train-* - config_name: lt-en data_files: - split: train path: lt-en/train-* - config_name: lt-es data_files: - split: train path: lt-es/train-* - config_name: lt-et data_files: - split: train path: lt-et/train-* - config_name: lt-eu data_files: - split: train path: lt-eu/train-* - config_name: lt-fa data_files: - split: train path: lt-fa/train-* - config_name: lt-fi data_files: - split: train path: lt-fi/train-* - config_name: lt-fr data_files: - split: train path: lt-fr/train-* - config_name: lt-gl data_files: - split: train path: lt-gl/train-* - config_name: lt-he data_files: - split: train path: lt-he/train-* - config_name: lt-hi data_files: - split: train path: lt-hi/train-* - config_name: lt-hr data_files: - split: train path: lt-hr/train-* - config_name: lt-hu data_files: - split: train path: lt-hu/train-* - config_name: lt-id data_files: - split: train path: lt-id/train-* - config_name: lt-is data_files: - split: train path: lt-is/train-* - config_name: lt-it data_files: - split: train path: lt-it/train-* - config_name: lt-ja data_files: - split: train path: lt-ja/train-* - config_name: lt-ka data_files: - split: train path: lt-ka/train-* - config_name: lt-kk data_files: - split: train path: lt-kk/train-* - config_name: lt-ko data_files: - split: train path: lt-ko/train-* - config_name: lt-lv data_files: - split: train path: lt-lv/train-* - config_name: lt-mk data_files: - split: train path: lt-mk/train-* - config_name: lt-ml data_files: - split: train path: lt-ml/train-* - config_name: lt-ms data_files: - split: train 
path: lt-ms/train-* - config_name: lt-nl data_files: - split: train path: lt-nl/train-* - config_name: lt-no data_files: - split: train path: lt-no/train-* - config_name: lt-pl data_files: - split: train path: lt-pl/train-* - config_name: lt-pt data_files: - split: train path: lt-pt/train-* - config_name: lt-ro data_files: - split: train path: lt-ro/train-* - config_name: lt-ru data_files: - split: train path: lt-ru/train-* - config_name: lt-si data_files: - split: train path: lt-si/train-* - config_name: lt-sk data_files: - split: train path: lt-sk/train-* - config_name: lt-sl data_files: - split: train path: lt-sl/train-* - config_name: lt-sq data_files: - split: train path: lt-sq/train-* - config_name: lt-sr data_files: - split: train path: lt-sr/train-* - config_name: lt-sv data_files: - split: train path: lt-sv/train-* - config_name: lt-ta data_files: - split: train path: lt-ta/train-* - config_name: lt-te data_files: - split: train path: lt-te/train-* - config_name: lt-th data_files: - split: train path: lt-th/train-* - config_name: lt-tl data_files: - split: train path: lt-tl/train-* - config_name: lt-tr data_files: - split: train path: lt-tr/train-* - config_name: lt-uk data_files: - split: train path: lt-uk/train-* - config_name: lt-ur data_files: - split: train path: lt-ur/train-* - config_name: lt-vi data_files: - split: train path: lt-vi/train-* - config_name: lv-af data_files: - split: train path: lv-af/train-* - config_name: lv-ar data_files: - split: train path: lv-ar/train-* - config_name: lv-bg data_files: - split: train path: lv-bg/train-* - config_name: lv-bn data_files: - split: train path: lv-bn/train-* - config_name: lv-bs data_files: - split: train path: lv-bs/train-* - config_name: lv-ca data_files: - split: train path: lv-ca/train-* - config_name: lv-cs data_files: - split: train path: lv-cs/train-* - config_name: lv-da data_files: - split: train path: lv-da/train-* - config_name: lv-de data_files: - split: train path: lv-de/train-* - 
config_name: lv-el data_files: - split: train path: lv-el/train-* - config_name: lv-en data_files: - split: train path: lv-en/train-* - config_name: lv-es data_files: - split: train path: lv-es/train-* - config_name: lv-et data_files: - split: train path: lv-et/train-* - config_name: lv-eu data_files: - split: train path: lv-eu/train-* - config_name: lv-fa data_files: - split: train path: lv-fa/train-* - config_name: lv-fi data_files: - split: train path: lv-fi/train-* - config_name: lv-fr data_files: - split: train path: lv-fr/train-* - config_name: lv-gl data_files: - split: train path: lv-gl/train-* - config_name: lv-he data_files: - split: train path: lv-he/train-* - config_name: lv-hi data_files: - split: train path: lv-hi/train-* - config_name: lv-hr data_files: - split: train path: lv-hr/train-* - config_name: lv-hu data_files: - split: train path: lv-hu/train-* - config_name: lv-id data_files: - split: train path: lv-id/train-* - config_name: lv-is data_files: - split: train path: lv-is/train-* - config_name: lv-it data_files: - split: train path: lv-it/train-* - config_name: lv-ja data_files: - split: train path: lv-ja/train-* - config_name: lv-ka data_files: - split: train path: lv-ka/train-* - config_name: lv-kk data_files: - split: train path: lv-kk/train-* - config_name: lv-ko data_files: - split: train path: lv-ko/train-* - config_name: lv-lt data_files: - split: train path: lv-lt/train-* - config_name: lv-mk data_files: - split: train path: lv-mk/train-* - config_name: lv-ml data_files: - split: train path: lv-ml/train-* - config_name: lv-ms data_files: - split: train path: lv-ms/train-* - config_name: lv-nl data_files: - split: train path: lv-nl/train-* - config_name: lv-no data_files: - split: train path: lv-no/train-* - config_name: lv-pl data_files: - split: train path: lv-pl/train-* - config_name: lv-pt data_files: - split: train path: lv-pt/train-* - config_name: lv-ro data_files: - split: train path: lv-ro/train-* - config_name: lv-ru 
data_files: - split: train path: lv-ru/train-* - config_name: lv-si data_files: - split: train path: lv-si/train-* - config_name: lv-sk data_files: - split: train path: lv-sk/train-* - config_name: lv-sl data_files: - split: train path: lv-sl/train-* - config_name: lv-sq data_files: - split: train path: lv-sq/train-* - config_name: lv-sr data_files: - split: train path: lv-sr/train-* - config_name: lv-sv data_files: - split: train path: lv-sv/train-* - config_name: lv-ta data_files: - split: train path: lv-ta/train-* - config_name: lv-te data_files: - split: train path: lv-te/train-* - config_name: lv-th data_files: - split: train path: lv-th/train-* - config_name: lv-tr data_files: - split: train path: lv-tr/train-* - config_name: lv-uk data_files: - split: train path: lv-uk/train-* - config_name: lv-ur data_files: - split: train path: lv-ur/train-* - config_name: lv-vi data_files: - split: train path: lv-vi/train-* - config_name: mk-af data_files: - split: train path: mk-af/train-* - config_name: mk-ar data_files: - split: train path: mk-ar/train-* - config_name: mk-bg data_files: - split: train path: mk-bg/train-* - config_name: mk-bn data_files: - split: train path: mk-bn/train-* - config_name: mk-bs data_files: - split: train path: mk-bs/train-* - config_name: mk-ca data_files: - split: train path: mk-ca/train-* - config_name: mk-cs data_files: - split: train path: mk-cs/train-* - config_name: mk-da data_files: - split: train path: mk-da/train-* - config_name: mk-de data_files: - split: train path: mk-de/train-* - config_name: mk-el data_files: - split: train path: mk-el/train-* - config_name: mk-en data_files: - split: train path: mk-en/train-* - config_name: mk-es data_files: - split: train path: mk-es/train-* - config_name: mk-et data_files: - split: train path: mk-et/train-* - config_name: mk-eu data_files: - split: train path: mk-eu/train-* - config_name: mk-fa data_files: - split: train path: mk-fa/train-* - config_name: mk-fi data_files: - split: train 
path: mk-fi/train-* - config_name: mk-fr data_files: - split: train path: mk-fr/train-* - config_name: mk-gl data_files: - split: train path: mk-gl/train-* - config_name: mk-he data_files: - split: train path: mk-he/train-* - config_name: mk-hi data_files: - split: train path: mk-hi/train-* - config_name: mk-hr data_files: - split: train path: mk-hr/train-* - config_name: mk-hu data_files: - split: train path: mk-hu/train-* - config_name: mk-hy data_files: - split: train path: mk-hy/train-* - config_name: mk-id data_files: - split: train path: mk-id/train-* - config_name: mk-is data_files: - split: train path: mk-is/train-* - config_name: mk-it data_files: - split: train path: mk-it/train-* - config_name: mk-ja data_files: - split: train path: mk-ja/train-* - config_name: mk-ka data_files: - split: train path: mk-ka/train-* - config_name: mk-ko data_files: - split: train path: mk-ko/train-* - config_name: mk-lt data_files: - split: train path: mk-lt/train-* - config_name: mk-lv data_files: - split: train path: mk-lv/train-* - config_name: mk-ml data_files: - split: train path: mk-ml/train-* - config_name: mk-ms data_files: - split: train path: mk-ms/train-* - config_name: mk-nl data_files: - split: train path: mk-nl/train-* - config_name: mk-no data_files: - split: train path: mk-no/train-* - config_name: mk-pl data_files: - split: train path: mk-pl/train-* - config_name: mk-pt data_files: - split: train path: mk-pt/train-* - config_name: mk-ro data_files: - split: train path: mk-ro/train-* - config_name: mk-ru data_files: - split: train path: mk-ru/train-* - config_name: mk-si data_files: - split: train path: mk-si/train-* - config_name: mk-sk data_files: - split: train path: mk-sk/train-* - config_name: mk-sl data_files: - split: train path: mk-sl/train-* - config_name: mk-sq data_files: - split: train path: mk-sq/train-* - config_name: mk-sr data_files: - split: train path: mk-sr/train-* - config_name: mk-sv data_files: - split: train path: mk-sv/train-* - 
config_name: mk-ta data_files: - split: train path: mk-ta/train-* - config_name: mk-te data_files: - split: train path: mk-te/train-* - config_name: mk-th data_files: - split: train path: mk-th/train-* - config_name: mk-tl data_files: - split: train path: mk-tl/train-* - config_name: mk-tr data_files: - split: train path: mk-tr/train-* - config_name: mk-uk data_files: - split: train path: mk-uk/train-* - config_name: mk-ur data_files: - split: train path: mk-ur/train-* - config_name: mk-vi data_files: - split: train path: mk-vi/train-* - config_name: ml-af data_files: - split: train path: ml-af/train-* - config_name: ml-ar data_files: - split: train path: ml-ar/train-* - config_name: ml-bg data_files: - split: train path: ml-bg/train-* - config_name: ml-bn data_files: - split: train path: ml-bn/train-* - config_name: ml-bs data_files: - split: train path: ml-bs/train-* - config_name: ml-ca data_files: - split: train path: ml-ca/train-* - config_name: ml-cs data_files: - split: train path: ml-cs/train-* - config_name: ml-da data_files: - split: train path: ml-da/train-* - config_name: ml-de data_files: - split: train path: ml-de/train-* - config_name: ml-el data_files: - split: train path: ml-el/train-* - config_name: ml-en data_files: - split: train path: ml-en/train-* - config_name: ml-es data_files: - split: train path: ml-es/train-* - config_name: ml-et data_files: - split: train path: ml-et/train-* - config_name: ml-eu data_files: - split: train path: ml-eu/train-* - config_name: ml-fa data_files: - split: train path: ml-fa/train-* - config_name: ml-fi data_files: - split: train path: ml-fi/train-* - config_name: ml-fr data_files: - split: train path: ml-fr/train-* - config_name: ml-gl data_files: - split: train path: ml-gl/train-* - config_name: ml-he data_files: - split: train path: ml-he/train-* - config_name: ml-hi data_files: - split: train path: ml-hi/train-* - config_name: ml-hr data_files: - split: train path: ml-hr/train-* - config_name: ml-hu 
data_files: - split: train path: ml-hu/train-* - config_name: ml-hy data_files: - split: train path: ml-hy/train-* - config_name: ml-id data_files: - split: train path: ml-id/train-* - config_name: ml-is data_files: - split: train path: ml-is/train-* - config_name: ml-it data_files: - split: train path: ml-it/train-* - config_name: ml-ja data_files: - split: train path: ml-ja/train-* - config_name: ml-ka data_files: - split: train path: ml-ka/train-* - config_name: ml-ko data_files: - split: train path: ml-ko/train-* - config_name: ml-lt data_files: - split: train path: ml-lt/train-* - config_name: ml-lv data_files: - split: train path: ml-lv/train-* - config_name: ml-mk data_files: - split: train path: ml-mk/train-* - config_name: ml-ms data_files: - split: train path: ml-ms/train-* - config_name: ml-nl data_files: - split: train path: ml-nl/train-* - config_name: ml-no data_files: - split: train path: ml-no/train-* - config_name: ml-pl data_files: - split: train path: ml-pl/train-* - config_name: ml-pt data_files: - split: train path: ml-pt/train-* - config_name: ml-ro data_files: - split: train path: ml-ro/train-* - config_name: ml-ru data_files: - split: train path: ml-ru/train-* - config_name: ml-si data_files: - split: train path: ml-si/train-* - config_name: ml-sk data_files: - split: train path: ml-sk/train-* - config_name: ml-sl data_files: - split: train path: ml-sl/train-* - config_name: ml-sq data_files: - split: train path: ml-sq/train-* - config_name: ml-sr data_files: - split: train path: ml-sr/train-* - config_name: ml-sv data_files: - split: train path: ml-sv/train-* - config_name: ml-ta data_files: - split: train path: ml-ta/train-* - config_name: ml-th data_files: - split: train path: ml-th/train-* - config_name: ml-tl data_files: - split: train path: ml-tl/train-* - config_name: ml-tr data_files: - split: train path: ml-tr/train-* - config_name: ml-uk data_files: - split: train path: ml-uk/train-* - config_name: ml-ur data_files: - split: train 
path: ml-ur/train-* - config_name: ml-vi data_files: - split: train path: ml-vi/train-* - config_name: ms-af data_files: - split: train path: ms-af/train-* - config_name: ms-ar data_files: - split: train path: ms-ar/train-* - config_name: ms-bg data_files: - split: train path: ms-bg/train-* - config_name: ms-bn data_files: - split: train path: ms-bn/train-* - config_name: ms-bs data_files: - split: train path: ms-bs/train-* - config_name: ms-ca data_files: - split: train path: ms-ca/train-* - config_name: ms-cs data_files: - split: train path: ms-cs/train-* - config_name: ms-da data_files: - split: train path: ms-da/train-* - config_name: ms-de data_files: - split: train path: ms-de/train-* - config_name: ms-el data_files: - split: train path: ms-el/train-* - config_name: ms-en data_files: - split: train path: ms-en/train-* - config_name: ms-es data_files: - split: train path: ms-es/train-* - config_name: ms-et data_files: - split: train path: ms-et/train-* - config_name: ms-eu data_files: - split: train path: ms-eu/train-* - config_name: ms-fa data_files: - split: train path: ms-fa/train-* - config_name: ms-fi data_files: - split: train path: ms-fi/train-* - config_name: ms-fr data_files: - split: train path: ms-fr/train-* - config_name: ms-gl data_files: - split: train path: ms-gl/train-* - config_name: ms-he data_files: - split: train path: ms-he/train-* - config_name: ms-hi data_files: - split: train path: ms-hi/train-* - config_name: ms-hr data_files: - split: train path: ms-hr/train-* - config_name: ms-hu data_files: - split: train path: ms-hu/train-* - config_name: ms-id data_files: - split: train path: ms-id/train-* - config_name: ms-is data_files: - split: train path: ms-is/train-* - config_name: ms-it data_files: - split: train path: ms-it/train-* - config_name: ms-ja data_files: - split: train path: ms-ja/train-* - config_name: ms-ka data_files: - split: train path: ms-ka/train-* - config_name: ms-kk data_files: - split: train path: ms-kk/train-* - 
config_name: ms-ko data_files: - split: train path: ms-ko/train-* - config_name: ms-lt data_files: - split: train path: ms-lt/train-* - config_name: ms-lv data_files: - split: train path: ms-lv/train-* - config_name: ms-mk data_files: - split: train path: ms-mk/train-* - config_name: ms-ml data_files: - split: train path: ms-ml/train-* - config_name: ms-nl data_files: - split: train path: ms-nl/train-* - config_name: ms-no data_files: - split: train path: ms-no/train-* - config_name: ms-pl data_files: - split: train path: ms-pl/train-* - config_name: ms-pt data_files: - split: train path: ms-pt/train-* - config_name: ms-ro data_files: - split: train path: ms-ro/train-* - config_name: ms-ru data_files: - split: train path: ms-ru/train-* - config_name: ms-si data_files: - split: train path: ms-si/train-* - config_name: ms-sk data_files: - split: train path: ms-sk/train-* - config_name: ms-sl data_files: - split: train path: ms-sl/train-* - config_name: ms-sq data_files: - split: train path: ms-sq/train-* - config_name: ms-sr data_files: - split: train path: ms-sr/train-* - config_name: ms-sv data_files: - split: train path: ms-sv/train-* - config_name: ms-ta data_files: - split: train path: ms-ta/train-* - config_name: ms-te data_files: - split: train path: ms-te/train-* - config_name: ms-th data_files: - split: train path: ms-th/train-* - config_name: ms-tl data_files: - split: train path: ms-tl/train-* - config_name: ms-tr data_files: - split: train path: ms-tr/train-* - config_name: ms-uk data_files: - split: train path: ms-uk/train-* - config_name: ms-ur data_files: - split: train path: ms-ur/train-* - config_name: ms-vi data_files: - split: train path: ms-vi/train-* - config_name: nl-af data_files: - split: train path: nl-af/train-* - config_name: nl-ar data_files: - split: train path: nl-ar/train-* - config_name: nl-bg data_files: - split: train path: nl-bg/train-* - config_name: nl-bn data_files: - split: train path: nl-bn/train-* - config_name: nl-bs 
data_files: - split: train path: nl-bs/train-* - config_name: nl-ca data_files: - split: train path: nl-ca/train-* - config_name: nl-cs data_files: - split: train path: nl-cs/train-* - config_name: nl-da data_files: - split: train path: nl-da/train-* - config_name: nl-de data_files: - split: train path: nl-de/train-* - config_name: nl-el data_files: - split: train path: nl-el/train-* - config_name: nl-en data_files: - split: train path: nl-en/train-* - config_name: nl-es data_files: - split: train path: nl-es/train-* - config_name: nl-et data_files: - split: train path: nl-et/train-* - config_name: nl-eu data_files: - split: train path: nl-eu/train-* - config_name: nl-fa data_files: - split: train path: nl-fa/train-* - config_name: nl-fi data_files: - split: train path: nl-fi/train-* - config_name: nl-fr data_files: - split: train path: nl-fr/train-* - config_name: nl-gl data_files: - split: train path: nl-gl/train-* - config_name: nl-he data_files: - split: train path: nl-he/train-* - config_name: nl-hi data_files: - split: train path: nl-hi/train-* - config_name: nl-hr data_files: - split: train path: nl-hr/train-* - config_name: nl-hu data_files: - split: train path: nl-hu/train-* - config_name: nl-hy data_files: - split: train path: nl-hy/train-* - config_name: nl-id data_files: - split: train path: nl-id/train-* - config_name: nl-is data_files: - split: train path: nl-is/train-* - config_name: nl-it data_files: - split: train path: nl-it/train-* - config_name: nl-ja data_files: - split: train path: nl-ja/train-* - config_name: nl-ka data_files: - split: train path: nl-ka/train-* - config_name: nl-kk data_files: - split: train path: nl-kk/train-* - config_name: nl-ko data_files: - split: train path: nl-ko/train-* - config_name: nl-lt data_files: - split: train path: nl-lt/train-* - config_name: nl-lv data_files: - split: train path: nl-lv/train-* - config_name: nl-mk data_files: - split: train path: nl-mk/train-* - config_name: nl-ml data_files: - split: train 
path: nl-ml/train-* - config_name: nl-ms data_files: - split: train path: nl-ms/train-* - config_name: nl-no data_files: - split: train path: nl-no/train-* - config_name: nl-pl data_files: - split: train path: nl-pl/train-* - config_name: nl-pt data_files: - split: train path: nl-pt/train-* - config_name: nl-ro data_files: - split: train path: nl-ro/train-* - config_name: nl-ru data_files: - split: train path: nl-ru/train-* - config_name: nl-si data_files: - split: train path: nl-si/train-* - config_name: nl-sk data_files: - split: train path: nl-sk/train-* - config_name: nl-sl data_files: - split: train path: nl-sl/train-* - config_name: nl-sq data_files: - split: train path: nl-sq/train-* - config_name: nl-sr data_files: - split: train path: nl-sr/train-* - config_name: nl-sv data_files: - split: train path: nl-sv/train-* - config_name: nl-ta data_files: - split: train path: nl-ta/train-* - config_name: nl-te data_files: - split: train path: nl-te/train-* - config_name: nl-th data_files: - split: train path: nl-th/train-* - config_name: nl-tl data_files: - split: train path: nl-tl/train-* - config_name: nl-tr data_files: - split: train path: nl-tr/train-* - config_name: nl-uk data_files: - split: train path: nl-uk/train-* - config_name: nl-ur data_files: - split: train path: nl-ur/train-* - config_name: nl-vi data_files: - split: train path: nl-vi/train-* - config_name: no-af data_files: - split: train path: no-af/train-* - config_name: no-ar data_files: - split: train path: no-ar/train-* - config_name: no-bg data_files: - split: train path: no-bg/train-* - config_name: no-bn data_files: - split: train path: no-bn/train-* - config_name: no-bs data_files: - split: train path: no-bs/train-* - config_name: no-ca data_files: - split: train path: no-ca/train-* - config_name: no-cs data_files: - split: train path: no-cs/train-* - config_name: no-da data_files: - split: train path: no-da/train-* - config_name: no-de data_files: - split: train path: no-de/train-* - 
config_name: no-el data_files: - split: train path: no-el/train-* - config_name: no-en data_files: - split: train path: no-en/train-* - config_name: no-es data_files: - split: train path: no-es/train-* - config_name: no-et data_files: - split: train path: no-et/train-* - config_name: no-eu data_files: - split: train path: no-eu/train-* - config_name: no-fa data_files: - split: train path: no-fa/train-* - config_name: no-fi data_files: - split: train path: no-fi/train-* - config_name: no-fr data_files: - split: train path: no-fr/train-* - config_name: no-gl data_files: - split: train path: no-gl/train-* - config_name: no-he data_files: - split: train path: no-he/train-* - config_name: no-hi data_files: - split: train path: no-hi/train-* - config_name: no-hr data_files: - split: train path: no-hr/train-* - config_name: no-hu data_files: - split: train path: no-hu/train-* - config_name: no-is data_files: - split: train path: no-is/train-* - config_name: no-it data_files: - split: train path: no-it/train-* - config_name: no-ja data_files: - split: train path: no-ja/train-* - config_name: no-ka data_files: - split: train path: no-ka/train-* - config_name: no-kk data_files: - split: train path: no-kk/train-* - config_name: no-ko data_files: - split: train path: no-ko/train-* - config_name: no-lt data_files: - split: train path: no-lt/train-* - config_name: no-lv data_files: - split: train path: no-lv/train-* - config_name: no-mk data_files: - split: train path: no-mk/train-* - config_name: no-ml data_files: - split: train path: no-ml/train-* - config_name: no-ms data_files: - split: train path: no-ms/train-* - config_name: no-nl data_files: - split: train path: no-nl/train-* - config_name: no-pl data_files: - split: train path: no-pl/train-* - config_name: no-pt data_files: - split: train path: no-pt/train-* - config_name: no-ro data_files: - split: train path: no-ro/train-* - config_name: no-ru data_files: - split: train path: no-ru/train-* - config_name: no-si 
data_files: - split: train path: no-si/train-* - config_name: no-sk data_files: - split: train path: no-sk/train-* - config_name: no-sl data_files: - split: train path: no-sl/train-* - config_name: no-sq data_files: - split: train path: no-sq/train-* - config_name: no-sr data_files: - split: train path: no-sr/train-* - config_name: no-sv data_files: - split: train path: no-sv/train-* - config_name: no-ta data_files: - split: train path: no-ta/train-* - config_name: no-te data_files: - split: train path: no-te/train-* - config_name: no-th data_files: - split: train path: no-th/train-* - config_name: no-tl data_files: - split: train path: no-tl/train-* - config_name: no-tr data_files: - split: train path: no-tr/train-* - config_name: no-uk data_files: - split: train path: no-uk/train-* - config_name: no-ur data_files: - split: train path: no-ur/train-* - config_name: no-vi data_files: - split: train path: no-vi/train-* - config_name: pl-af data_files: - split: train path: pl-af/train-* - config_name: pl-ar data_files: - split: train path: pl-ar/train-* - config_name: pl-bg data_files: - split: train path: pl-bg/train-* - config_name: pl-bn data_files: - split: train path: pl-bn/train-* - config_name: pl-bs data_files: - split: train path: pl-bs/train-* - config_name: pl-ca data_files: - split: train path: pl-ca/train-* - config_name: pl-cs data_files: - split: train path: pl-cs/train-* - config_name: pl-da data_files: - split: train path: pl-da/train-* - config_name: pl-de data_files: - split: train path: pl-de/train-* - config_name: pl-el data_files: - split: train path: pl-el/train-* - config_name: pl-en data_files: - split: train path: pl-en/train-* - config_name: pl-es data_files: - split: train path: pl-es/train-* - config_name: pl-et data_files: - split: train path: pl-et/train-* - config_name: pl-eu data_files: - split: train path: pl-eu/train-* - config_name: pl-fa data_files: - split: train path: pl-fa/train-* - config_name: pl-fi data_files: - split: train 
path: pl-fi/train-* - config_name: pl-fr data_files: - split: train path: pl-fr/train-* - config_name: pl-gl data_files: - split: train path: pl-gl/train-* - config_name: pl-he data_files: - split: train path: pl-he/train-* - config_name: pl-hi data_files: - split: train path: pl-hi/train-* - config_name: pl-hr data_files: - split: train path: pl-hr/train-* - config_name: pl-hu data_files: - split: train path: pl-hu/train-* - config_name: pl-hy data_files: - split: train path: pl-hy/train-* - config_name: pl-id data_files: - split: train path: pl-id/train-* - config_name: pl-is data_files: - split: train path: pl-is/train-* - config_name: pl-it data_files: - split: train path: pl-it/train-* - config_name: pl-ja data_files: - split: train path: pl-ja/train-* - config_name: pl-ka data_files: - split: train path: pl-ka/train-* - config_name: pl-kk data_files: - split: train path: pl-kk/train-* - config_name: pl-ko data_files: - split: train path: pl-ko/train-* - config_name: pl-lt data_files: - split: train path: pl-lt/train-* - config_name: pl-lv data_files: - split: train path: pl-lv/train-* - config_name: pl-mk data_files: - split: train path: pl-mk/train-* - config_name: pl-ml data_files: - split: train path: pl-ml/train-* - config_name: pl-ms data_files: - split: train path: pl-ms/train-* - config_name: pl-nl data_files: - split: train path: pl-nl/train-* - config_name: pl-no data_files: - split: train path: pl-no/train-* - config_name: pl-pt data_files: - split: train path: pl-pt/train-* - config_name: pl-ro data_files: - split: train path: pl-ro/train-* - config_name: pl-ru data_files: - split: train path: pl-ru/train-* - config_name: pl-si data_files: - split: train path: pl-si/train-* - config_name: pl-sk data_files: - split: train path: pl-sk/train-* - config_name: pl-sl data_files: - split: train path: pl-sl/train-* - config_name: pl-sq data_files: - split: train path: pl-sq/train-* - config_name: pl-sr data_files: - split: train path: pl-sr/train-* - 
config_name: pl-sv data_files: - split: train path: pl-sv/train-* - config_name: pl-ta data_files: - split: train path: pl-ta/train-* - config_name: pl-te data_files: - split: train path: pl-te/train-* - config_name: pl-th data_files: - split: train path: pl-th/train-* - config_name: pl-tl data_files: - split: train path: pl-tl/train-* - config_name: pl-tr data_files: - split: train path: pl-tr/train-* - config_name: pl-uk data_files: - split: train path: pl-uk/train-* - config_name: pl-ur data_files: - split: train path: pl-ur/train-* - config_name: pl-vi data_files: - split: train path: pl-vi/train-* - config_name: pt-af data_files: - split: train path: pt-af/train-* - config_name: pt-ar data_files: - split: train path: pt-ar/train-* - config_name: pt-bg data_files: - split: train path: pt-bg/train-* - config_name: pt-bn data_files: - split: train path: pt-bn/train-* - config_name: pt-bs data_files: - split: train path: pt-bs/train-* - config_name: pt-ca data_files: - split: train path: pt-ca/train-* - config_name: pt-cs data_files: - split: train path: pt-cs/train-* - config_name: pt-da data_files: - split: train path: pt-da/train-* - config_name: pt-de data_files: - split: train path: pt-de/train-* - config_name: pt-el data_files: - split: train path: pt-el/train-* - config_name: pt-en data_files: - split: train path: pt-en/train-* - config_name: pt-es data_files: - split: train path: pt-es/train-* - config_name: pt-et data_files: - split: train path: pt-et/train-* - config_name: pt-eu data_files: - split: train path: pt-eu/train-* - config_name: pt-fa data_files: - split: train path: pt-fa/train-* - config_name: pt-fi data_files: - split: train path: pt-fi/train-* - config_name: pt-fr data_files: - split: train path: pt-fr/train-* - config_name: pt-gl data_files: - split: train path: pt-gl/train-* - config_name: pt-he data_files: - split: train path: pt-he/train-* - config_name: pt-hi data_files: - split: train path: pt-hi/train-* - config_name: pt-hr 
data_files: - split: train path: pt-hr/train-* - config_name: pt-hu data_files: - split: train path: pt-hu/train-* - config_name: pt-hy data_files: - split: train path: pt-hy/train-* - config_name: pt-id data_files: - split: train path: pt-id/train-* - config_name: pt-is data_files: - split: train path: pt-is/train-* - config_name: pt-it data_files: - split: train path: pt-it/train-* - config_name: pt-ja data_files: - split: train path: pt-ja/train-* - config_name: pt-ka data_files: - split: train path: pt-ka/train-* - config_name: pt-kk data_files: - split: train path: pt-kk/train-* - config_name: pt-ko data_files: - split: train path: pt-ko/train-* - config_name: pt-lt data_files: - split: train path: pt-lt/train-* - config_name: pt-lv data_files: - split: train path: pt-lv/train-* - config_name: pt-mk data_files: - split: train path: pt-mk/train-* - config_name: pt-ml data_files: - split: train path: pt-ml/train-* - config_name: pt-ms data_files: - split: train path: pt-ms/train-* - config_name: pt-nl data_files: - split: train path: pt-nl/train-* - config_name: pt-no data_files: - split: train path: pt-no/train-* - config_name: pt-pl data_files: - split: train path: pt-pl/train-* - config_name: pt-ro data_files: - split: train path: pt-ro/train-* - config_name: pt-ru data_files: - split: train path: pt-ru/train-* - config_name: pt-si data_files: - split: train path: pt-si/train-* - config_name: pt-sk data_files: - split: train path: pt-sk/train-* - config_name: pt-sl data_files: - split: train path: pt-sl/train-* - config_name: pt-sq data_files: - split: train path: pt-sq/train-* - config_name: pt-sr data_files: - split: train path: pt-sr/train-* - config_name: pt-sv data_files: - split: train path: pt-sv/train-* - config_name: pt-ta data_files: - split: train path: pt-ta/train-* - config_name: pt-te data_files: - split: train path: pt-te/train-* - config_name: pt-th data_files: - split: train path: pt-th/train-* - config_name: pt-tl data_files: - split: train 
path: pt-tl/train-* - config_name: pt-tr data_files: - split: train path: pt-tr/train-* - config_name: pt-uk data_files: - split: train path: pt-uk/train-* - config_name: pt-ur data_files: - split: train path: pt-ur/train-* - config_name: pt-vi data_files: - split: train path: pt-vi/train-* - config_name: ro-af data_files: - split: train path: ro-af/train-* - config_name: ro-ar data_files: - split: train path: ro-ar/train-* - config_name: ro-bg data_files: - split: train path: ro-bg/train-* - config_name: ro-bn data_files: - split: train path: ro-bn/train-* - config_name: ro-bs data_files: - split: train path: ro-bs/train-* - config_name: ro-ca data_files: - split: train path: ro-ca/train-* - config_name: ro-cs data_files: - split: train path: ro-cs/train-* - config_name: ro-da data_files: - split: train path: ro-da/train-* - config_name: ro-de data_files: - split: train path: ro-de/train-* - config_name: ro-el data_files: - split: train path: ro-el/train-* - config_name: ro-en data_files: - split: train path: ro-en/train-* - config_name: ro-es data_files: - split: train path: ro-es/train-* - config_name: ro-et data_files: - split: train path: ro-et/train-* - config_name: ro-eu data_files: - split: train path: ro-eu/train-* - config_name: ro-fa data_files: - split: train path: ro-fa/train-* - config_name: ro-fi data_files: - split: train path: ro-fi/train-* - config_name: ro-fr data_files: - split: train path: ro-fr/train-* - config_name: ro-gl data_files: - split: train path: ro-gl/train-* - config_name: ro-he data_files: - split: train path: ro-he/train-* - config_name: ro-hi data_files: - split: train path: ro-hi/train-* - config_name: ro-hr data_files: - split: train path: ro-hr/train-* - config_name: ro-hu data_files: - split: train path: ro-hu/train-* - config_name: ro-hy data_files: - split: train path: ro-hy/train-* - config_name: ro-id data_files: - split: train path: ro-id/train-* - config_name: ro-is data_files: - split: train path: ro-is/train-* - 
config_name: ro-it data_files: - split: train path: ro-it/train-* - config_name: ro-ja data_files: - split: train path: ro-ja/train-* - config_name: ro-ka data_files: - split: train path: ro-ka/train-* - config_name: ro-kk data_files: - split: train path: ro-kk/train-* - config_name: ro-ko data_files: - split: train path: ro-ko/train-* - config_name: ro-lt data_files: - split: train path: ro-lt/train-* - config_name: ro-lv data_files: - split: train path: ro-lv/train-* - config_name: ro-mk data_files: - split: train path: ro-mk/train-* - config_name: ro-ml data_files: - split: train path: ro-ml/train-* - config_name: ro-ms data_files: - split: train path: ro-ms/train-* - config_name: ro-nl data_files: - split: train path: ro-nl/train-* - config_name: ro-no data_files: - split: train path: ro-no/train-* - config_name: ro-pl data_files: - split: train path: ro-pl/train-* - config_name: ro-pt data_files: - split: train path: ro-pt/train-* - config_name: ro-ru data_files: - split: train path: ro-ru/train-* - config_name: ro-si data_files: - split: train path: ro-si/train-* - config_name: ro-sk data_files: - split: train path: ro-sk/train-* - config_name: ro-sl data_files: - split: train path: ro-sl/train-* - config_name: ro-sq data_files: - split: train path: ro-sq/train-* - config_name: ro-sr data_files: - split: train path: ro-sr/train-* - config_name: ro-sv data_files: - split: train path: ro-sv/train-* - config_name: ro-ta data_files: - split: train path: ro-ta/train-* - config_name: ro-te data_files: - split: train path: ro-te/train-* - config_name: ro-th data_files: - split: train path: ro-th/train-* - config_name: ro-tl data_files: - split: train path: ro-tl/train-* - config_name: ro-tr data_files: - split: train path: ro-tr/train-* - config_name: ro-uk data_files: - split: train path: ro-uk/train-* - config_name: ro-ur data_files: - split: train path: ro-ur/train-* - config_name: ro-vi data_files: - split: train path: ro-vi/train-* - config_name: ru-af 
data_files: - split: train path: ru-af/train-* - config_name: ru-ar data_files: - split: train path: ru-ar/train-* - config_name: ru-bg data_files: - split: train path: ru-bg/train-* - config_name: ru-bn data_files: - split: train path: ru-bn/train-* - config_name: ru-bs data_files: - split: train path: ru-bs/train-* - config_name: ru-ca data_files: - split: train path: ru-ca/train-* - config_name: ru-cs data_files: - split: train path: ru-cs/train-* - config_name: ru-da data_files: - split: train path: ru-da/train-* - config_name: ru-de data_files: - split: train path: ru-de/train-* - config_name: ru-el data_files: - split: train path: ru-el/train-* - config_name: ru-en data_files: - split: train path: ru-en/train-* - config_name: ru-es data_files: - split: train path: ru-es/train-* - config_name: ru-et data_files: - split: train path: ru-et/train-* - config_name: ru-eu data_files: - split: train path: ru-eu/train-* - config_name: ru-fa data_files: - split: train path: ru-fa/train-* - config_name: ru-fi data_files: - split: train path: ru-fi/train-* - config_name: ru-fr data_files: - split: train path: ru-fr/train-* - config_name: ru-gl data_files: - split: train path: ru-gl/train-* - config_name: ru-he data_files: - split: train path: ru-he/train-* - config_name: ru-hi data_files: - split: train path: ru-hi/train-* - config_name: ru-hr data_files: - split: train path: ru-hr/train-* - config_name: ru-hu data_files: - split: train path: ru-hu/train-* - config_name: ru-hy data_files: - split: train path: ru-hy/train-* - config_name: ru-id data_files: - split: train path: ru-id/train-* - config_name: ru-is data_files: - split: train path: ru-is/train-* - config_name: ru-it data_files: - split: train path: ru-it/train-* - config_name: ru-ja data_files: - split: train path: ru-ja/train-* - config_name: ru-ka data_files: - split: train path: ru-ka/train-* - config_name: ru-kk data_files: - split: train path: ru-kk/train-* - config_name: ru-ko data_files: - split: train 
path: ru-ko/train-* - config_name: ru-lt data_files: - split: train path: ru-lt/train-* - config_name: ru-lv data_files: - split: train path: ru-lv/train-* - config_name: ru-mk data_files: - split: train path: ru-mk/train-* - config_name: ru-ml data_files: - split: train path: ru-ml/train-* - config_name: ru-ms data_files: - split: train path: ru-ms/train-* - config_name: ru-nl data_files: - split: train path: ru-nl/train-* - config_name: ru-no data_files: - split: train path: ru-no/train-* - config_name: ru-pl data_files: - split: train path: ru-pl/train-* - config_name: ru-pt data_files: - split: train path: ru-pt/train-* - config_name: ru-ro data_files: - split: train path: ru-ro/train-* - config_name: ru-si data_files: - split: train path: ru-si/train-* - config_name: ru-sk data_files: - split: train path: ru-sk/train-* - config_name: ru-sl data_files: - split: train path: ru-sl/train-* - config_name: ru-sq data_files: - split: train path: ru-sq/train-* - config_name: ru-sr data_files: - split: train path: ru-sr/train-* - config_name: ru-sv data_files: - split: train path: ru-sv/train-* - config_name: ru-ta data_files: - split: train path: ru-ta/train-* - config_name: ru-te data_files: - split: train path: ru-te/train-* - config_name: ru-th data_files: - split: train path: ru-th/train-* - config_name: ru-tl data_files: - split: train path: ru-tl/train-* - config_name: ru-tr data_files: - split: train path: ru-tr/train-* - config_name: ru-uk data_files: - split: train path: ru-uk/train-* - config_name: ru-ur data_files: - split: train path: ru-ur/train-* - config_name: ru-vi data_files: - split: train path: ru-vi/train-* - config_name: si-af data_files: - split: train path: si-af/train-* - config_name: si-ar data_files: - split: train path: si-ar/train-* - config_name: si-bg data_files: - split: train path: si-bg/train-* - config_name: si-bn data_files: - split: train path: si-bn/train-* - config_name: si-bs data_files: - split: train path: si-bs/train-* - 
config_name: si-ca data_files: - split: train path: si-ca/train-* - config_name: si-cs data_files: - split: train path: si-cs/train-* - config_name: si-da data_files: - split: train path: si-da/train-* - config_name: si-de data_files: - split: train path: si-de/train-* - config_name: si-el data_files: - split: train path: si-el/train-* - config_name: si-en data_files: - split: train path: si-en/train-* - config_name: si-es data_files: - split: train path: si-es/train-* - config_name: si-et data_files: - split: train path: si-et/train-* - config_name: si-eu data_files: - split: train path: si-eu/train-* - config_name: si-fa data_files: - split: train path: si-fa/train-* - config_name: si-fi data_files: - split: train path: si-fi/train-* - config_name: si-fr data_files: - split: train path: si-fr/train-* - config_name: si-gl data_files: - split: train path: si-gl/train-* - config_name: si-he data_files: - split: train path: si-he/train-* - config_name: si-hi data_files: - split: train path: si-hi/train-* - config_name: si-hr data_files: - split: train path: si-hr/train-* - config_name: si-hu data_files: - split: train path: si-hu/train-* - config_name: si-id data_files: - split: train path: si-id/train-* - config_name: si-is data_files: - split: train path: si-is/train-* - config_name: si-it data_files: - split: train path: si-it/train-* - config_name: si-ja data_files: - split: train path: si-ja/train-* - config_name: si-ka data_files: - split: train path: si-ka/train-* - config_name: si-ko data_files: - split: train path: si-ko/train-* - config_name: si-lt data_files: - split: train path: si-lt/train-* - config_name: si-lv data_files: - split: train path: si-lv/train-* - config_name: si-mk data_files: - split: train path: si-mk/train-* - config_name: si-ml data_files: - split: train path: si-ml/train-* - config_name: si-ms data_files: - split: train path: si-ms/train-* - config_name: si-nl data_files: - split: train path: si-nl/train-* - config_name: si-no 
data_files: - split: train path: si-no/train-* - config_name: si-pl data_files: - split: train path: si-pl/train-* - config_name: si-pt data_files: - split: train path: si-pt/train-* - config_name: si-ro data_files: - split: train path: si-ro/train-* - config_name: si-ru data_files: - split: train path: si-ru/train-* - config_name: si-sk data_files: - split: train path: si-sk/train-* - config_name: si-sl data_files: - split: train path: si-sl/train-* - config_name: si-sq data_files: - split: train path: si-sq/train-* - config_name: si-sr data_files: - split: train path: si-sr/train-* - config_name: si-sv data_files: - split: train path: si-sv/train-* - config_name: si-ta data_files: - split: train path: si-ta/train-* - config_name: si-te data_files: - split: train path: si-te/train-* - config_name: si-th data_files: - split: train path: si-th/train-* - config_name: si-tl data_files: - split: train path: si-tl/train-* - config_name: si-tr data_files: - split: train path: si-tr/train-* - config_name: si-uk data_files: - split: train path: si-uk/train-* - config_name: si-ur data_files: - split: train path: si-ur/train-* - config_name: si-vi data_files: - split: train path: si-vi/train-* - config_name: sk-af data_files: - split: train path: sk-af/train-* - config_name: sk-ar data_files: - split: train path: sk-ar/train-* - config_name: sk-bg data_files: - split: train path: sk-bg/train-* - config_name: sk-bn data_files: - split: train path: sk-bn/train-* - config_name: sk-bs data_files: - split: train path: sk-bs/train-* - config_name: sk-ca data_files: - split: train path: sk-ca/train-* - config_name: sk-cs data_files: - split: train path: sk-cs/train-* - config_name: sk-da data_files: - split: train path: sk-da/train-* - config_name: sk-de data_files: - split: train path: sk-de/train-* - config_name: sk-el data_files: - split: train path: sk-el/train-* - config_name: sk-en data_files: - split: train path: sk-en/train-* - config_name: sk-es data_files: - split: train 
path: sk-es/train-* - config_name: sk-et data_files: - split: train path: sk-et/train-* - config_name: sk-eu data_files: - split: train path: sk-eu/train-* - config_name: sk-fa data_files: - split: train path: sk-fa/train-* - config_name: sk-fi data_files: - split: train path: sk-fi/train-* - config_name: sk-fr data_files: - split: train path: sk-fr/train-* - config_name: sk-gl data_files: - split: train path: sk-gl/train-* - config_name: sk-he data_files: - split: train path: sk-he/train-* - config_name: sk-hi data_files: - split: train path: sk-hi/train-* - config_name: sk-hr data_files: - split: train path: sk-hr/train-* - config_name: sk-hu data_files: - split: train path: sk-hu/train-* - config_name: sk-hy data_files: - split: train path: sk-hy/train-* - config_name: sk-id data_files: - split: train path: sk-id/train-* - config_name: sk-is data_files: - split: train path: sk-is/train-* - config_name: sk-it data_files: - split: train path: sk-it/train-* - config_name: sk-ja data_files: - split: train path: sk-ja/train-* - config_name: sk-ka data_files: - split: train path: sk-ka/train-* - config_name: sk-kk data_files: - split: train path: sk-kk/train-* - config_name: sk-ko data_files: - split: train path: sk-ko/train-* - config_name: sk-lt data_files: - split: train path: sk-lt/train-* - config_name: sk-lv data_files: - split: train path: sk-lv/train-* - config_name: sk-mk data_files: - split: train path: sk-mk/train-* - config_name: sk-ml data_files: - split: train path: sk-ml/train-* - config_name: sk-ms data_files: - split: train path: sk-ms/train-* - config_name: sk-nl data_files: - split: train path: sk-nl/train-* - config_name: sk-no data_files: - split: train path: sk-no/train-* - config_name: sk-pl data_files: - split: train path: sk-pl/train-* - config_name: sk-pt data_files: - split: train path: sk-pt/train-* - config_name: sk-ro data_files: - split: train path: sk-ro/train-* - config_name: sk-ru data_files: - split: train path: sk-ru/train-* - 
config_name: sk-si data_files: - split: train path: sk-si/train-* - config_name: sk-sl data_files: - split: train path: sk-sl/train-* - config_name: sk-sq data_files: - split: train path: sk-sq/train-* - config_name: sk-sr data_files: - split: train path: sk-sr/train-* - config_name: sk-sv data_files: - split: train path: sk-sv/train-* - config_name: sk-ta data_files: - split: train path: sk-ta/train-* - config_name: sk-te data_files: - split: train path: sk-te/train-* - config_name: sk-th data_files: - split: train path: sk-th/train-* - config_name: sk-tl data_files: - split: train path: sk-tl/train-* - config_name: sk-tr data_files: - split: train path: sk-tr/train-* - config_name: sk-uk data_files: - split: train path: sk-uk/train-* - config_name: sk-ur data_files: - split: train path: sk-ur/train-* - config_name: sk-vi data_files: - split: train path: sk-vi/train-* - config_name: sl-af data_files: - split: train path: sl-af/train-* - config_name: sl-ar data_files: - split: train path: sl-ar/train-* - config_name: sl-bg data_files: - split: train path: sl-bg/train-* - config_name: sl-bn data_files: - split: train path: sl-bn/train-* - config_name: sl-bs data_files: - split: train path: sl-bs/train-* - config_name: sl-ca data_files: - split: train path: sl-ca/train-* - config_name: sl-cs data_files: - split: train path: sl-cs/train-* - config_name: sl-da data_files: - split: train path: sl-da/train-* - config_name: sl-de data_files: - split: train path: sl-de/train-* - config_name: sl-el data_files: - split: train path: sl-el/train-* - config_name: sl-en data_files: - split: train path: sl-en/train-* - config_name: sl-es data_files: - split: train path: sl-es/train-* - config_name: sl-et data_files: - split: train path: sl-et/train-* - config_name: sl-eu data_files: - split: train path: sl-eu/train-* - config_name: sl-fa data_files: - split: train path: sl-fa/train-* - config_name: sl-fi data_files: - split: train path: sl-fi/train-* - config_name: sl-fr 
data_files: - split: train path: sl-fr/train-* - config_name: sl-gl data_files: - split: train path: sl-gl/train-* - config_name: sl-he data_files: - split: train path: sl-he/train-* - config_name: sl-hi data_files: - split: train path: sl-hi/train-* - config_name: sl-hr data_files: - split: train path: sl-hr/train-* - config_name: sl-hu data_files: - split: train path: sl-hu/train-* - config_name: sl-hy data_files: - split: train path: sl-hy/train-* - config_name: sl-id data_files: - split: train path: sl-id/train-* - config_name: sl-is data_files: - split: train path: sl-is/train-* - config_name: sl-it data_files: - split: train path: sl-it/train-* - config_name: sl-ja data_files: - split: train path: sl-ja/train-* - config_name: sl-ka data_files: - split: train path: sl-ka/train-* - config_name: sl-kk data_files: - split: train path: sl-kk/train-* - config_name: sl-ko data_files: - split: train path: sl-ko/train-* - config_name: sl-lt data_files: - split: train path: sl-lt/train-* - config_name: sl-lv data_files: - split: train path: sl-lv/train-* - config_name: sl-mk data_files: - split: train path: sl-mk/train-* - config_name: sl-ml data_files: - split: train path: sl-ml/train-* - config_name: sl-ms data_files: - split: train path: sl-ms/train-* - config_name: sl-nl data_files: - split: train path: sl-nl/train-* - config_name: sl-no data_files: - split: train path: sl-no/train-* - config_name: sl-pl data_files: - split: train path: sl-pl/train-* - config_name: sl-pt data_files: - split: train path: sl-pt/train-* - config_name: sl-ro data_files: - split: train path: sl-ro/train-* - config_name: sl-ru data_files: - split: train path: sl-ru/train-* - config_name: sl-si data_files: - split: train path: sl-si/train-* - config_name: sl-sk data_files: - split: train path: sl-sk/train-* - config_name: sl-sq data_files: - split: train path: sl-sq/train-* - config_name: sl-sr data_files: - split: train path: sl-sr/train-* - config_name: sl-sv data_files: - split: train 
path: sl-sv/train-* - config_name: sl-ta data_files: - split: train path: sl-ta/train-* - config_name: sl-te data_files: - split: train path: sl-te/train-* - config_name: sl-th data_files: - split: train path: sl-th/train-* - config_name: sl-tl data_files: - split: train path: sl-tl/train-* - config_name: sl-tr data_files: - split: train path: sl-tr/train-* - config_name: sl-uk data_files: - split: train path: sl-uk/train-* - config_name: sl-ur data_files: - split: train path: sl-ur/train-* - config_name: sl-vi data_files: - split: train path: sl-vi/train-* - config_name: sq-af data_files: - split: train path: sq-af/train-* - config_name: sq-ar data_files: - split: train path: sq-ar/train-* - config_name: sq-bg data_files: - split: train path: sq-bg/train-* - config_name: sq-bn data_files: - split: train path: sq-bn/train-* - config_name: sq-bs data_files: - split: train path: sq-bs/train-* - config_name: sq-ca data_files: - split: train path: sq-ca/train-* - config_name: sq-cs data_files: - split: train path: sq-cs/train-* - config_name: sq-da data_files: - split: train path: sq-da/train-* - config_name: sq-de data_files: - split: train path: sq-de/train-* - config_name: sq-el data_files: - split: train path: sq-el/train-* - config_name: sq-en data_files: - split: train path: sq-en/train-* - config_name: sq-es data_files: - split: train path: sq-es/train-* - config_name: sq-et data_files: - split: train path: sq-et/train-* - config_name: sq-eu data_files: - split: train path: sq-eu/train-* - config_name: sq-fa data_files: - split: train path: sq-fa/train-* - config_name: sq-fi data_files: - split: train path: sq-fi/train-* - config_name: sq-fr data_files: - split: train path: sq-fr/train-* - config_name: sq-gl data_files: - split: train path: sq-gl/train-* - config_name: sq-he data_files: - split: train path: sq-he/train-* - config_name: sq-hi data_files: - split: train path: sq-hi/train-* - config_name: sq-hr data_files: - split: train path: sq-hr/train-* - 
config_name: sq-hu data_files: - split: train path: sq-hu/train-* - config_name: sq-hy data_files: - split: train path: sq-hy/train-* - config_name: sq-id data_files: - split: train path: sq-id/train-* - config_name: sq-is data_files: - split: train path: sq-is/train-* - config_name: sq-it data_files: - split: train path: sq-it/train-* - config_name: sq-ja data_files: - split: train path: sq-ja/train-* - config_name: sq-ka data_files: - split: train path: sq-ka/train-* - config_name: sq-ko data_files: - split: train path: sq-ko/train-* - config_name: sq-lt data_files: - split: train path: sq-lt/train-* - config_name: sq-lv data_files: - split: train path: sq-lv/train-* - config_name: sq-mk data_files: - split: train path: sq-mk/train-* - config_name: sq-ml data_files: - split: train path: sq-ml/train-* - config_name: sq-ms data_files: - split: train path: sq-ms/train-* - config_name: sq-nl data_files: - split: train path: sq-nl/train-* - config_name: sq-no data_files: - split: train path: sq-no/train-* - config_name: sq-pl data_files: - split: train path: sq-pl/train-* - config_name: sq-pt data_files: - split: train path: sq-pt/train-* - config_name: sq-ro data_files: - split: train path: sq-ro/train-* - config_name: sq-ru data_files: - split: train path: sq-ru/train-* - config_name: sq-si data_files: - split: train path: sq-si/train-* - config_name: sq-sk data_files: - split: train path: sq-sk/train-* - config_name: sq-sl data_files: - split: train path: sq-sl/train-* - config_name: sq-sr data_files: - split: train path: sq-sr/train-* - config_name: sq-sv data_files: - split: train path: sq-sv/train-* - config_name: sq-ta data_files: - split: train path: sq-ta/train-* - config_name: sq-te data_files: - split: train path: sq-te/train-* - config_name: sq-th data_files: - split: train path: sq-th/train-* - config_name: sq-tl data_files: - split: train path: sq-tl/train-* - config_name: sq-tr data_files: - split: train path: sq-tr/train-* - config_name: sq-uk 
data_files: - split: train path: sq-uk/train-* - config_name: sq-ur data_files: - split: train path: sq-ur/train-* - config_name: sq-vi data_files: - split: train path: sq-vi/train-* - config_name: sr-af data_files: - split: train path: sr-af/train-* - config_name: sr-ar data_files: - split: train path: sr-ar/train-* - config_name: sr-bg data_files: - split: train path: sr-bg/train-* - config_name: sr-bn data_files: - split: train path: sr-bn/train-* - config_name: sr-bs data_files: - split: train path: sr-bs/train-* - config_name: sr-ca data_files: - split: train path: sr-ca/train-* - config_name: sr-cs data_files: - split: train path: sr-cs/train-* - config_name: sr-da data_files: - split: train path: sr-da/train-* - config_name: sr-de data_files: - split: train path: sr-de/train-* - config_name: sr-el data_files: - split: train path: sr-el/train-* - config_name: sr-en data_files: - split: train path: sr-en/train-* - config_name: sr-es data_files: - split: train path: sr-es/train-* - config_name: sr-et data_files: - split: train path: sr-et/train-* - config_name: sr-eu data_files: - split: train path: sr-eu/train-* - config_name: sr-fa data_files: - split: train path: sr-fa/train-* - config_name: sr-fi data_files: - split: train path: sr-fi/train-* - config_name: sr-fr data_files: - split: train path: sr-fr/train-* - config_name: sr-gl data_files: - split: train path: sr-gl/train-* - config_name: sr-he data_files: - split: train path: sr-he/train-* - config_name: sr-hi data_files: - split: train path: sr-hi/train-* - config_name: sr-hr data_files: - split: train path: sr-hr/train-* - config_name: sr-hu data_files: - split: train path: sr-hu/train-* - config_name: sr-hy data_files: - split: train path: sr-hy/train-* - config_name: sr-id data_files: - split: train path: sr-id/train-* - config_name: sr-is data_files: - split: train path: sr-is/train-* - config_name: sr-it data_files: - split: train path: sr-it/train-* - config_name: sr-ja data_files: - split: train 
path: sr-ja/train-* - config_name: sr-ka data_files: - split: train path: sr-ka/train-* - config_name: sr-kk data_files: - split: train path: sr-kk/train-* - config_name: sr-ko data_files: - split: train path: sr-ko/train-* - config_name: sr-lt data_files: - split: train path: sr-lt/train-* - config_name: sr-lv data_files: - split: train path: sr-lv/train-* - config_name: sr-mk data_files: - split: train path: sr-mk/train-* - config_name: sr-ml data_files: - split: train path: sr-ml/train-* - config_name: sr-ms data_files: - split: train path: sr-ms/train-* - config_name: sr-nl data_files: - split: train path: sr-nl/train-* - config_name: sr-no data_files: - split: train path: sr-no/train-* - config_name: sr-pl data_files: - split: train path: sr-pl/train-* - config_name: sr-pt data_files: - split: train path: sr-pt/train-* - config_name: sr-ro data_files: - split: train path: sr-ro/train-* - config_name: sr-ru data_files: - split: train path: sr-ru/train-* - config_name: sr-si data_files: - split: train path: sr-si/train-* - config_name: sr-sk data_files: - split: train path: sr-sk/train-* - config_name: sr-sl data_files: - split: train path: sr-sl/train-* - config_name: sr-sq data_files: - split: train path: sr-sq/train-* - config_name: sr-sv data_files: - split: train path: sr-sv/train-* - config_name: sr-ta data_files: - split: train path: sr-ta/train-* - config_name: sr-te data_files: - split: train path: sr-te/train-* - config_name: sr-th data_files: - split: train path: sr-th/train-* - config_name: sr-tl data_files: - split: train path: sr-tl/train-* - config_name: sr-tr data_files: - split: train path: sr-tr/train-* - config_name: sr-uk data_files: - split: train path: sr-uk/train-* - config_name: sr-ur data_files: - split: train path: sr-ur/train-* - config_name: sr-vi data_files: - split: train path: sr-vi/train-* - config_name: sv-af data_files: - split: train path: sv-af/train-* - config_name: sv-ar data_files: - split: train path: sv-ar/train-* - 
config_name: sv-bg data_files: - split: train path: sv-bg/train-* - config_name: sv-bn data_files: - split: train path: sv-bn/train-* - config_name: sv-bs data_files: - split: train path: sv-bs/train-* - config_name: sv-ca data_files: - split: train path: sv-ca/train-* - config_name: sv-cs data_files: - split: train path: sv-cs/train-* - config_name: sv-da data_files: - split: train path: sv-da/train-* - config_name: sv-de data_files: - split: train path: sv-de/train-* - config_name: sv-el data_files: - split: train path: sv-el/train-* - config_name: sv-en data_files: - split: train path: sv-en/train-* - config_name: sv-es data_files: - split: train path: sv-es/train-* - config_name: sv-et data_files: - split: train path: sv-et/train-* - config_name: sv-eu data_files: - split: train path: sv-eu/train-* - config_name: sv-fa data_files: - split: train path: sv-fa/train-* - config_name: sv-fi data_files: - split: train path: sv-fi/train-* - config_name: sv-fr data_files: - split: train path: sv-fr/train-* - config_name: sv-gl data_files: - split: train path: sv-gl/train-* - config_name: sv-he data_files: - split: train path: sv-he/train-* - config_name: sv-hi data_files: - split: train path: sv-hi/train-* - config_name: sv-hr data_files: - split: train path: sv-hr/train-* - config_name: sv-hu data_files: - split: train path: sv-hu/train-* - config_name: sv-hy data_files: - split: train path: sv-hy/train-* - config_name: sv-id data_files: - split: train path: sv-id/train-* - config_name: sv-is data_files: - split: train path: sv-is/train-* - config_name: sv-it data_files: - split: train path: sv-it/train-* - config_name: sv-ja data_files: - split: train path: sv-ja/train-* - config_name: sv-ka data_files: - split: train path: sv-ka/train-* - config_name: sv-kk data_files: - split: train path: sv-kk/train-* - config_name: sv-ko data_files: - split: train path: sv-ko/train-* - config_name: sv-lt data_files: - split: train path: sv-lt/train-* - config_name: sv-lv 
data_files: - split: train path: sv-lv/train-* - config_name: sv-mk data_files: - split: train path: sv-mk/train-* - config_name: sv-ml data_files: - split: train path: sv-ml/train-* - config_name: sv-ms data_files: - split: train path: sv-ms/train-* - config_name: sv-nl data_files: - split: train path: sv-nl/train-* - config_name: sv-no data_files: - split: train path: sv-no/train-* - config_name: sv-pl data_files: - split: train path: sv-pl/train-* - config_name: sv-pt data_files: - split: train path: sv-pt/train-* - config_name: sv-ro data_files: - split: train path: sv-ro/train-* - config_name: sv-ru data_files: - split: train path: sv-ru/train-* - config_name: sv-si data_files: - split: train path: sv-si/train-* - config_name: sv-sk data_files: - split: train path: sv-sk/train-* - config_name: sv-sl data_files: - split: train path: sv-sl/train-* - config_name: sv-sq data_files: - split: train path: sv-sq/train-* - config_name: sv-sr data_files: - split: train path: sv-sr/train-* - config_name: sv-ta data_files: - split: train path: sv-ta/train-* - config_name: sv-te data_files: - split: train path: sv-te/train-* - config_name: sv-th data_files: - split: train path: sv-th/train-* - config_name: sv-tl data_files: - split: train path: sv-tl/train-* - config_name: sv-tr data_files: - split: train path: sv-tr/train-* - config_name: sv-uk data_files: - split: train path: sv-uk/train-* - config_name: sv-ur data_files: - split: train path: sv-ur/train-* - config_name: sv-vi data_files: - split: train path: sv-vi/train-* - config_name: ta-af data_files: - split: train path: ta-af/train-* - config_name: ta-ar data_files: - split: train path: ta-ar/train-* - config_name: ta-bg data_files: - split: train path: ta-bg/train-* - config_name: ta-bn data_files: - split: train path: ta-bn/train-* - config_name: ta-bs data_files: - split: train path: ta-bs/train-* - config_name: ta-cs data_files: - split: train path: ta-cs/train-* - config_name: ta-da data_files: - split: train 
path: ta-da/train-* - config_name: ta-de data_files: - split: train path: ta-de/train-* - config_name: ta-el data_files: - split: train path: ta-el/train-* - config_name: ta-en data_files: - split: train path: ta-en/train-* - config_name: ta-es data_files: - split: train path: ta-es/train-* - config_name: ta-et data_files: - split: train path: ta-et/train-* - config_name: ta-eu data_files: - split: train path: ta-eu/train-* - config_name: ta-fa data_files: - split: train path: ta-fa/train-* - config_name: ta-fi data_files: - split: train path: ta-fi/train-* - config_name: ta-fr data_files: - split: train path: ta-fr/train-* - config_name: ta-he data_files: - split: train path: ta-he/train-* - config_name: ta-hi data_files: - split: train path: ta-hi/train-* - config_name: ta-hr data_files: - split: train path: ta-hr/train-* - config_name: ta-hu data_files: - split: train path: ta-hu/train-* - config_name: ta-id data_files: - split: train path: ta-id/train-* - config_name: ta-is data_files: - split: train path: ta-is/train-* - config_name: ta-it data_files: - split: train path: ta-it/train-* - config_name: ta-ja data_files: - split: train path: ta-ja/train-* - config_name: ta-ko data_files: - split: train path: ta-ko/train-* - config_name: ta-lt data_files: - split: train path: ta-lt/train-* - config_name: ta-lv data_files: - split: train path: ta-lv/train-* - config_name: ta-mk data_files: - split: train path: ta-mk/train-* - config_name: ta-ml data_files: - split: train path: ta-ml/train-* - config_name: ta-ms data_files: - split: train path: ta-ms/train-* - config_name: ta-nl data_files: - split: train path: ta-nl/train-* - config_name: ta-no data_files: - split: train path: ta-no/train-* - config_name: ta-pl data_files: - split: train path: ta-pl/train-* - config_name: ta-pt data_files: - split: train path: ta-pt/train-* - config_name: ta-ro data_files: - split: train path: ta-ro/train-* - config_name: ta-ru data_files: - split: train path: ta-ru/train-* - 
config_name: ta-si data_files: - split: train path: ta-si/train-* - config_name: ta-sk data_files: - split: train path: ta-sk/train-* - config_name: ta-sl data_files: - split: train path: ta-sl/train-* - config_name: ta-sq data_files: - split: train path: ta-sq/train-* - config_name: ta-sr data_files: - split: train path: ta-sr/train-* - config_name: ta-sv data_files: - split: train path: ta-sv/train-* - config_name: ta-te data_files: - split: train path: ta-te/train-* - config_name: ta-th data_files: - split: train path: ta-th/train-* - config_name: ta-tr data_files: - split: train path: ta-tr/train-* - config_name: ta-vi data_files: - split: train path: ta-vi/train-* - config_name: te-ar data_files: - split: train path: te-ar/train-* - config_name: te-bg data_files: - split: train path: te-bg/train-* - config_name: te-bs data_files: - split: train path: te-bs/train-* - config_name: te-cs data_files: - split: train path: te-cs/train-* - config_name: te-da data_files: - split: train path: te-da/train-* - config_name: te-de data_files: - split: train path: te-de/train-* - config_name: te-el data_files: - split: train path: te-el/train-* - config_name: te-en data_files: - split: train path: te-en/train-* - config_name: te-es data_files: - split: train path: te-es/train-* - config_name: te-et data_files: - split: train path: te-et/train-* - config_name: te-eu data_files: - split: train path: te-eu/train-* - config_name: te-fa data_files: - split: train path: te-fa/train-* - config_name: te-fi data_files: - split: train path: te-fi/train-* - config_name: te-fr data_files: - split: train path: te-fr/train-* - config_name: te-he data_files: - split: train path: te-he/train-* - config_name: te-hi data_files: - split: train path: te-hi/train-* - config_name: te-hr data_files: - split: train path: te-hr/train-* - config_name: te-hu data_files: - split: train path: te-hu/train-* - config_name: te-id data_files: - split: train path: te-id/train-* - config_name: te-it 
data_files: - split: train path: te-it/train-* - config_name: te-ja data_files: - split: train path: te-ja/train-* - config_name: te-ko data_files: - split: train path: te-ko/train-* - config_name: te-lt data_files: - split: train path: te-lt/train-* - config_name: te-lv data_files: - split: train path: te-lv/train-* - config_name: te-mk data_files: - split: train path: te-mk/train-* - config_name: te-ms data_files: - split: train path: te-ms/train-* - config_name: te-nl data_files: - split: train path: te-nl/train-* - config_name: te-no data_files: - split: train path: te-no/train-* - config_name: te-pl data_files: - split: train path: te-pl/train-* - config_name: te-pt data_files: - split: train path: te-pt/train-* - config_name: te-ro data_files: - split: train path: te-ro/train-* - config_name: te-ru data_files: - split: train path: te-ru/train-* - config_name: te-si data_files: - split: train path: te-si/train-* - config_name: te-sk data_files: - split: train path: te-sk/train-* - config_name: te-sl data_files: - split: train path: te-sl/train-* - config_name: te-sq data_files: - split: train path: te-sq/train-* - config_name: te-sr data_files: - split: train path: te-sr/train-* - config_name: te-sv data_files: - split: train path: te-sv/train-* - config_name: te-ta data_files: - split: train path: te-ta/train-* - config_name: te-th data_files: - split: train path: te-th/train-* - config_name: te-tr data_files: - split: train path: te-tr/train-* - config_name: te-vi data_files: - split: train path: te-vi/train-* - config_name: th-af data_files: - split: train path: th-af/train-* - config_name: th-ar data_files: - split: train path: th-ar/train-* - config_name: th-bg data_files: - split: train path: th-bg/train-* - config_name: th-bn data_files: - split: train path: th-bn/train-* - config_name: th-bs data_files: - split: train path: th-bs/train-* - config_name: th-ca data_files: - split: train path: th-ca/train-* - config_name: th-cs data_files: - split: train 
path: th-cs/train-* - config_name: th-da data_files: - split: train path: th-da/train-* - config_name: th-de data_files: - split: train path: th-de/train-* - config_name: th-el data_files: - split: train path: th-el/train-* - config_name: th-en data_files: - split: train path: th-en/train-* - config_name: th-es data_files: - split: train path: th-es/train-* - config_name: th-et data_files: - split: train path: th-et/train-* - config_name: th-eu data_files: - split: train path: th-eu/train-* - config_name: th-fa data_files: - split: train path: th-fa/train-* - config_name: th-fi data_files: - split: train path: th-fi/train-* - config_name: th-fr data_files: - split: train path: th-fr/train-* - config_name: th-gl data_files: - split: train path: th-gl/train-* - config_name: th-he data_files: - split: train path: th-he/train-* - config_name: th-hi data_files: - split: train path: th-hi/train-* - config_name: th-hr data_files: - split: train path: th-hr/train-* - config_name: th-hu data_files: - split: train path: th-hu/train-* - config_name: th-id data_files: - split: train path: th-id/train-* - config_name: th-is data_files: - split: train path: th-is/train-* - config_name: th-it data_files: - split: train path: th-it/train-* - config_name: th-ja data_files: - split: train path: th-ja/train-* - config_name: th-ka data_files: - split: train path: th-ka/train-* - config_name: th-kk data_files: - split: train path: th-kk/train-* - config_name: th-ko data_files: - split: train path: th-ko/train-* - config_name: th-lt data_files: - split: train path: th-lt/train-* - config_name: th-lv data_files: - split: train path: th-lv/train-* - config_name: th-mk data_files: - split: train path: th-mk/train-* - config_name: th-ml data_files: - split: train path: th-ml/train-* - config_name: th-ms data_files: - split: train path: th-ms/train-* - config_name: th-nl data_files: - split: train path: th-nl/train-* - config_name: th-no data_files: - split: train path: th-no/train-* - 
config_name: th-pl data_files: - split: train path: th-pl/train-* - config_name: th-pt data_files: - split: train path: th-pt/train-* - config_name: th-ro data_files: - split: train path: th-ro/train-* - config_name: th-ru data_files: - split: train path: th-ru/train-* - config_name: th-si data_files: - split: train path: th-si/train-* - config_name: th-sk data_files: - split: train path: th-sk/train-* - config_name: th-sl data_files: - split: train path: th-sl/train-* - config_name: th-sq data_files: - split: train path: th-sq/train-* - config_name: th-sr data_files: - split: train path: th-sr/train-* - config_name: th-sv data_files: - split: train path: th-sv/train-* - config_name: th-ta data_files: - split: train path: th-ta/train-* - config_name: th-te data_files: - split: train path: th-te/train-* - config_name: th-tl data_files: - split: train path: th-tl/train-* - config_name: th-tr data_files: - split: train path: th-tr/train-* - config_name: th-uk data_files: - split: train path: th-uk/train-* - config_name: th-ur data_files: - split: train path: th-ur/train-* - config_name: th-vi data_files: - split: train path: th-vi/train-* - config_name: tl-ar data_files: - split: train path: tl-ar/train-* - config_name: tl-bg data_files: - split: train path: tl-bg/train-* - config_name: tl-bn data_files: - split: train path: tl-bn/train-* - config_name: tl-bs data_files: - split: train path: tl-bs/train-* - config_name: tl-cs data_files: - split: train path: tl-cs/train-* - config_name: tl-da data_files: - split: train path: tl-da/train-* - config_name: tl-de data_files: - split: train path: tl-de/train-* - config_name: tl-el data_files: - split: train path: tl-el/train-* - config_name: tl-en data_files: - split: train path: tl-en/train-* - config_name: tl-es data_files: - split: train path: tl-es/train-* - config_name: tl-et data_files: - split: train path: tl-et/train-* - config_name: tl-eu data_files: - split: train path: tl-eu/train-* - config_name: tl-fa 
data_files: - split: train path: tl-fa/train-* - config_name: tl-fi data_files: - split: train path: tl-fi/train-* - config_name: tl-fr data_files: - split: train path: tl-fr/train-* - config_name: tl-he data_files: - split: train path: tl-he/train-* - config_name: tl-hi data_files: - split: train path: tl-hi/train-* - config_name: tl-hr data_files: - split: train path: tl-hr/train-* - config_name: tl-hu data_files: - split: train path: tl-hu/train-* - config_name: tl-id data_files: - split: train path: tl-id/train-* - config_name: tl-is data_files: - split: train path: tl-is/train-* - config_name: tl-it data_files: - split: train path: tl-it/train-* - config_name: tl-ja data_files: - split: train path: tl-ja/train-* - config_name: tl-ka data_files: - split: train path: tl-ka/train-* - config_name: tl-ko data_files: - split: train path: tl-ko/train-* - config_name: tl-lt data_files: - split: train path: tl-lt/train-* - config_name: tl-mk data_files: - split: train path: tl-mk/train-* - config_name: tl-ml data_files: - split: train path: tl-ml/train-* - config_name: tl-ms data_files: - split: train path: tl-ms/train-* - config_name: tl-nl data_files: - split: train path: tl-nl/train-* - config_name: tl-no data_files: - split: train path: tl-no/train-* - config_name: tl-pl data_files: - split: train path: tl-pl/train-* - config_name: tl-pt data_files: - split: train path: tl-pt/train-* - config_name: tl-ro data_files: - split: train path: tl-ro/train-* - config_name: tl-ru data_files: - split: train path: tl-ru/train-* - config_name: tl-si data_files: - split: train path: tl-si/train-* - config_name: tl-sk data_files: - split: train path: tl-sk/train-* - config_name: tl-sl data_files: - split: train path: tl-sl/train-* - config_name: tl-sq data_files: - split: train path: tl-sq/train-* - config_name: tl-sr data_files: - split: train path: tl-sr/train-* - config_name: tl-sv data_files: - split: train path: tl-sv/train-* - config_name: tl-th data_files: - split: train 
path: tl-th/train-* - config_name: tl-tr data_files: - split: train path: tl-tr/train-* - config_name: tl-uk data_files: - split: train path: tl-uk/train-* - config_name: tl-vi data_files: - split: train path: tl-vi/train-* - config_name: tr-af data_files: - split: train path: tr-af/train-* - config_name: tr-ar data_files: - split: train path: tr-ar/train-* - config_name: tr-bg data_files: - split: train path: tr-bg/train-* - config_name: tr-bn data_files: - split: train path: tr-bn/train-* - config_name: tr-bs data_files: - split: train path: tr-bs/train-* - config_name: tr-ca data_files: - split: train path: tr-ca/train-* - config_name: tr-cs data_files: - split: train path: tr-cs/train-* - config_name: tr-da data_files: - split: train path: tr-da/train-* - config_name: tr-de data_files: - split: train path: tr-de/train-* - config_name: tr-el data_files: - split: train path: tr-el/train-* - config_name: tr-en data_files: - split: train path: tr-en/train-* - config_name: tr-es data_files: - split: train path: tr-es/train-* - config_name: tr-et data_files: - split: train path: tr-et/train-* - config_name: tr-eu data_files: - split: train path: tr-eu/train-* - config_name: tr-fa data_files: - split: train path: tr-fa/train-* - config_name: tr-fi data_files: - split: train path: tr-fi/train-* - config_name: tr-fr data_files: - split: train path: tr-fr/train-* - config_name: tr-gl data_files: - split: train path: tr-gl/train-* - config_name: tr-he data_files: - split: train path: tr-he/train-* - config_name: tr-hi data_files: - split: train path: tr-hi/train-* - config_name: tr-hr data_files: - split: train path: tr-hr/train-* - config_name: tr-hu data_files: - split: train path: tr-hu/train-* - config_name: tr-hy data_files: - split: train path: tr-hy/train-* - config_name: tr-id data_files: - split: train path: tr-id/train-* - config_name: tr-is data_files: - split: train path: tr-is/train-* - config_name: tr-it data_files: - split: train path: tr-it/train-* - 
config_name: tr-ja data_files: - split: train path: tr-ja/train-* - config_name: tr-ka data_files: - split: train path: tr-ka/train-* - config_name: tr-kk data_files: - split: train path: tr-kk/train-* - config_name: tr-ko data_files: - split: train path: tr-ko/train-* - config_name: tr-lt data_files: - split: train path: tr-lt/train-* - config_name: tr-lv data_files: - split: train path: tr-lv/train-* - config_name: tr-mk data_files: - split: train path: tr-mk/train-* - config_name: tr-ml data_files: - split: train path: tr-ml/train-* - config_name: tr-ms data_files: - split: train path: tr-ms/train-* - config_name: tr-nl data_files: - split: train path: tr-nl/train-* - config_name: tr-no data_files: - split: train path: tr-no/train-* - config_name: tr-pl data_files: - split: train path: tr-pl/train-* - config_name: tr-pt data_files: - split: train path: tr-pt/train-* - config_name: tr-ro data_files: - split: train path: tr-ro/train-* - config_name: tr-ru data_files: - split: train path: tr-ru/train-* - config_name: tr-si data_files: - split: train path: tr-si/train-* - config_name: tr-sk data_files: - split: train path: tr-sk/train-* - config_name: tr-sl data_files: - split: train path: tr-sl/train-* - config_name: tr-sq data_files: - split: train path: tr-sq/train-* - config_name: tr-sr data_files: - split: train path: tr-sr/train-* - config_name: tr-sv data_files: - split: train path: tr-sv/train-* - config_name: tr-ta data_files: - split: train path: tr-ta/train-* - config_name: tr-te data_files: - split: train path: tr-te/train-* - config_name: tr-th data_files: - split: train path: tr-th/train-* - config_name: tr-tl data_files: - split: train path: tr-tl/train-* - config_name: tr-uk data_files: - split: train path: tr-uk/train-* - config_name: tr-ur data_files: - split: train path: tr-ur/train-* - config_name: tr-vi data_files: - split: train path: tr-vi/train-* - config_name: uk-af data_files: - split: train path: uk-af/train-* - config_name: uk-ar 
data_files: - split: train path: uk-ar/train-* - config_name: uk-bg data_files: - split: train path: uk-bg/train-* - config_name: uk-bn data_files: - split: train path: uk-bn/train-* - config_name: uk-bs data_files: - split: train path: uk-bs/train-* - config_name: uk-ca data_files: - split: train path: uk-ca/train-* - config_name: uk-cs data_files: - split: train path: uk-cs/train-* - config_name: uk-da data_files: - split: train path: uk-da/train-* - config_name: uk-de data_files: - split: train path: uk-de/train-* - config_name: uk-el data_files: - split: train path: uk-el/train-* - config_name: uk-en data_files: - split: train path: uk-en/train-* - config_name: uk-es data_files: - split: train path: uk-es/train-* - config_name: uk-et data_files: - split: train path: uk-et/train-* - config_name: uk-eu data_files: - split: train path: uk-eu/train-* - config_name: uk-fa data_files: - split: train path: uk-fa/train-* - config_name: uk-fi data_files: - split: train path: uk-fi/train-* - config_name: uk-fr data_files: - split: train path: uk-fr/train-* - config_name: uk-gl data_files: - split: train path: uk-gl/train-* - config_name: uk-he data_files: - split: train path: uk-he/train-* - config_name: uk-hi data_files: - split: train path: uk-hi/train-* - config_name: uk-hr data_files: - split: train path: uk-hr/train-* - config_name: uk-hu data_files: - split: train path: uk-hu/train-* - config_name: uk-id data_files: - split: train path: uk-id/train-* - config_name: uk-is data_files: - split: train path: uk-is/train-* - config_name: uk-it data_files: - split: train path: uk-it/train-* - config_name: uk-ja data_files: - split: train path: uk-ja/train-* - config_name: uk-ka data_files: - split: train path: uk-ka/train-* - config_name: uk-kk data_files: - split: train path: uk-kk/train-* - config_name: uk-ko data_files: - split: train path: uk-ko/train-* - config_name: uk-lt data_files: - split: train path: uk-lt/train-* - config_name: uk-lv data_files: - split: train 
path: uk-lv/train-* - config_name: uk-mk data_files: - split: train path: uk-mk/train-* - config_name: uk-ml data_files: - split: train path: uk-ml/train-* - config_name: uk-ms data_files: - split: train path: uk-ms/train-* - config_name: uk-nl data_files: - split: train path: uk-nl/train-* - config_name: uk-no data_files: - split: train path: uk-no/train-* - config_name: uk-pl data_files: - split: train path: uk-pl/train-* - config_name: uk-pt data_files: - split: train path: uk-pt/train-* - config_name: uk-ro data_files: - split: train path: uk-ro/train-* - config_name: uk-ru data_files: - split: train path: uk-ru/train-* - config_name: uk-si data_files: - split: train path: uk-si/train-* - config_name: uk-sk data_files: - split: train path: uk-sk/train-* - config_name: uk-sl data_files: - split: train path: uk-sl/train-* - config_name: uk-sq data_files: - split: train path: uk-sq/train-* - config_name: uk-sr data_files: - split: train path: uk-sr/train-* - config_name: uk-sv data_files: - split: train path: uk-sv/train-* - config_name: uk-th data_files: - split: train path: uk-th/train-* - config_name: uk-tl data_files: - split: train path: uk-tl/train-* - config_name: uk-tr data_files: - split: train path: uk-tr/train-* - config_name: uk-ur data_files: - split: train path: uk-ur/train-* - config_name: uk-vi data_files: - split: train path: uk-vi/train-* - config_name: ur-ar data_files: - split: train path: ur-ar/train-* - config_name: ur-bg data_files: - split: train path: ur-bg/train-* - config_name: ur-bn data_files: - split: train path: ur-bn/train-* - config_name: ur-bs data_files: - split: train path: ur-bs/train-* - config_name: ur-cs data_files: - split: train path: ur-cs/train-* - config_name: ur-da data_files: - split: train path: ur-da/train-* - config_name: ur-de data_files: - split: train path: ur-de/train-* - config_name: ur-el data_files: - split: train path: ur-el/train-* - config_name: ur-en data_files: - split: train path: ur-en/train-* - 
config_name: ur-es data_files: - split: train path: ur-es/train-* - config_name: ur-et data_files: - split: train path: ur-et/train-* - config_name: ur-eu data_files: - split: train path: ur-eu/train-* - config_name: ur-fa data_files: - split: train path: ur-fa/train-* - config_name: ur-fi data_files: - split: train path: ur-fi/train-* - config_name: ur-fr data_files: - split: train path: ur-fr/train-* - config_name: ur-gl data_files: - split: train path: ur-gl/train-* - config_name: ur-he data_files: - split: train path: ur-he/train-* - config_name: ur-hi data_files: - split: train path: ur-hi/train-* - config_name: ur-hr data_files: - split: train path: ur-hr/train-* - config_name: ur-hu data_files: - split: train path: ur-hu/train-* - config_name: ur-id data_files: - split: train path: ur-id/train-* - config_name: ur-is data_files: - split: train path: ur-is/train-* - config_name: ur-it data_files: - split: train path: ur-it/train-* - config_name: ur-ja data_files: - split: train path: ur-ja/train-* - config_name: ur-ka data_files: - split: train path: ur-ka/train-* - config_name: ur-ko data_files: - split: train path: ur-ko/train-* - config_name: ur-lt data_files: - split: train path: ur-lt/train-* - config_name: ur-lv data_files: - split: train path: ur-lv/train-* - config_name: ur-mk data_files: - split: train path: ur-mk/train-* - config_name: ur-ml data_files: - split: train path: ur-ml/train-* - config_name: ur-ms data_files: - split: train path: ur-ms/train-* - config_name: ur-nl data_files: - split: train path: ur-nl/train-* - config_name: ur-no data_files: - split: train path: ur-no/train-* - config_name: ur-pl data_files: - split: train path: ur-pl/train-* - config_name: ur-pt data_files: - split: train path: ur-pt/train-* - config_name: ur-ro data_files: - split: train path: ur-ro/train-* - config_name: ur-ru data_files: - split: train path: ur-ru/train-* - config_name: ur-si data_files: - split: train path: ur-si/train-* - config_name: ur-sk 
data_files: - split: train path: ur-sk/train-* - config_name: ur-sl data_files: - split: train path: ur-sl/train-* - config_name: ur-sq data_files: - split: train path: ur-sq/train-* - config_name: ur-sr data_files: - split: train path: ur-sr/train-* - config_name: ur-sv data_files: - split: train path: ur-sv/train-* - config_name: ur-th data_files: - split: train path: ur-th/train-* - config_name: ur-tr data_files: - split: train path: ur-tr/train-* - config_name: ur-uk data_files: - split: train path: ur-uk/train-* - config_name: ur-vi data_files: - split: train path: ur-vi/train-* - config_name: vi-af data_files: - split: train path: vi-af/train-* - config_name: vi-ar data_files: - split: train path: vi-ar/train-* - config_name: vi-bg data_files: - split: train path: vi-bg/train-* - config_name: vi-bn data_files: - split: train path: vi-bn/train-* - config_name: vi-bs data_files: - split: train path: vi-bs/train-* - config_name: vi-ca data_files: - split: train path: vi-ca/train-* - config_name: vi-cs data_files: - split: train path: vi-cs/train-* - config_name: vi-da data_files: - split: train path: vi-da/train-* - config_name: vi-de data_files: - split: train path: vi-de/train-* - config_name: vi-el data_files: - split: train path: vi-el/train-* - config_name: vi-en data_files: - split: train path: vi-en/train-* - config_name: vi-es data_files: - split: train path: vi-es/train-* - config_name: vi-et data_files: - split: train path: vi-et/train-* - config_name: vi-eu data_files: - split: train path: vi-eu/train-* - config_name: vi-fa data_files: - split: train path: vi-fa/train-* - config_name: vi-fi data_files: - split: train path: vi-fi/train-* - config_name: vi-fr data_files: - split: train path: vi-fr/train-* - config_name: vi-gl data_files: - split: train path: vi-gl/train-* - config_name: vi-he data_files: - split: train path: vi-he/train-* - config_name: vi-hi data_files: - split: train path: vi-hi/train-* - config_name: vi-hr data_files: - split: train 
path: vi-hr/train-* - config_name: vi-hu data_files: - split: train path: vi-hu/train-* - config_name: vi-id data_files: - split: train path: vi-id/train-* - config_name: vi-is data_files: - split: train path: vi-is/train-* - config_name: vi-it data_files: - split: train path: vi-it/train-* - config_name: vi-ja data_files: - split: train path: vi-ja/train-* - config_name: vi-ka data_files: - split: train path: vi-ka/train-* - config_name: vi-kk data_files: - split: train path: vi-kk/train-* - config_name: vi-ko data_files: - split: train path: vi-ko/train-* - config_name: vi-lt data_files: - split: train path: vi-lt/train-* - config_name: vi-lv data_files: - split: train path: vi-lv/train-* - config_name: vi-mk data_files: - split: train path: vi-mk/train-* - config_name: vi-ml data_files: - split: train path: vi-ml/train-* - config_name: vi-ms data_files: - split: train path: vi-ms/train-* - config_name: vi-nl data_files: - split: train path: vi-nl/train-* - config_name: vi-no data_files: - split: train path: vi-no/train-* - config_name: vi-pl data_files: - split: train path: vi-pl/train-* - config_name: vi-pt data_files: - split: train path: vi-pt/train-* - config_name: vi-ro data_files: - split: train path: vi-ro/train-* - config_name: vi-ru data_files: - split: train path: vi-ru/train-* - config_name: vi-si data_files: - split: train path: vi-si/train-* - config_name: vi-sk data_files: - split: train path: vi-sk/train-* - config_name: vi-sl data_files: - split: train path: vi-sl/train-* - config_name: vi-sq data_files: - split: train path: vi-sq/train-* - config_name: vi-sr data_files: - split: train path: vi-sr/train-* - config_name: vi-sv data_files: - split: train path: vi-sv/train-* - config_name: vi-ta data_files: - split: train path: vi-ta/train-* - config_name: vi-te data_files: - split: train path: vi-te/train-* - config_name: vi-th data_files: - split: train path: vi-th/train-* - config_name: vi-tl data_files: - split: train path: vi-tl/train-* - 
config_name: vi-tr data_files: - split: train path: vi-tr/train-* - config_name: vi-uk data_files: - split: train path: vi-uk/train-* - config_name: vi-ur data_files: - split: train path: vi-ur/train-* --- # Dataset Card for OpenSubtitles-v2018 ## Dataset Description - **Homepage:** https://opus.nlpl.eu/OpenSubtitles/corpus/version/OpenSubtitles ### Dataset Summary This is a new collection of translated movie subtitles from http://www.opensubtitles.org/. IMPORTANT: If you use the OpenSubtitles corpus, please add a link to http://www.opensubtitles.org/ to your website and to any reports and publications produced with the data! This is a slightly cleaner version of the subtitle collection using improved sentence alignment and better language checking. ### Citation Information P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016).
提供机构:
alamios
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作