OpenLLM-BPI/flores
收藏 | Hugging Face | 2025-11-21 更新 | 2025-12-20 收录
下载链接:
https://hf-mirror.com/datasets/OpenLLM-BPI/flores
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
dataset_info:
- config_name: arb_Arab-deu_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_arb_Arab
dtype: string
- name: sentence_deu_Latn
dtype: string
splits:
- name: dev
num_bytes: 473698
num_examples: 997
- name: devtest
num_bytes: 495566
num_examples: 1012
download_size: 481669
dataset_size: 969264
- config_name: arb_Arab-eng_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_arb_Arab
dtype: string
- name: sentence_eng_Latn
dtype: string
splits:
- name: dev
num_bytes: 450052
num_examples: 997
- name: devtest
num_bytes: 471304
num_examples: 1012
download_size: 450633
dataset_size: 921356
- config_name: arb_Arab-fra_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_arb_Arab
dtype: string
- name: sentence_fra_Latn
dtype: string
splits:
- name: dev
num_bytes: 479335
num_examples: 997
- name: devtest
num_bytes: 503135
num_examples: 1012
download_size: 483348
dataset_size: 982470
- config_name: arb_Arab-ita_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_arb_Arab
dtype: string
- name: sentence_ita_Latn
dtype: string
splits:
- name: dev
num_bytes: 472950
num_examples: 997
- name: devtest
num_bytes: 496467
num_examples: 1012
download_size: 481178
dataset_size: 969417
- config_name: arb_Arab-nld_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_arb_Arab
dtype: string
- name: sentence_nld_Latn
dtype: string
splits:
- name: dev
num_bytes: 464040
num_examples: 997
- name: devtest
num_bytes: 486939
num_examples: 1012
download_size: 467654
dataset_size: 950979
- config_name: arb_Arab-por_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_arb_Arab
dtype: string
- name: sentence_por_Latn
dtype: string
splits:
- name: dev
num_bytes: 464662
num_examples: 997
- name: devtest
num_bytes: 486656
num_examples: 1012
download_size: 470283
dataset_size: 951318
- config_name: arb_Arab-spa_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_arb_Arab
dtype: string
- name: sentence_spa_Latn
dtype: string
splits:
- name: dev
num_bytes: 476840
num_examples: 997
- name: devtest
num_bytes: 499107
num_examples: 1012
download_size: 484347
dataset_size: 975947
- config_name: deu_Latn-arb_Arab
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_deu_Latn
dtype: string
- name: sentence_arb_Arab
dtype: string
splits:
- name: dev
num_bytes: 473698
num_examples: 997
- name: devtest
num_bytes: 495566
num_examples: 1012
download_size: 481669
dataset_size: 969264
- config_name: deu_Latn-eng_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_deu_Latn
dtype: string
- name: sentence_eng_Latn
dtype: string
splits:
- name: dev
num_bytes: 398412
num_examples: 997
- name: devtest
num_bytes: 416399
num_examples: 1012
download_size: 429099
dataset_size: 814811
- config_name: deu_Latn-fra_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_deu_Latn
dtype: string
- name: sentence_fra_Latn
dtype: string
splits:
- name: dev
num_bytes: 427695
num_examples: 997
- name: devtest
num_bytes: 448230
num_examples: 1012
download_size: 461814
dataset_size: 875925
- config_name: deu_Latn-ita_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_deu_Latn
dtype: string
- name: sentence_ita_Latn
dtype: string
splits:
- name: dev
num_bytes: 421310
num_examples: 997
- name: devtest
num_bytes: 441562
num_examples: 1012
download_size: 459644
dataset_size: 862872
- config_name: deu_Latn-nld_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_deu_Latn
dtype: string
- name: sentence_nld_Latn
dtype: string
splits:
- name: dev
num_bytes: 412400
num_examples: 997
- name: devtest
num_bytes: 432034
num_examples: 1012
download_size: 446120
dataset_size: 844434
- config_name: deu_Latn-por_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_deu_Latn
dtype: string
- name: sentence_por_Latn
dtype: string
splits:
- name: dev
num_bytes: 413022
num_examples: 997
- name: devtest
num_bytes: 431751
num_examples: 1012
download_size: 448749
dataset_size: 844773
- config_name: deu_Latn-spa_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_deu_Latn
dtype: string
- name: sentence_spa_Latn
dtype: string
splits:
- name: dev
num_bytes: 425200
num_examples: 997
- name: devtest
num_bytes: 444202
num_examples: 1012
download_size: 462813
dataset_size: 869402
- config_name: eng_Latn-arb_Arab
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_eng_Latn
dtype: string
- name: sentence_arb_Arab
dtype: string
splits:
- name: dev
num_bytes: 450052
num_examples: 997
- name: devtest
num_bytes: 471304
num_examples: 1012
download_size: 450633
dataset_size: 921356
- config_name: eng_Latn-deu_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_eng_Latn
dtype: string
- name: sentence_deu_Latn
dtype: string
splits:
- name: dev
num_bytes: 398412
num_examples: 997
- name: devtest
num_bytes: 416399
num_examples: 1012
download_size: 429099
dataset_size: 814811
- config_name: eng_Latn-fra_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_eng_Latn
dtype: string
- name: sentence_fra_Latn
dtype: string
splits:
- name: dev
num_bytes: 404049
num_examples: 997
- name: devtest
num_bytes: 423968
num_examples: 1012
download_size: 430778
dataset_size: 828017
- config_name: eng_Latn-ita_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_eng_Latn
dtype: string
- name: sentence_ita_Latn
dtype: string
splits:
- name: dev
num_bytes: 397664
num_examples: 997
- name: devtest
num_bytes: 417300
num_examples: 1012
download_size: 428608
dataset_size: 814964
- config_name: eng_Latn-nld_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_eng_Latn
dtype: string
- name: sentence_nld_Latn
dtype: string
splits:
- name: dev
num_bytes: 388754
num_examples: 997
- name: devtest
num_bytes: 407772
num_examples: 1012
download_size: 415084
dataset_size: 796526
- config_name: eng_Latn-por_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_eng_Latn
dtype: string
- name: sentence_por_Latn
dtype: string
splits:
- name: dev
num_bytes: 389376
num_examples: 997
- name: devtest
num_bytes: 407489
num_examples: 1012
download_size: 417713
dataset_size: 796865
- config_name: eng_Latn-spa_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_eng_Latn
dtype: string
- name: sentence_spa_Latn
dtype: string
splits:
- name: dev
num_bytes: 401554
num_examples: 997
- name: devtest
num_bytes: 419940
num_examples: 1012
download_size: 431777
dataset_size: 821494
- config_name: fra_Latn-arb_Arab
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_fra_Latn
dtype: string
- name: sentence_arb_Arab
dtype: string
splits:
- name: dev
num_bytes: 479335
num_examples: 997
- name: devtest
num_bytes: 503135
num_examples: 1012
download_size: 483348
dataset_size: 982470
- config_name: fra_Latn-deu_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_fra_Latn
dtype: string
- name: sentence_deu_Latn
dtype: string
splits:
- name: dev
num_bytes: 427695
num_examples: 997
- name: devtest
num_bytes: 448230
num_examples: 1012
download_size: 461814
dataset_size: 875925
- config_name: fra_Latn-eng_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_fra_Latn
dtype: string
- name: sentence_eng_Latn
dtype: string
splits:
- name: dev
num_bytes: 404049
num_examples: 997
- name: devtest
num_bytes: 423968
num_examples: 1012
download_size: 430778
dataset_size: 828017
- config_name: fra_Latn-ita_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_fra_Latn
dtype: string
- name: sentence_ita_Latn
dtype: string
splits:
- name: dev
num_bytes: 426947
num_examples: 997
- name: devtest
num_bytes: 449131
num_examples: 1012
download_size: 461323
dataset_size: 876078
- config_name: fra_Latn-nld_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_fra_Latn
dtype: string
- name: sentence_nld_Latn
dtype: string
splits:
- name: dev
num_bytes: 418037
num_examples: 997
- name: devtest
num_bytes: 439603
num_examples: 1012
download_size: 447799
dataset_size: 857640
- config_name: fra_Latn-por_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_fra_Latn
dtype: string
- name: sentence_por_Latn
dtype: string
splits:
- name: dev
num_bytes: 418659
num_examples: 997
- name: devtest
num_bytes: 439320
num_examples: 1012
download_size: 450428
dataset_size: 857979
- config_name: fra_Latn-spa_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_fra_Latn
dtype: string
- name: sentence_spa_Latn
dtype: string
splits:
- name: dev
num_bytes: 430837
num_examples: 997
- name: devtest
num_bytes: 451771
num_examples: 1012
download_size: 464492
dataset_size: 882608
- config_name: ita_Latn-arb_Arab
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_ita_Latn
dtype: string
- name: sentence_arb_Arab
dtype: string
splits:
- name: dev
num_bytes: 472950
num_examples: 997
- name: devtest
num_bytes: 496467
num_examples: 1012
download_size: 481178
dataset_size: 969417
- config_name: ita_Latn-deu_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_ita_Latn
dtype: string
- name: sentence_deu_Latn
dtype: string
splits:
- name: dev
num_bytes: 421310
num_examples: 997
- name: devtest
num_bytes: 441562
num_examples: 1012
download_size: 459644
dataset_size: 862872
- config_name: ita_Latn-eng_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_ita_Latn
dtype: string
- name: sentence_eng_Latn
dtype: string
splits:
- name: dev
num_bytes: 397664
num_examples: 997
- name: devtest
num_bytes: 417300
num_examples: 1012
download_size: 428608
dataset_size: 814964
- config_name: ita_Latn-fra_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_ita_Latn
dtype: string
- name: sentence_fra_Latn
dtype: string
splits:
- name: dev
num_bytes: 426947
num_examples: 997
- name: devtest
num_bytes: 449131
num_examples: 1012
download_size: 461323
dataset_size: 876078
- config_name: ita_Latn-nld_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_ita_Latn
dtype: string
- name: sentence_nld_Latn
dtype: string
splits:
- name: dev
num_bytes: 411652
num_examples: 997
- name: devtest
num_bytes: 432935
num_examples: 1012
download_size: 445629
dataset_size: 844587
- config_name: ita_Latn-por_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_ita_Latn
dtype: string
- name: sentence_por_Latn
dtype: string
splits:
- name: dev
num_bytes: 412274
num_examples: 997
- name: devtest
num_bytes: 432652
num_examples: 1012
download_size: 448258
dataset_size: 844926
- config_name: ita_Latn-spa_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_ita_Latn
dtype: string
- name: sentence_spa_Latn
dtype: string
splits:
- name: dev
num_bytes: 424452
num_examples: 997
- name: devtest
num_bytes: 445103
num_examples: 1012
download_size: 462322
dataset_size: 869555
- config_name: nld_Latn-arb_Arab
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_nld_Latn
dtype: string
- name: sentence_arb_Arab
dtype: string
splits:
- name: dev
num_bytes: 464040
num_examples: 997
- name: devtest
num_bytes: 486939
num_examples: 1012
download_size: 467654
dataset_size: 950979
- config_name: nld_Latn-deu_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_nld_Latn
dtype: string
- name: sentence_deu_Latn
dtype: string
splits:
- name: dev
num_bytes: 412400
num_examples: 997
- name: devtest
num_bytes: 432034
num_examples: 1012
download_size: 446120
dataset_size: 844434
- config_name: nld_Latn-eng_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_nld_Latn
dtype: string
- name: sentence_eng_Latn
dtype: string
splits:
- name: dev
num_bytes: 388754
num_examples: 997
- name: devtest
num_bytes: 407772
num_examples: 1012
download_size: 415084
dataset_size: 796526
- config_name: nld_Latn-fra_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_nld_Latn
dtype: string
- name: sentence_fra_Latn
dtype: string
splits:
- name: dev
num_bytes: 418037
num_examples: 997
- name: devtest
num_bytes: 439603
num_examples: 1012
download_size: 447799
dataset_size: 857640
- config_name: nld_Latn-ita_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_nld_Latn
dtype: string
- name: sentence_ita_Latn
dtype: string
splits:
- name: dev
num_bytes: 411652
num_examples: 997
- name: devtest
num_bytes: 432935
num_examples: 1012
download_size: 445629
dataset_size: 844587
- config_name: nld_Latn-por_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_nld_Latn
dtype: string
- name: sentence_por_Latn
dtype: string
splits:
- name: dev
num_bytes: 403364
num_examples: 997
- name: devtest
num_bytes: 423124
num_examples: 1012
download_size: 434734
dataset_size: 826488
- config_name: nld_Latn-spa_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_nld_Latn
dtype: string
- name: sentence_spa_Latn
dtype: string
splits:
- name: dev
num_bytes: 415542
num_examples: 997
- name: devtest
num_bytes: 435575
num_examples: 1012
download_size: 448798
dataset_size: 851117
- config_name: por_Latn-arb_Arab
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_por_Latn
dtype: string
- name: sentence_arb_Arab
dtype: string
splits:
- name: dev
num_bytes: 464662
num_examples: 997
- name: devtest
num_bytes: 486656
num_examples: 1012
download_size: 470283
dataset_size: 951318
- config_name: por_Latn-deu_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_por_Latn
dtype: string
- name: sentence_deu_Latn
dtype: string
splits:
- name: dev
num_bytes: 413022
num_examples: 997
- name: devtest
num_bytes: 431751
num_examples: 1012
download_size: 448749
dataset_size: 844773
- config_name: por_Latn-eng_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_por_Latn
dtype: string
- name: sentence_eng_Latn
dtype: string
splits:
- name: dev
num_bytes: 389376
num_examples: 997
- name: devtest
num_bytes: 407489
num_examples: 1012
download_size: 417713
dataset_size: 796865
- config_name: por_Latn-fra_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_por_Latn
dtype: string
- name: sentence_fra_Latn
dtype: string
splits:
- name: dev
num_bytes: 418659
num_examples: 997
- name: devtest
num_bytes: 439320
num_examples: 1012
download_size: 450428
dataset_size: 857979
- config_name: por_Latn-ita_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_por_Latn
dtype: string
- name: sentence_ita_Latn
dtype: string
splits:
- name: dev
num_bytes: 412274
num_examples: 997
- name: devtest
num_bytes: 432652
num_examples: 1012
download_size: 448258
dataset_size: 844926
- config_name: por_Latn-nld_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_por_Latn
dtype: string
- name: sentence_nld_Latn
dtype: string
splits:
- name: dev
num_bytes: 403364
num_examples: 997
- name: devtest
num_bytes: 423124
num_examples: 1012
download_size: 434734
dataset_size: 826488
- config_name: por_Latn-spa_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_por_Latn
dtype: string
- name: sentence_spa_Latn
dtype: string
splits:
- name: dev
num_bytes: 416164
num_examples: 997
- name: devtest
num_bytes: 435292
num_examples: 1012
download_size: 451427
dataset_size: 851456
- config_name: spa_Latn-arb_Arab
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_spa_Latn
dtype: string
- name: sentence_arb_Arab
dtype: string
splits:
- name: dev
num_bytes: 476840
num_examples: 997
- name: devtest
num_bytes: 499107
num_examples: 1012
download_size: 484347
dataset_size: 975947
- config_name: spa_Latn-deu_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_spa_Latn
dtype: string
- name: sentence_deu_Latn
dtype: string
splits:
- name: dev
num_bytes: 425200
num_examples: 997
- name: devtest
num_bytes: 444202
num_examples: 1012
download_size: 462813
dataset_size: 869402
- config_name: spa_Latn-eng_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_spa_Latn
dtype: string
- name: sentence_eng_Latn
dtype: string
splits:
- name: dev
num_bytes: 401554
num_examples: 997
- name: devtest
num_bytes: 419940
num_examples: 1012
download_size: 431777
dataset_size: 821494
- config_name: spa_Latn-fra_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_spa_Latn
dtype: string
- name: sentence_fra_Latn
dtype: string
splits:
- name: dev
num_bytes: 430837
num_examples: 997
- name: devtest
num_bytes: 451771
num_examples: 1012
download_size: 464492
dataset_size: 882608
- config_name: spa_Latn-ita_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_spa_Latn
dtype: string
- name: sentence_ita_Latn
dtype: string
splits:
- name: dev
num_bytes: 424452
num_examples: 997
- name: devtest
num_bytes: 445103
num_examples: 1012
download_size: 462322
dataset_size: 869555
- config_name: spa_Latn-nld_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_spa_Latn
dtype: string
- name: sentence_nld_Latn
dtype: string
splits:
- name: dev
num_bytes: 415542
num_examples: 997
- name: devtest
num_bytes: 435575
num_examples: 1012
download_size: 448798
dataset_size: 851117
- config_name: spa_Latn-por_Latn
features:
- name: id
dtype: int32
- name: URL
dtype: string
- name: domain
dtype: string
- name: topic
dtype: string
- name: has_image
dtype: int32
- name: has_hyperlink
dtype: int32
- name: sentence_spa_Latn
dtype: string
- name: sentence_por_Latn
dtype: string
splits:
- name: dev
num_bytes: 416164
num_examples: 997
- name: devtest
num_bytes: 435292
num_examples: 1012
download_size: 451427
dataset_size: 851456
configs:
- config_name: arb_Arab-deu_Latn
data_files:
- split: dev
path: arb_Arab-deu_Latn/dev-*
- split: devtest
path: arb_Arab-deu_Latn/devtest-*
- config_name: arb_Arab-eng_Latn
data_files:
- split: dev
path: arb_Arab-eng_Latn/dev-*
- split: devtest
path: arb_Arab-eng_Latn/devtest-*
- config_name: arb_Arab-fra_Latn
data_files:
- split: dev
path: arb_Arab-fra_Latn/dev-*
- split: devtest
path: arb_Arab-fra_Latn/devtest-*
- config_name: arb_Arab-ita_Latn
data_files:
- split: dev
path: arb_Arab-ita_Latn/dev-*
- split: devtest
path: arb_Arab-ita_Latn/devtest-*
- config_name: arb_Arab-nld_Latn
data_files:
- split: dev
path: arb_Arab-nld_Latn/dev-*
- split: devtest
path: arb_Arab-nld_Latn/devtest-*
- config_name: arb_Arab-por_Latn
data_files:
- split: dev
path: arb_Arab-por_Latn/dev-*
- split: devtest
path: arb_Arab-por_Latn/devtest-*
- config_name: arb_Arab-spa_Latn
data_files:
- split: dev
path: arb_Arab-spa_Latn/dev-*
- split: devtest
path: arb_Arab-spa_Latn/devtest-*
- config_name: deu_Latn-arb_Arab
data_files:
- split: dev
path: deu_Latn-arb_Arab/dev-*
- split: devtest
path: deu_Latn-arb_Arab/devtest-*
- config_name: deu_Latn-eng_Latn
data_files:
- split: dev
path: deu_Latn-eng_Latn/dev-*
- split: devtest
path: deu_Latn-eng_Latn/devtest-*
- config_name: deu_Latn-fra_Latn
data_files:
- split: dev
path: deu_Latn-fra_Latn/dev-*
- split: devtest
path: deu_Latn-fra_Latn/devtest-*
- config_name: deu_Latn-ita_Latn
data_files:
- split: dev
path: deu_Latn-ita_Latn/dev-*
- split: devtest
path: deu_Latn-ita_Latn/devtest-*
- config_name: deu_Latn-nld_Latn
data_files:
- split: dev
path: deu_Latn-nld_Latn/dev-*
- split: devtest
path: deu_Latn-nld_Latn/devtest-*
- config_name: deu_Latn-por_Latn
data_files:
- split: dev
path: deu_Latn-por_Latn/dev-*
- split: devtest
path: deu_Latn-por_Latn/devtest-*
- config_name: deu_Latn-spa_Latn
data_files:
- split: dev
path: deu_Latn-spa_Latn/dev-*
- split: devtest
path: deu_Latn-spa_Latn/devtest-*
- config_name: eng_Latn-arb_Arab
data_files:
- split: dev
path: eng_Latn-arb_Arab/dev-*
- split: devtest
path: eng_Latn-arb_Arab/devtest-*
- config_name: eng_Latn-deu_Latn
data_files:
- split: dev
path: eng_Latn-deu_Latn/dev-*
- split: devtest
path: eng_Latn-deu_Latn/devtest-*
- config_name: eng_Latn-fra_Latn
data_files:
- split: dev
path: eng_Latn-fra_Latn/dev-*
- split: devtest
path: eng_Latn-fra_Latn/devtest-*
- config_name: eng_Latn-ita_Latn
data_files:
- split: dev
path: eng_Latn-ita_Latn/dev-*
- split: devtest
path: eng_Latn-ita_Latn/devtest-*
- config_name: eng_Latn-nld_Latn
data_files:
- split: dev
path: eng_Latn-nld_Latn/dev-*
- split: devtest
path: eng_Latn-nld_Latn/devtest-*
- config_name: eng_Latn-por_Latn
data_files:
- split: dev
path: eng_Latn-por_Latn/dev-*
- split: devtest
path: eng_Latn-por_Latn/devtest-*
- config_name: eng_Latn-spa_Latn
data_files:
- split: dev
path: eng_Latn-spa_Latn/dev-*
- split: devtest
path: eng_Latn-spa_Latn/devtest-*
- config_name: fra_Latn-arb_Arab
data_files:
- split: dev
path: fra_Latn-arb_Arab/dev-*
- split: devtest
path: fra_Latn-arb_Arab/devtest-*
- config_name: fra_Latn-deu_Latn
data_files:
- split: dev
path: fra_Latn-deu_Latn/dev-*
- split: devtest
path: fra_Latn-deu_Latn/devtest-*
- config_name: fra_Latn-eng_Latn
data_files:
- split: dev
path: fra_Latn-eng_Latn/dev-*
- split: devtest
path: fra_Latn-eng_Latn/devtest-*
- config_name: fra_Latn-ita_Latn
data_files:
- split: dev
path: fra_Latn-ita_Latn/dev-*
- split: devtest
path: fra_Latn-ita_Latn/devtest-*
- config_name: fra_Latn-nld_Latn
data_files:
- split: dev
path: fra_Latn-nld_Latn/dev-*
- split: devtest
path: fra_Latn-nld_Latn/devtest-*
- config_name: fra_Latn-por_Latn
data_files:
- split: dev
path: fra_Latn-por_Latn/dev-*
- split: devtest
path: fra_Latn-por_Latn/devtest-*
- config_name: fra_Latn-spa_Latn
data_files:
- split: dev
path: fra_Latn-spa_Latn/dev-*
- split: devtest
path: fra_Latn-spa_Latn/devtest-*
- config_name: ita_Latn-arb_Arab
data_files:
- split: dev
path: ita_Latn-arb_Arab/dev-*
- split: devtest
path: ita_Latn-arb_Arab/devtest-*
- config_name: ita_Latn-deu_Latn
data_files:
- split: dev
path: ita_Latn-deu_Latn/dev-*
- split: devtest
path: ita_Latn-deu_Latn/devtest-*
- config_name: ita_Latn-eng_Latn
data_files:
- split: dev
path: ita_Latn-eng_Latn/dev-*
- split: devtest
path: ita_Latn-eng_Latn/devtest-*
- config_name: ita_Latn-fra_Latn
data_files:
- split: dev
path: ita_Latn-fra_Latn/dev-*
- split: devtest
path: ita_Latn-fra_Latn/devtest-*
- config_name: ita_Latn-nld_Latn
data_files:
- split: dev
path: ita_Latn-nld_Latn/dev-*
- split: devtest
path: ita_Latn-nld_Latn/devtest-*
- config_name: ita_Latn-por_Latn
data_files:
- split: dev
path: ita_Latn-por_Latn/dev-*
- split: devtest
path: ita_Latn-por_Latn/devtest-*
- config_name: ita_Latn-spa_Latn
data_files:
- split: dev
path: ita_Latn-spa_Latn/dev-*
- split: devtest
path: ita_Latn-spa_Latn/devtest-*
- config_name: nld_Latn-arb_Arab
data_files:
- split: dev
path: nld_Latn-arb_Arab/dev-*
- split: devtest
path: nld_Latn-arb_Arab/devtest-*
- config_name: nld_Latn-deu_Latn
data_files:
- split: dev
path: nld_Latn-deu_Latn/dev-*
- split: devtest
path: nld_Latn-deu_Latn/devtest-*
- config_name: nld_Latn-eng_Latn
data_files:
- split: dev
path: nld_Latn-eng_Latn/dev-*
- split: devtest
path: nld_Latn-eng_Latn/devtest-*
- config_name: nld_Latn-fra_Latn
data_files:
- split: dev
path: nld_Latn-fra_Latn/dev-*
- split: devtest
path: nld_Latn-fra_Latn/devtest-*
- config_name: nld_Latn-ita_Latn
data_files:
- split: dev
path: nld_Latn-ita_Latn/dev-*
- split: devtest
path: nld_Latn-ita_Latn/devtest-*
- config_name: nld_Latn-por_Latn
data_files:
- split: dev
path: nld_Latn-por_Latn/dev-*
- split: devtest
path: nld_Latn-por_Latn/devtest-*
- config_name: nld_Latn-spa_Latn
data_files:
- split: dev
path: nld_Latn-spa_Latn/dev-*
- split: devtest
path: nld_Latn-spa_Latn/devtest-*
- config_name: por_Latn-arb_Arab
data_files:
- split: dev
path: por_Latn-arb_Arab/dev-*
- split: devtest
path: por_Latn-arb_Arab/devtest-*
- config_name: por_Latn-deu_Latn
data_files:
- split: dev
path: por_Latn-deu_Latn/dev-*
- split: devtest
path: por_Latn-deu_Latn/devtest-*
- config_name: por_Latn-eng_Latn
data_files:
- split: dev
path: por_Latn-eng_Latn/dev-*
- split: devtest
path: por_Latn-eng_Latn/devtest-*
- config_name: por_Latn-fra_Latn
data_files:
- split: dev
path: por_Latn-fra_Latn/dev-*
- split: devtest
path: por_Latn-fra_Latn/devtest-*
- config_name: por_Latn-ita_Latn
data_files:
- split: dev
path: por_Latn-ita_Latn/dev-*
- split: devtest
path: por_Latn-ita_Latn/devtest-*
- config_name: por_Latn-nld_Latn
data_files:
- split: dev
path: por_Latn-nld_Latn/dev-*
- split: devtest
path: por_Latn-nld_Latn/devtest-*
- config_name: por_Latn-spa_Latn
data_files:
- split: dev
path: por_Latn-spa_Latn/dev-*
- split: devtest
path: por_Latn-spa_Latn/devtest-*
- config_name: spa_Latn-arb_Arab
data_files:
- split: dev
path: spa_Latn-arb_Arab/dev-*
- split: devtest
path: spa_Latn-arb_Arab/devtest-*
- config_name: spa_Latn-deu_Latn
data_files:
- split: dev
path: spa_Latn-deu_Latn/dev-*
- split: devtest
path: spa_Latn-deu_Latn/devtest-*
- config_name: spa_Latn-eng_Latn
data_files:
- split: dev
path: spa_Latn-eng_Latn/dev-*
- split: devtest
path: spa_Latn-eng_Latn/devtest-*
- config_name: spa_Latn-fra_Latn
data_files:
- split: dev
path: spa_Latn-fra_Latn/dev-*
- split: devtest
path: spa_Latn-fra_Latn/devtest-*
- config_name: spa_Latn-ita_Latn
data_files:
- split: dev
path: spa_Latn-ita_Latn/dev-*
- split: devtest
path: spa_Latn-ita_Latn/devtest-*
- config_name: spa_Latn-nld_Latn
data_files:
- split: dev
path: spa_Latn-nld_Latn/dev-*
- split: devtest
path: spa_Latn-nld_Latn/devtest-*
- config_name: spa_Latn-por_Latn
data_files:
- split: dev
path: spa_Latn-por_Latn/dev-*
- split: devtest
path: spa_Latn-por_Latn/devtest-*
---
提供机构:
OpenLLM-BPI



