daniazie/parallel_asian_treebank_sents
收藏Hugging Face2026-04-11 更新2026-04-26 收录
下载链接:
https://hf-mirror.com/datasets/daniazie/parallel_asian_treebank_sents
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: en_bg
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 11564379
num_examples: 18088
- name: dev
num_bytes: 627556
num_examples: 1000
- name: test
num_bytes: 645930
num_examples: 1018
download_size: 4940607
dataset_size: 12837865
- config_name: en_fil
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 7960851
num_examples: 18079
- name: dev
num_bytes: 427376
num_examples: 994
- name: test
num_bytes: 444427
num_examples: 1016
download_size: 3997966
dataset_size: 8832654
- config_name: en_hi
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 11285663
num_examples: 18088
- name: dev
num_bytes: 614200
num_examples: 1000
- name: test
num_bytes: 632754
num_examples: 1018
download_size: 4832598
dataset_size: 12532617
- config_name: en_id
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 7752304
num_examples: 18088
- name: dev
num_bytes: 417537
num_examples: 1000
- name: test
num_bytes: 432353
num_examples: 1018
download_size: 3830638
dataset_size: 8602194
- config_name: en_khm
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 12501563
num_examples: 18088
- name: dev
num_bytes: 682225
num_examples: 1000
- name: test
num_bytes: 699666
num_examples: 1018
download_size: 5197387
dataset_size: 13883454
- config_name: en_lo
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 11549697
num_examples: 18079
- name: dev
num_bytes: 618956
num_examples: 994
- name: test
num_bytes: 643732
num_examples: 1016
download_size: 4944063
dataset_size: 12812385
- config_name: en_ms
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 7712724
num_examples: 18088
- name: dev
num_bytes: 417184
num_examples: 1000
- name: test
num_bytes: 431317
num_examples: 1018
download_size: 3836925
dataset_size: 8561225
- config_name: en_my
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 14491167
num_examples: 18088
- name: dev
num_bytes: 786845
num_examples: 1000
- name: test
num_bytes: 806191
num_examples: 1018
download_size: 5691460
dataset_size: 16084203
- config_name: en_th
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 11378289
num_examples: 18079
- name: dev
num_bytes: 615068
num_examples: 994
- name: test
num_bytes: 632117
num_examples: 1016
download_size: 4932617
dataset_size: 12625474
- config_name: en_vi
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 8487954
num_examples: 18088
- name: dev
num_bytes: 459907
num_examples: 1000
- name: test
num_bytes: 475219
num_examples: 1018
download_size: 4124675
dataset_size: 9423080
- config_name: en_zh
features:
- name: id
dtype: string
- name: src
dtype: string
- name: ref
dtype: string
- name: url
dtype: string
- name: src_lang
dtype: string
- name: tgt_lang
dtype: string
splits:
- name: train
num_bytes: 7396298
num_examples: 18088
- name: dev
num_bytes: 401152
num_examples: 1000
- name: test
num_bytes: 415809
num_examples: 1018
download_size: 3777239
dataset_size: 8213259
configs:
- config_name: en_bg
data_files:
- split: train
path: en_bg/train-*
- split: dev
path: en_bg/dev-*
- split: test
path: en_bg/test-*
- config_name: en_fil
data_files:
- split: train
path: en_fil/train-*
- split: dev
path: en_fil/dev-*
- split: test
path: en_fil/test-*
- config_name: en_hi
data_files:
- split: train
path: en_hi/train-*
- split: dev
path: en_hi/dev-*
- split: test
path: en_hi/test-*
- config_name: en_id
data_files:
- split: train
path: en_id/train-*
- split: dev
path: en_id/dev-*
- split: test
path: en_id/test-*
- config_name: en_khm
data_files:
- split: train
path: en_khm/train-*
- split: dev
path: en_khm/dev-*
- split: test
path: en_khm/test-*
- config_name: en_lo
data_files:
- split: train
path: en_lo/train-*
- split: dev
path: en_lo/dev-*
- split: test
path: en_lo/test-*
- config_name: en_ms
data_files:
- split: train
path: en_ms/train-*
- split: dev
path: en_ms/dev-*
- split: test
path: en_ms/test-*
- config_name: en_my
data_files:
- split: train
path: en_my/train-*
- split: dev
path: en_my/dev-*
- split: test
path: en_my/test-*
- config_name: en_th
data_files:
- split: train
path: en_th/train-*
- split: dev
path: en_th/dev-*
- split: test
path: en_th/test-*
- config_name: en_vi
data_files:
- split: train
path: en_vi/train-*
- split: dev
path: en_vi/dev-*
- split: test
path: en_vi/test-*
- config_name: en_zh
data_files:
- split: train
path: en_zh/train-*
- split: dev
path: en_zh/dev-*
- split: test
path: en_zh/test-*
---
提供机构:
daniazie



