amanuelbyte/cleaned-translations-upload
收藏 | Hugging Face | 2026-02-01 更新 | 2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/amanuelbyte/cleaned-translations-upload
下载链接
链接失效反馈官方服务:
资源简介:
---
# One entry per config. Each entry lists the two string features, the train
# split with its own num_bytes/num_examples, and the config-level
# download_size/dataset_size totals (siblings of `splits`).
dataset_info:
- config_name: afr_Latn
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 44035025
    num_examples: 143909
  download_size: 29324033
  dataset_size: 44035025
- config_name: amh_Ethi
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 895372
    num_examples: 4973
  download_size: 482400
  dataset_size: 895372
- config_name: arz_Arab
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 102256361
    num_examples: 318356
  download_size: 57996430
  dataset_size: 102256361
- config_name: hau_Latn
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 42846358
    num_examples: 121837
  download_size: 25080018
  dataset_size: 42846358
- config_name: lin_Latn
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 168321
    num_examples: 862
  download_size: 75457
  dataset_size: 168321
- config_name: som_Latn
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 1260369582
    num_examples: 2803648
  download_size: 705793095
  dataset_size: 1260369582
- config_name: swh_Latn
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 617122804
    num_examples: 1547098
  download_size: 380356474
  dataset_size: 617122804
- config_name: wol_Latn
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 704359
    num_examples: 6232
  download_size: 431563
  dataset_size: 704359
- config_name: yor_Latn
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 137815
    num_examples: 614
  download_size: 83137
  dataset_size: 137815
- config_name: zul_Latn
  features:
  - name: source_sentence
    dtype: large_string
  - name: target_sentence
    dtype: large_string
  splits:
  - name: train
    num_bytes: 2720230
    num_examples: 6782
  download_size: 1759154
  dataset_size: 2720230
# Maps each config name to the file glob that provides its train split.
configs:
- config_name: afr_Latn
  data_files:
  - split: train
    path: afr_Latn/train-*
- config_name: amh_Ethi
  data_files:
  - split: train
    path: amh_Ethi/train-*
- config_name: arz_Arab
  data_files:
  - split: train
    path: arz_Arab/train-*
- config_name: hau_Latn
  data_files:
  - split: train
    path: hau_Latn/train-*
- config_name: lin_Latn
  data_files:
  - split: train
    path: lin_Latn/train-*
- config_name: som_Latn
  data_files:
  - split: train
    path: som_Latn/train-*
- config_name: swh_Latn
  data_files:
  - split: train
    path: swh_Latn/train-*
- config_name: wol_Latn
  data_files:
  - split: train
    path: wol_Latn/train-*
- config_name: yor_Latn
  data_files:
  - split: train
    path: yor_Latn/train-*
- config_name: zul_Latn
  data_files:
  - split: train
    path: zul_Latn/train-*
---
提供机构:
amanuelbyte



