loicmagne/open-subtitles-250-bitext-mining
数据集概述
本数据集包含多个配置(config),每个配置对应一个特定的数据文件。以下是数据集的主要配置信息:
配置列表
- config_name: ka-ml
  data_files: data/ka-ml.jsonl
- config_name: br-sr
  data_files: data/br-sr.jsonl
- config_name: bg-br
  data_files: data/bg-br.jsonl
- config_name: kk-lv
  data_files: data/kk-lv.jsonl
- config_name: br-sk
  data_files: data/br-sk.jsonl
- config_name: br-fi
  data_files: data/br-fi.jsonl
- config_name: eu-ze_zh
  data_files: data/eu-ze_zh.jsonl
- config_name: kk-nl
  data_files: data/kk-nl.jsonl
- config_name: kk-vi
  data_files: data/kk-vi.jsonl
- config_name: ja-kk
  data_files: data/ja-kk.jsonl
- config_name: br-sv
  data_files: data/br-sv.jsonl
- config_name: kk-zh_cn
  data_files: data/kk-zh_cn.jsonl
- config_name: kk-ms
  data_files: data/kk-ms.jsonl
- config_name: br-et
  data_files: data/br-et.jsonl
- config_name: br-hu
  data_files: data/br-hu.jsonl
- config_name: eo-kk
  data_files: data/eo-kk.jsonl
- config_name: br-tr
  data_files: data/br-tr.jsonl
- config_name: ko-tl
  data_files: data/ko-tl.jsonl
- config_name: te-zh_tw
  data_files: data/te-zh_tw.jsonl
- config_name: br-hr
  data_files: data/br-hr.jsonl
- config_name: br-nl
  data_files: data/br-nl.jsonl
- config_name: ka-si
  data_files: data/ka-si.jsonl
- config_name: br-cs
  data_files: data/br-cs.jsonl
- config_name: br-is
  data_files: data/br-is.jsonl
- config_name: br-ro
  data_files: data/br-ro.jsonl
- config_name: br-de
  data_files: data/br-de.jsonl
- config_name: et-kk
  data_files: data/et-kk.jsonl
- config_name: fr-hy
  data_files: data/fr-hy.jsonl
- config_name: br-no
  data_files: data/br-no.jsonl
- config_name: is-ko
  data_files: data/is-ko.jsonl
- config_name: br-da
  data_files: data/br-da.jsonl
- config_name: br-en
  data_files: data/br-en.jsonl
- config_name: eo-lt
  data_files: data/eo-lt.jsonl
- config_name: is-ze_zh
  data_files: data/is-ze_zh.jsonl
- config_name: eu-ko
  data_files: data/eu-ko.jsonl
- config_name: br-it
  data_files: data/br-it.jsonl
- config_name: br-id
  data_files: data/br-id.jsonl
- config_name: eu-zh_cn
  data_files: data/eu-zh_cn.jsonl
- config_name: is-ja
  data_files: data/is-ja.jsonl
- config_name: br-sl
  data_files: data/br-sl.jsonl
- config_name: br-gl
  data_files: data/br-gl.jsonl
- config_name: br-pt_br
  data_files: data/br-pt_br.jsonl
- config_name: br-es
  data_files: data/br-es.jsonl
- config_name: br-pt
  data_files: data/br-pt.jsonl
- config_name: is-th
  data_files: data/is-th.jsonl
- config_name: fa-is
  data_files: data/fa-is.jsonl
- config_name: br-ca
  data_files: data/br-ca.jsonl
- config_name: eu-ka
  data_files: data/eu-ka.jsonl
- config_name: is-zh_cn
  data_files: data/is-zh_cn.jsonl
- config_name: eu-ur
  data_files: data/eu-ur.jsonl
- config_name: id-kk
  data_files: data/id-kk.jsonl
- config_name: br-sq
  data_files: data/br-sq.jsonl
- config_name: eu-ja
  data_files: data/eu-ja.jsonl
- config_name: uk-ur
  data_files: data/uk-ur.jsonl
- config_name: is-zh_tw
  data_files: data/is-zh_tw.jsonl
- config_name: ka-ko
  data_files: data/ka-ko.jsonl
- config_name: eu-zh_tw
  data_files: data/eu-zh_tw.jsonl
- config_name: eu-th
  data_files: data/eu-th.jsonl
- config_name: eu-is
  data_files: data/eu-is.jsonl
- config_name: is-tl
  data_files: data/is-tl.jsonl
- config_name: br-eo
  data_files: data/br-eo.jsonl
- config_name: eo-ze_zh
  data_files: data/eo-ze_zh.jsonl
- config_name: eu-te
  data_files: data/eu-te.jsonl
- config_name: ar-kk
  data_files: data/ar-kk.jsonl
- config_name: eo-lv
  data_files: data/eo-lv.jsonl
- config_name: ko-ze_zh
  data_files: data/ko-ze_zh.jsonl
- config_name: ml-ze_zh
  data_files: data/ml-ze_zh.jsonl
- config_name: is-lt
  data_files: data/is-lt.jsonl
- config_name: br-fr
  data_files: data/br-fr.jsonl
- config_name: ko-te
  data_files: data/ko-te.jsonl
- config_name: kk-sl
  data_files: data/kk-sl.jsonl
- config_name: eu-fa
  data_files: data/eu-fa.jsonl
- config_name: eo-ko
  data_files: data/eo-ko.jsonl
- config_name: ka-ze_en
  data_files: data/ka-ze_en.jsonl
- config_name: eo-eu
  data_files: data/eo-eu.jsonl
- config_name: ta-zh_tw
  data_files: data/ta-zh_tw.jsonl
- config_name: eu-lv
  data_files: data/eu-lv.jsonl
- config_name: ko-lv
  data_files: data/ko-lv.jsonl
- config_name: lt-tl
  data_files: data/lt-tl.jsonl
- config_name: eu-si
  data_files: data/eu-si.jsonl
- config_name: hy-ru
  data_files: data/hy-ru.jsonl
- config_name: ar-is
  data_files: data/ar-is.jsonl
- config_name: eu-lt
  data_files: data/eu-lt.jsonl
- config_name: eu-tl
  data_files: data/eu-tl.jsonl
- config_name: eu-uk
  data_files: data/eu-uk.jsonl
- config_name: ka-ze_zh
  data_files: data/ka-ze_zh.jsonl
- config_name: si-ze_zh
  data_files: data/si-ze_zh.jsonl
- config_name: el-is
  data_files: data/el-is.jsonl
- config_name: bn-is
  data_files: data/bn-is.jsonl
- config_name: ko-ze_en
  data_files: data/ko-ze_en.jsonl
- config_name: eo-si
  data_files: data/eo-si.jsonl
- config_name: cs-kk
  data_files: data/cs-kk.jsonl
- config_name: is-uk
  data_files: data/is-uk.jsonl
- config_name: eu-ze_en
  data_files: data/eu-ze_en.jsonl
- config_name: ta-ze_zh
  data_files: data/ta-ze_zh.jsonl
- config_name: is-pl
  data_files: data/is-pl.jsonl
- config_name: is-mk
  data_files: data/is-mk.jsonl
- config_name: eu-ta
  data_files: data/eu-ta.jsonl
- config_name: ko-lt
  data_files: data/ko-lt.jsonl
- config_name: is-lv
  data_files: data/is-lv.jsonl
- config_name: fa-ko
  data_files: data/fa-ko.jsonl
- config_name: bn-ko
  data_files: data/bn-ko.jsonl
- config_name: hi-is
  data_files: data/hi-is.jsonl
- config_name: bn-ze_zh
  data_files: data/bn-ze_zh.jsonl
- config_name: bn-eu
  data_files: data/bn-eu.jsonl
- config_name: bn-ja
  data_files: data/bn-ja.jsonl
- config_name: is-ml
  data_files: data/is-ml.jsonl
- config_name: eu-ru
  data_files: data/eu-ru.jsonl
- config_name: ko-ta
  data_files: data/ko-ta.jsonl
- config_name: is-vi
  data_files: data/is-vi.jsonl
- config_name: ja-tl
  data_files: data/ja-tl.jsonl
- config_name: eu-mk
  data_files: data/eu-mk.jsonl
- config_name: eu-he
  data_files: data/eu-he.jsonl
- config_name: ka-zh_tw
  data_files: data/ka-zh_tw.jsonl
- config_name: ka-zh_cn
  data_files: data/ka-zh_cn.jsonl
- config_name: si-tl
  data_files: data/si-tl.jsonl
- config_name: is-kk
  data_files: data/is-kk.jsonl
- config_name: eu-fi
  data_files: data/eu-fi.jsonl
- config_name: fi-ko
  data_files: data/fi-ko.jsonl
- config_name: is-ur
  data_files: data/is-ur.jsonl
- config_name: ka-th
  data_files: data/ka-th.jsonl
- config_name: ko-ur
  data_files: data/ko-ur.jsonl
- config_name: eo-ja
  data_files: data/eo-ja.jsonl
- config_name: he-is
  data_files: data/he-is.jsonl
- config_name: is-tr
  data_files: data/is-tr.jsonl
- config_name: ka-ur
  data_files: data/ka-ur.jsonl
- config_name: et-ko
  data_files: data/et-ko.jsonl
- config_name: eu-vi
  data_files: data/eu-vi.jsonl
- config_name: is-sk
  data_files: data/is-sk.jsonl
- config_name: gl-is
  data_files: data/gl-is.jsonl
- config_name: fr-is
  data_files: data/fr-is.jsonl
- config_name: is-sq
  data_files: data/is-sq.jsonl
- config_name: hu-is
  data_files: data/hu-is.jsonl
- config_name: fr-kk
  data_files: data/fr-kk.jsonl
- config_name: eu-sq
  data_files: data/eu-sq.jsonl
- config_name: is-ru
  data_files: data/is-ru.jsonl
- config_name: ja-ka
  data_files: data/ja-ka.jsonl
- config_name: fi-tl
  data_files: data/fi-tl.jsonl
- config_name: ka-lv
  data_files: data/ka-lv.jsonl
- config_name: fi-is
  data_files: data/fi-is.jsonl
- config_name: is-si
  data_files: data/is-si.jsonl
- config_name: ar-ko
  data_files: data/ar-ko.jsonl
- config_name: ko-sl
  data_files: data/ko-sl.jsonl
- config_name: ar-eu
  data_files: data/ar-eu.jsonl
- config_name: ko-si
  data_files: data/ko-si.jsonl
- config_name: bg-is
  data_files: data/bg-is.jsonl
- config_name: eu-hu



