abhik1505040/doc-aligned-crossSum-subset
收藏 | Hugging Face | 2024-06-19 更新 | 2024-06-29 收录
下载链接:
https://hf-mirror.com/datasets/abhik1505040/doc-aligned-crossSum-subset
下载链接
链接失效反馈官方服务:
资源简介:
---
# Dataset configurations: one entry per language pair.
# Each item pairs a `config_name` with the JSONL file under `data/` that
# holds that pair's records. `data_files` must be nested inside its list
# item (aligned with `config_name`) so both keys belong to the same mapping.
configs:
  - config_name: bengali-welsh
    data_files: "data/bengali-welsh.jsonl"
  - config_name: english-tigrinya
    data_files: "data/english-tigrinya.jsonl"
  - config_name: english-persian
    data_files: "data/english-persian.jsonl"
  - config_name: english-gujarati
    data_files: "data/english-gujarati.jsonl"
  - config_name: bengali-japanese
    data_files: "data/bengali-japanese.jsonl"
  - config_name: french-arabic
    data_files: "data/french-arabic.jsonl"
  - config_name: oromo-arabic
    data_files: "data/oromo-arabic.jsonl"
  - config_name: arabic-vietnamese
    data_files: "data/arabic-vietnamese.jsonl"
  - config_name: bengali-uzbek
    data_files: "data/bengali-uzbek.jsonl"
  - config_name: arabic-burmese
    data_files: "data/arabic-burmese.jsonl"
  - config_name: bengali-spanish
    data_files: "data/bengali-spanish.jsonl"
  - config_name: arabic-pashto
    data_files: "data/arabic-pashto.jsonl"
  - config_name: arabic-ukrainian
    data_files: "data/arabic-ukrainian.jsonl"
  - config_name: english-pashto
    data_files: "data/english-pashto.jsonl"
  - config_name: english-russian
    data_files: "data/english-russian.jsonl"
  - config_name: arabic-yoruba
    data_files: "data/arabic-yoruba.jsonl"
  - config_name: arabic-swahili
    data_files: "data/arabic-swahili.jsonl"
  - config_name: english-spanish
    data_files: "data/english-spanish.jsonl"
  - config_name: english-pidgin
    data_files: "data/english-pidgin.jsonl"
  - config_name: bengali-marathi
    data_files: "data/bengali-marathi.jsonl"
  - config_name: english-thai
    data_files: "data/english-thai.jsonl"
  - config_name: bengali-tamil
    data_files: "data/bengali-tamil.jsonl"
  - config_name: arabic-chinese_traditional
    data_files: "data/arabic-chinese_traditional.jsonl"
  - config_name: english-vietnamese
    data_files: "data/english-vietnamese.jsonl"
  - config_name: english-indonesian
    data_files: "data/english-indonesian.jsonl"
  - config_name: english-telugu
    data_files: "data/english-telugu.jsonl"
  - config_name: english-turkish
    data_files: "data/english-turkish.jsonl"
  - config_name: bengali-turkish
    data_files: "data/bengali-turkish.jsonl"
  - config_name: arabic-kyrgyz
    data_files: "data/arabic-kyrgyz.jsonl"
  - config_name: bengali-arabic
    data_files: "data/bengali-arabic.jsonl"
  - config_name: amharic-arabic
    data_files: "data/amharic-arabic.jsonl"
  - config_name: arabic-chinese_simplified
    data_files: "data/arabic-chinese_simplified.jsonl"
  - config_name: arabic-tigrinya
    data_files: "data/arabic-tigrinya.jsonl"
  - config_name: english-kyrgyz
    data_files: "data/english-kyrgyz.jsonl"
  - config_name: arabic-telugu
    data_files: "data/arabic-telugu.jsonl"
  - config_name: arabic-somali
    data_files: "data/arabic-somali.jsonl"
  - config_name: bengali-telugu
    data_files: "data/bengali-telugu.jsonl"
  - config_name: bengali-chinese_simplified
    data_files: "data/bengali-chinese_simplified.jsonl"
  - config_name: bengali-serbian_cyrillic
    data_files: "data/bengali-serbian_cyrillic.jsonl"
  - config_name: bengali-french
    data_files: "data/bengali-french.jsonl"
  - config_name: bengali-kirundi
    data_files: "data/bengali-kirundi.jsonl"
  - config_name: bengali-kyrgyz
    data_files: "data/bengali-kyrgyz.jsonl"
  - config_name: arabic-indonesian
    data_files: "data/arabic-indonesian.jsonl"
  - config_name: bengali-indonesian
    data_files: "data/bengali-indonesian.jsonl"
  - config_name: arabic-portuguese
    data_files: "data/arabic-portuguese.jsonl"
  - config_name: arabic-urdu
    data_files: "data/arabic-urdu.jsonl"
  - config_name: english-ukrainian
    data_files: "data/english-ukrainian.jsonl"
  - config_name: arabic-spanish
    data_files: "data/arabic-spanish.jsonl"
  - config_name: arabic-serbian_latin
    data_files: "data/arabic-serbian_latin.jsonl"
  - config_name: english-igbo
    data_files: "data/english-igbo.jsonl"
  - config_name: arabic-uzbek
    data_files: "data/arabic-uzbek.jsonl"
  - config_name: arabic-gujarati
    data_files: "data/arabic-gujarati.jsonl"
  - config_name: bengali-oromo
    data_files: "data/bengali-oromo.jsonl"
  - config_name: english-japanese
    data_files: "data/english-japanese.jsonl"
  - config_name: bengali-yoruba
    data_files: "data/bengali-yoruba.jsonl"
  - config_name: bengali-portuguese
    data_files: "data/bengali-portuguese.jsonl"
  - config_name: english-marathi
    data_files: "data/english-marathi.jsonl"
  - config_name: bengali-hausa
    data_files: "data/bengali-hausa.jsonl"
  - config_name: arabic-thai
    data_files: "data/arabic-thai.jsonl"
  - config_name: english-tamil
    data_files: "data/english-tamil.jsonl"
  - config_name: english-serbian_cyrillic
    data_files: "data/english-serbian_cyrillic.jsonl"
  - config_name: english-swahili
    data_files: "data/english-swahili.jsonl"
  - config_name: arabic-scottish_gaelic
    data_files: "data/arabic-scottish_gaelic.jsonl"
  - config_name: bengali-burmese
    data_files: "data/bengali-burmese.jsonl"
  - config_name: bengali-russian
    data_files: "data/bengali-russian.jsonl"
  - config_name: bengali-vietnamese
    data_files: "data/bengali-vietnamese.jsonl"
  - config_name: bengali-english
    data_files: "data/bengali-english.jsonl"
  - config_name: english-somali
    data_files: "data/english-somali.jsonl"
  - config_name: arabic-hausa
    data_files: "data/arabic-hausa.jsonl"
  - config_name: arabic-kirundi
    data_files: "data/arabic-kirundi.jsonl"
  - config_name: arabic-persian
    data_files: "data/arabic-persian.jsonl"
  - config_name: english-chinese_simplified
    data_files: "data/english-chinese_simplified.jsonl"
  - config_name: bengali-swahili
    data_files: "data/bengali-swahili.jsonl"
  - config_name: bengali-somali
    data_files: "data/bengali-somali.jsonl"
  - config_name: english-chinese_traditional
    data_files: "data/english-chinese_traditional.jsonl"
  - config_name: bengali-gujarati
    data_files: "data/bengali-gujarati.jsonl"
  - config_name: english-scottish_gaelic
    data_files: "data/english-scottish_gaelic.jsonl"
  - config_name: arabic-nepali
    data_files: "data/arabic-nepali.jsonl"
  - config_name: english-urdu
    data_files: "data/english-urdu.jsonl"
  - config_name: english-punjabi
    data_files: "data/english-punjabi.jsonl"
  - config_name: arabic-azerbaijani
    data_files: "data/arabic-azerbaijani.jsonl"
  - config_name: arabic-serbian_cyrillic
    data_files: "data/arabic-serbian_cyrillic.jsonl"
  - config_name: arabic-sinhala
    data_files: "data/arabic-sinhala.jsonl"
  - config_name: bengali-hindi
    data_files: "data/bengali-hindi.jsonl"
  - config_name: english-hausa
    data_files: "data/english-hausa.jsonl"
  - config_name: bengali-thai
    data_files: "data/bengali-thai.jsonl"
  - config_name: arabic-russian
    data_files: "data/arabic-russian.jsonl"
  - config_name: english-welsh
    data_files: "data/english-welsh.jsonl"
  - config_name: english-azerbaijani
    data_files: "data/english-azerbaijani.jsonl"
  - config_name: bengali-ukrainian
    data_files: "data/bengali-ukrainian.jsonl"
  - config_name: bengali-korean
    data_files: "data/bengali-korean.jsonl"
  - config_name: arabic-tamil
    data_files: "data/arabic-tamil.jsonl"
  - config_name: arabic-korean
    data_files: "data/arabic-korean.jsonl"
  - config_name: bengali-punjabi
    data_files: "data/bengali-punjabi.jsonl"
  - config_name: bengali-tigrinya
    data_files: "data/bengali-tigrinya.jsonl"
  - config_name: arabic-hindi
    data_files: "data/arabic-hindi.jsonl"
  - config_name: arabic-pidgin
    data_files: "data/arabic-pidgin.jsonl"
  - config_name: english-french
    data_files: "data/english-french.jsonl"
  - config_name: english-oromo
    data_files: "data/english-oromo.jsonl"
  - config_name: english-uzbek
    data_files: "data/english-uzbek.jsonl"
  - config_name: bengali-persian
    data_files: "data/bengali-persian.jsonl"
  - config_name: english-serbian_latin
    data_files: "data/english-serbian_latin.jsonl"
  - config_name: arabic-igbo
    data_files: "data/arabic-igbo.jsonl"
  - config_name: arabic-punjabi
    data_files: "data/arabic-punjabi.jsonl"
  - config_name: bengali-chinese_traditional
    data_files: "data/bengali-chinese_traditional.jsonl"
  - config_name: english-kirundi
    data_files: "data/english-kirundi.jsonl"
  - config_name: bengali-pidgin
    data_files: "data/bengali-pidgin.jsonl"
  - config_name: english-korean
    data_files: "data/english-korean.jsonl"
  - config_name: arabic-japanese
    data_files: "data/arabic-japanese.jsonl"
  - config_name: english-yoruba
    data_files: "data/english-yoruba.jsonl"
  - config_name: english-burmese
    data_files: "data/english-burmese.jsonl"
  - config_name: bengali-sinhala
    data_files: "data/bengali-sinhala.jsonl"
  - config_name: english-hindi
    data_files: "data/english-hindi.jsonl"
  - config_name: bengali-pashto
    data_files: "data/bengali-pashto.jsonl"
  - config_name: english-amharic
    data_files: "data/english-amharic.jsonl"
  - config_name: arabic-turkish
    data_files: "data/arabic-turkish.jsonl"
  - config_name: arabic-marathi
    data_files: "data/arabic-marathi.jsonl"
  - config_name: bengali-igbo
    data_files: "data/bengali-igbo.jsonl"
  - config_name: bengali-nepali
    data_files: "data/bengali-nepali.jsonl"
  - config_name: bengali-azerbaijani
    data_files: "data/bengali-azerbaijani.jsonl"
  - config_name: bengali-serbian_latin
    data_files: "data/bengali-serbian_latin.jsonl"
  - config_name: english-arabic
    data_files: "data/english-arabic.jsonl"
  - config_name: english-portuguese
    data_files: "data/english-portuguese.jsonl"
  - config_name: english-nepali
    data_files: "data/english-nepali.jsonl"
  - config_name: bengali-urdu
    data_files: "data/bengali-urdu.jsonl"
  - config_name: english-sinhala
    data_files: "data/english-sinhala.jsonl"
  - config_name: bengali-amharic
    data_files: "data/bengali-amharic.jsonl"
  - config_name: arabic-welsh
    data_files: "data/arabic-welsh.jsonl"
---
提供机构:
abhik1505040
原始信息汇总
数据集概述
数据集配置
语言对
- Bengali-Welsh
- English-Tigrinya
- English-Persian
- English-Gujarati
- Bengali-Japanese
- French-Arabic
- Oromo-Arabic
- Arabic-Vietnamese
- Bengali-Uzbek
- Arabic-Burmese
- Bengali-Spanish
- Arabic-Pashto
- Arabic-Ukrainian
- English-Pashto
- English-Russian
- Arabic-Yoruba
- Arabic-Swahili
- English-Spanish
- English-Pidgin
- Bengali-Marathi
- English-Thai
- Bengali-Tamil
- Arabic-Chinese_Traditional
- English-Vietnamese
- English-Indonesian
- English-Telugu
- English-Turkish
- Bengali-Turkish
- Arabic-Kyrgyz
- Bengali-Arabic
- Amharic-Arabic
- Arabic-Chinese_Simplified
- Arabic-Tigrinya
- English-Kyrgyz
- Arabic-Telugu
- Arabic-Somali
- Bengali-Telugu
- Bengali-Chinese_Simplified
- Bengali-Serbian_Cyrillic
- Bengali-French
- Bengali-Kirundi
- Bengali-Kyrgyz
- Arabic-Indonesian
- Bengali-Indonesian
- Arabic-Portuguese
- Arabic-Urdu
- English-Ukrainian
- Arabic-Spanish
- Arabic-Serbian_Latin
- English-Igbo
- Arabic-Uzbek
- Arabic-Gujarati
- Bengali-Oromo
- English-Japanese
- Bengali-Yoruba
- Bengali-Portuguese
- English-Marathi
- Bengali-Hausa
- Arabic-Thai
- English-Tamil
- English-Serbian_Cyrillic
- English-Swahili
- Arabic-Scottish_Gaelic
- Bengali-Burmese
- Bengali-Russian
- Bengali-Vietnamese
- Bengali-English
- English-Somali
- Arabic-Hausa
- Arabic-Kirundi
- Arabic-Persian
- English-Chinese_Simplified
- Bengali-Swahili
- Bengali-Somali
- English-Chinese_Traditional
- Bengali-Gujarati
- English-Scottish_Gaelic
- Arabic-Nepali
- English-Urdu
- English-Punjabi
- Arabic-Azerbaijani
- Arabic-Serbian_Cyrillic
- Arabic-Sinhala
- Bengali-Hindi
- English-Hausa
- Bengali-Thai
- Arabic-Russian
- English-Welsh
- English-Azerbaijani
- Bengali-Ukrainian
- Bengali-Korean
- Arabic-Tamil
- Arabic-Korean
- Bengali-Punjabi
- Bengali-Tigrinya
- Arabic-Hindi
- Arabic-Pidgin
- English-French
- English-Oromo
- English-Uzbek
- Bengali-Persian
- English-Serbian_Latin
- Arabic-Igbo
- Arabic-Punjabi
- Bengali-Chinese_Traditional
- English-Kirundi
- Bengali-Pidgin
- English-Korean
- Arabic-Japanese
- English-Yoruba
- English-Burmese
- Bengali-Sinhala
- English-Hindi
- Bengali-Pashto
- English-Amharic
- Arabic-Turkish
- Arabic-Marathi
- Bengali-Igbo
- Bengali-Nepali
- Bengali-Azerbaijani
- Bengali-Serbian_Latin
- English-Arabic
- English-Portuguese
- English-Nepali
- Bengali-Urdu
- English-Sinhala
- Bengali-Amharic
- Arabic-Welsh
数据文件路径
- data/bengali-welsh.jsonl
- data/english-tigrinya.jsonl
- data/english-persian.jsonl
- data/english-gujarati.jsonl
- data/bengali-japanese.jsonl
- data/french-arabic.jsonl
- data/oromo-arabic.jsonl
- data/arabic-vietnamese.jsonl
- data/bengali-uzbek.jsonl
- data/arabic-burmese.jsonl
- data/bengali-spanish.jsonl
- data/arabic-pashto.jsonl
- data/arabic-ukrainian.jsonl
- data/english-pashto.jsonl
- data/english-russian.jsonl
- data/arabic-yoruba.jsonl
- data/arabic-swahili.jsonl
- data/english-spanish.jsonl
- data/english-pidgin.jsonl
- data/bengali-marathi.jsonl
- data/english-thai.jsonl
- data/bengali-tamil.jsonl
- data/arabic-chinese_traditional.jsonl
- data/english-vietnamese.jsonl
- data/english-indonesian.jsonl
- data/english-telugu.jsonl
- data/english-turkish.jsonl
- data/bengali-turkish.jsonl
- data/arabic-kyrgyz.jsonl
- data/bengali-arabic.jsonl
- data/amharic-arabic.jsonl
- data/arabic-chinese_simplified.jsonl
- data/arabic-tigrinya.jsonl
- data/english-kyrgyz.jsonl
- data/arabic-telugu.jsonl
- data/arabic-somali.jsonl
- data/bengali-telugu.jsonl
- data/bengali-chinese_simplified.jsonl
- data/bengali-serbian_cyrillic.jsonl
- data/bengali-french.jsonl
- data/bengali-kirundi.jsonl
- data/bengali-kyrgyz.jsonl
- data/arabic-indonesian.jsonl
- data/bengali-indonesian.jsonl
- data/arabic-portuguese.jsonl
- data/arabic-urdu.jsonl
- data/english-ukrainian.jsonl
- data/arabic-spanish.jsonl
- data/arabic-serbian_latin.jsonl
- data/english-igbo.jsonl
- data/arabic-uzbek.jsonl
- data/arabic-gujarati.jsonl
- data/bengali-oromo.jsonl
- data/english-japanese.jsonl
- data/bengali-yoruba.jsonl
- data/bengali-portuguese.jsonl
- data/english-marathi.jsonl
- data/bengali-hausa.jsonl
- data/arabic-thai.jsonl
- data/english-tamil.jsonl
- data/english-serbian_cyrillic.jsonl
- data/english-swahili.jsonl
- data/arabic-scottish_gaelic.jsonl
- data/bengali-burmese.jsonl
- data/bengali-russian.jsonl
- data/bengali-vietnamese.jsonl
- data/bengali-english.jsonl
- data/english-somali.jsonl
- data/arabic-hausa.jsonl
- data/arabic-kirundi.jsonl
- data/arabic-persian.jsonl
- data/english-chinese_simplified.jsonl
- data/bengali-swahili.jsonl
- data/bengali-somali.jsonl
- data/english-chinese_traditional.jsonl
- data/bengali-gujarati.jsonl
- data/english-scottish_gaelic.jsonl
- data/arabic-nepali.jsonl
- data/english-urdu.jsonl
- data/english-punjabi.jsonl
- data/arabic-azerbaijani.jsonl
- data/arabic-serbian_cyrillic.jsonl
- data/arabic-sinhala.jsonl
- data/bengali-hindi.jsonl
- data/english-hausa.jsonl
- data/bengali-thai.jsonl
- data/arabic-russian.jsonl
- data/english-welsh.jsonl
- data/english-azerbaijani.jsonl
- data/bengali-ukrainian.jsonl
- data/bengali-korean.jsonl
- data/arabic-tamil.jsonl
- data/arabic-korean.jsonl
- data/bengali-punjabi.jsonl
- data/bengali-tigrinya.jsonl
- data/arabic-hindi.jsonl
- data/arabic-pidgin.jsonl
- data/english-french.jsonl
- data/english-oromo.jsonl
- data/english-uzbek.jsonl
- data/bengali-persian.jsonl
- data/english-serbian_latin.jsonl
- data/arabic-igbo.jsonl
- data/arabic-punjabi.jsonl
- data/bengali-chinese_traditional.jsonl
- data/english-kirundi.jsonl
- data/bengali-pidgin.jsonl
- data/english-korean.jsonl
- data/arabic-japanese.jsonl
- data/english-yoruba.jsonl
- data/english-burmese.jsonl
- data/bengali-sinhala.jsonl
- data/english-hindi.jsonl
- data/bengali-pashto.jsonl
- data/english-amharic.jsonl
- data/arabic-turkish.jsonl
- data/arabic-marathi.jsonl
- data/bengali-igbo.jsonl
- data/bengali-nepali.jsonl
- data/bengali-azerbaijani.jsonl
- data/bengali-serbian_latin.jsonl
- data/english-arabic.jsonl
- data/english-portuguese.jsonl
- data/english-nepali.jsonl
- data/bengali-urdu.jsonl
- data/english-sinhala.jsonl
- data/bengali-amharic.jsonl
- data/arabic-welsh.jsonl



