five

abhik1505040/doc-aligned-crossSum-subset

收藏
Hugging Face · 2024-06-19 更新 · 2024-06-29 收录
下载链接:
https://hf-mirror.com/datasets/abhik1505040/doc-aligned-crossSum-subset
下载链接
链接失效反馈
官方服务:
资源简介:
---
configs:
  - config_name: bengali-welsh
    data_files: "data/bengali-welsh.jsonl"
  - config_name: english-tigrinya
    data_files: "data/english-tigrinya.jsonl"
  - config_name: english-persian
    data_files: "data/english-persian.jsonl"
  - config_name: english-gujarati
    data_files: "data/english-gujarati.jsonl"
  - config_name: bengali-japanese
    data_files: "data/bengali-japanese.jsonl"
  - config_name: french-arabic
    data_files: "data/french-arabic.jsonl"
  - config_name: oromo-arabic
    data_files: "data/oromo-arabic.jsonl"
  - config_name: arabic-vietnamese
    data_files: "data/arabic-vietnamese.jsonl"
  - config_name: bengali-uzbek
    data_files: "data/bengali-uzbek.jsonl"
  - config_name: arabic-burmese
    data_files: "data/arabic-burmese.jsonl"
  - config_name: bengali-spanish
    data_files: "data/bengali-spanish.jsonl"
  - config_name: arabic-pashto
    data_files: "data/arabic-pashto.jsonl"
  - config_name: arabic-ukrainian
    data_files: "data/arabic-ukrainian.jsonl"
  - config_name: english-pashto
    data_files: "data/english-pashto.jsonl"
  - config_name: english-russian
    data_files: "data/english-russian.jsonl"
  - config_name: arabic-yoruba
    data_files: "data/arabic-yoruba.jsonl"
  - config_name: arabic-swahili
    data_files: "data/arabic-swahili.jsonl"
  - config_name: english-spanish
    data_files: "data/english-spanish.jsonl"
  - config_name: english-pidgin
    data_files: "data/english-pidgin.jsonl"
  - config_name: bengali-marathi
    data_files: "data/bengali-marathi.jsonl"
  - config_name: english-thai
    data_files: "data/english-thai.jsonl"
  - config_name: bengali-tamil
    data_files: "data/bengali-tamil.jsonl"
  - config_name: arabic-chinese_traditional
    data_files: "data/arabic-chinese_traditional.jsonl"
  - config_name: english-vietnamese
    data_files: "data/english-vietnamese.jsonl"
  - config_name: english-indonesian
    data_files: "data/english-indonesian.jsonl"
  - config_name: english-telugu
    data_files: "data/english-telugu.jsonl"
  - config_name: english-turkish
    data_files: "data/english-turkish.jsonl"
  - config_name: bengali-turkish
    data_files: "data/bengali-turkish.jsonl"
  - config_name: arabic-kyrgyz
    data_files: "data/arabic-kyrgyz.jsonl"
  - config_name: bengali-arabic
    data_files: "data/bengali-arabic.jsonl"
  - config_name: amharic-arabic
    data_files: "data/amharic-arabic.jsonl"
  - config_name: arabic-chinese_simplified
    data_files: "data/arabic-chinese_simplified.jsonl"
  - config_name: arabic-tigrinya
    data_files: "data/arabic-tigrinya.jsonl"
  - config_name: english-kyrgyz
    data_files: "data/english-kyrgyz.jsonl"
  - config_name: arabic-telugu
    data_files: "data/arabic-telugu.jsonl"
  - config_name: arabic-somali
    data_files: "data/arabic-somali.jsonl"
  - config_name: bengali-telugu
    data_files: "data/bengali-telugu.jsonl"
  - config_name: bengali-chinese_simplified
    data_files: "data/bengali-chinese_simplified.jsonl"
  - config_name: bengali-serbian_cyrillic
    data_files: "data/bengali-serbian_cyrillic.jsonl"
  - config_name: bengali-french
    data_files: "data/bengali-french.jsonl"
  - config_name: bengali-kirundi
    data_files: "data/bengali-kirundi.jsonl"
  - config_name: bengali-kyrgyz
    data_files: "data/bengali-kyrgyz.jsonl"
  - config_name: arabic-indonesian
    data_files: "data/arabic-indonesian.jsonl"
  - config_name: bengali-indonesian
    data_files: "data/bengali-indonesian.jsonl"
  - config_name: arabic-portuguese
    data_files: "data/arabic-portuguese.jsonl"
  - config_name: arabic-urdu
    data_files: "data/arabic-urdu.jsonl"
  - config_name: english-ukrainian
    data_files: "data/english-ukrainian.jsonl"
  - config_name: arabic-spanish
    data_files: "data/arabic-spanish.jsonl"
  - config_name: arabic-serbian_latin
    data_files: "data/arabic-serbian_latin.jsonl"
  - config_name: english-igbo
    data_files: "data/english-igbo.jsonl"
  - config_name: arabic-uzbek
    data_files: "data/arabic-uzbek.jsonl"
  - config_name: arabic-gujarati
    data_files: "data/arabic-gujarati.jsonl"
  - config_name: bengali-oromo
    data_files: "data/bengali-oromo.jsonl"
  - config_name: english-japanese
    data_files: "data/english-japanese.jsonl"
  - config_name: bengali-yoruba
    data_files: "data/bengali-yoruba.jsonl"
  - config_name: bengali-portuguese
    data_files: "data/bengali-portuguese.jsonl"
  - config_name: english-marathi
    data_files: "data/english-marathi.jsonl"
  - config_name: bengali-hausa
    data_files: "data/bengali-hausa.jsonl"
  - config_name: arabic-thai
    data_files: "data/arabic-thai.jsonl"
  - config_name: english-tamil
    data_files: "data/english-tamil.jsonl"
  - config_name: english-serbian_cyrillic
    data_files: "data/english-serbian_cyrillic.jsonl"
  - config_name: english-swahili
    data_files: "data/english-swahili.jsonl"
  - config_name: arabic-scottish_gaelic
    data_files: "data/arabic-scottish_gaelic.jsonl"
  - config_name: bengali-burmese
    data_files: "data/bengali-burmese.jsonl"
  - config_name: bengali-russian
    data_files: "data/bengali-russian.jsonl"
  - config_name: bengali-vietnamese
    data_files: "data/bengali-vietnamese.jsonl"
  - config_name: bengali-english
    data_files: "data/bengali-english.jsonl"
  - config_name: english-somali
    data_files: "data/english-somali.jsonl"
  - config_name: arabic-hausa
    data_files: "data/arabic-hausa.jsonl"
  - config_name: arabic-kirundi
    data_files: "data/arabic-kirundi.jsonl"
  - config_name: arabic-persian
    data_files: "data/arabic-persian.jsonl"
  - config_name: english-chinese_simplified
    data_files: "data/english-chinese_simplified.jsonl"
  - config_name: bengali-swahili
    data_files: "data/bengali-swahili.jsonl"
  - config_name: bengali-somali
    data_files: "data/bengali-somali.jsonl"
  - config_name: english-chinese_traditional
    data_files: "data/english-chinese_traditional.jsonl"
  - config_name: bengali-gujarati
    data_files: "data/bengali-gujarati.jsonl"
  - config_name: english-scottish_gaelic
    data_files: "data/english-scottish_gaelic.jsonl"
  - config_name: arabic-nepali
    data_files: "data/arabic-nepali.jsonl"
  - config_name: english-urdu
    data_files: "data/english-urdu.jsonl"
  - config_name: english-punjabi
    data_files: "data/english-punjabi.jsonl"
  - config_name: arabic-azerbaijani
    data_files: "data/arabic-azerbaijani.jsonl"
  - config_name: arabic-serbian_cyrillic
    data_files: "data/arabic-serbian_cyrillic.jsonl"
  - config_name: arabic-sinhala
    data_files: "data/arabic-sinhala.jsonl"
  - config_name: bengali-hindi
    data_files: "data/bengali-hindi.jsonl"
  - config_name: english-hausa
    data_files: "data/english-hausa.jsonl"
  - config_name: bengali-thai
    data_files: "data/bengali-thai.jsonl"
  - config_name: arabic-russian
    data_files: "data/arabic-russian.jsonl"
  - config_name: english-welsh
    data_files: "data/english-welsh.jsonl"
  - config_name: english-azerbaijani
    data_files: "data/english-azerbaijani.jsonl"
  - config_name: bengali-ukrainian
    data_files: "data/bengali-ukrainian.jsonl"
  - config_name: bengali-korean
    data_files: "data/bengali-korean.jsonl"
  - config_name: arabic-tamil
    data_files: "data/arabic-tamil.jsonl"
  - config_name: arabic-korean
    data_files: "data/arabic-korean.jsonl"
  - config_name: bengali-punjabi
    data_files: "data/bengali-punjabi.jsonl"
  - config_name: bengali-tigrinya
    data_files: "data/bengali-tigrinya.jsonl"
  - config_name: arabic-hindi
    data_files: "data/arabic-hindi.jsonl"
  - config_name: arabic-pidgin
    data_files: "data/arabic-pidgin.jsonl"
  - config_name: english-french
    data_files: "data/english-french.jsonl"
  - config_name: english-oromo
    data_files: "data/english-oromo.jsonl"
  - config_name: english-uzbek
    data_files: "data/english-uzbek.jsonl"
  - config_name: bengali-persian
    data_files: "data/bengali-persian.jsonl"
  - config_name: english-serbian_latin
    data_files: "data/english-serbian_latin.jsonl"
  - config_name: arabic-igbo
    data_files: "data/arabic-igbo.jsonl"
  - config_name: arabic-punjabi
    data_files: "data/arabic-punjabi.jsonl"
  - config_name: bengali-chinese_traditional
    data_files: "data/bengali-chinese_traditional.jsonl"
  - config_name: english-kirundi
    data_files: "data/english-kirundi.jsonl"
  - config_name: bengali-pidgin
    data_files: "data/bengali-pidgin.jsonl"
  - config_name: english-korean
    data_files: "data/english-korean.jsonl"
  - config_name: arabic-japanese
    data_files: "data/arabic-japanese.jsonl"
  - config_name: english-yoruba
    data_files: "data/english-yoruba.jsonl"
  - config_name: english-burmese
    data_files: "data/english-burmese.jsonl"
  - config_name: bengali-sinhala
    data_files: "data/bengali-sinhala.jsonl"
  - config_name: english-hindi
    data_files: "data/english-hindi.jsonl"
  - config_name: bengali-pashto
    data_files: "data/bengali-pashto.jsonl"
  - config_name: english-amharic
    data_files: "data/english-amharic.jsonl"
  - config_name: arabic-turkish
    data_files: "data/arabic-turkish.jsonl"
  - config_name: arabic-marathi
    data_files: "data/arabic-marathi.jsonl"
  - config_name: bengali-igbo
    data_files: "data/bengali-igbo.jsonl"
  - config_name: bengali-nepali
    data_files: "data/bengali-nepali.jsonl"
  - config_name: bengali-azerbaijani
    data_files: "data/bengali-azerbaijani.jsonl"
  - config_name: bengali-serbian_latin
    data_files: "data/bengali-serbian_latin.jsonl"
  - config_name: english-arabic
    data_files: "data/english-arabic.jsonl"
  - config_name: english-portuguese
    data_files: "data/english-portuguese.jsonl"
  - config_name: english-nepali
    data_files: "data/english-nepali.jsonl"
  - config_name: bengali-urdu
    data_files: "data/bengali-urdu.jsonl"
  - config_name: english-sinhala
    data_files: "data/english-sinhala.jsonl"
  - config_name: bengali-amharic
    data_files: "data/bengali-amharic.jsonl"
  - config_name: arabic-welsh
    data_files: "data/arabic-welsh.jsonl"
---
提供机构:
abhik1505040
原始信息汇总

数据集概述

数据集配置

语言对

  • Bengali-Welsh
  • English-Tigrinya
  • English-Persian
  • English-Gujarati
  • Bengali-Japanese
  • French-Arabic
  • Oromo-Arabic
  • Arabic-Vietnamese
  • Bengali-Uzbek
  • Arabic-Burmese
  • Bengali-Spanish
  • Arabic-Pashto
  • Arabic-Ukrainian
  • English-Pashto
  • English-Russian
  • Arabic-Yoruba
  • Arabic-Swahili
  • English-Spanish
  • English-Pidgin
  • Bengali-Marathi
  • English-Thai
  • Bengali-Tamil
  • Arabic-Chinese_Traditional
  • English-Vietnamese
  • English-Indonesian
  • English-Telugu
  • English-Turkish
  • Bengali-Turkish
  • Arabic-Kyrgyz
  • Bengali-Arabic
  • Amharic-Arabic
  • Arabic-Chinese_Simplified
  • Arabic-Tigrinya
  • English-Kyrgyz
  • Arabic-Telugu
  • Arabic-Somali
  • Bengali-Telugu
  • Bengali-Chinese_Simplified
  • Bengali-Serbian_Cyrillic
  • Bengali-French
  • Bengali-Kirundi
  • Bengali-Kyrgyz
  • Arabic-Indonesian
  • Bengali-Indonesian
  • Arabic-Portuguese
  • Arabic-Urdu
  • English-Ukrainian
  • Arabic-Spanish
  • Arabic-Serbian_Latin
  • English-Igbo
  • Arabic-Uzbek
  • Arabic-Gujarati
  • Bengali-Oromo
  • English-Japanese
  • Bengali-Yoruba
  • Bengali-Portuguese
  • English-Marathi
  • Bengali-Hausa
  • Arabic-Thai
  • English-Tamil
  • English-Serbian_Cyrillic
  • English-Swahili
  • Arabic-Scottish_Gaelic
  • Bengali-Burmese
  • Bengali-Russian
  • Bengali-Vietnamese
  • Bengali-English
  • English-Somali
  • Arabic-Hausa
  • Arabic-Kirundi
  • Arabic-Persian
  • English-Chinese_Simplified
  • Bengali-Swahili
  • Bengali-Somali
  • English-Chinese_Traditional
  • Bengali-Gujarati
  • English-Scottish_Gaelic
  • Arabic-Nepali
  • English-Urdu
  • English-Punjabi
  • Arabic-Azerbaijani
  • Arabic-Serbian_Cyrillic
  • Arabic-Sinhala
  • Bengali-Hindi
  • English-Hausa
  • Bengali-Thai
  • Arabic-Russian
  • English-Welsh
  • English-Azerbaijani
  • Bengali-Ukrainian
  • Bengali-Korean
  • Arabic-Tamil
  • Arabic-Korean
  • Bengali-Punjabi
  • Bengali-Tigrinya
  • Arabic-Hindi
  • Arabic-Pidgin
  • English-French
  • English-Oromo
  • English-Uzbek
  • Bengali-Persian
  • English-Serbian_Latin
  • Arabic-Igbo
  • Arabic-Punjabi
  • Bengali-Chinese_Traditional
  • English-Kirundi
  • Bengali-Pidgin
  • English-Korean
  • Arabic-Japanese
  • English-Yoruba
  • English-Burmese
  • Bengali-Sinhala
  • English-Hindi
  • Bengali-Pashto
  • English-Amharic
  • Arabic-Turkish
  • Arabic-Marathi
  • Bengali-Igbo
  • Bengali-Nepali
  • Bengali-Azerbaijani
  • Bengali-Serbian_Latin
  • English-Arabic
  • English-Portuguese
  • English-Nepali
  • Bengali-Urdu
  • English-Sinhala
  • Bengali-Amharic
  • Arabic-Welsh

数据文件路径

  • data/bengali-welsh.jsonl
  • data/english-tigrinya.jsonl
  • data/english-persian.jsonl
  • data/english-gujarati.jsonl
  • data/bengali-japanese.jsonl
  • data/french-arabic.jsonl
  • data/oromo-arabic.jsonl
  • data/arabic-vietnamese.jsonl
  • data/bengali-uzbek.jsonl
  • data/arabic-burmese.jsonl
  • data/bengali-spanish.jsonl
  • data/arabic-pashto.jsonl
  • data/arabic-ukrainian.jsonl
  • data/english-pashto.jsonl
  • data/english-russian.jsonl
  • data/arabic-yoruba.jsonl
  • data/arabic-swahili.jsonl
  • data/english-spanish.jsonl
  • data/english-pidgin.jsonl
  • data/bengali-marathi.jsonl
  • data/english-thai.jsonl
  • data/bengali-tamil.jsonl
  • data/arabic-chinese_traditional.jsonl
  • data/english-vietnamese.jsonl
  • data/english-indonesian.jsonl
  • data/english-telugu.jsonl
  • data/english-turkish.jsonl
  • data/bengali-turkish.jsonl
  • data/arabic-kyrgyz.jsonl
  • data/bengali-arabic.jsonl
  • data/amharic-arabic.jsonl
  • data/arabic-chinese_simplified.jsonl
  • data/arabic-tigrinya.jsonl
  • data/english-kyrgyz.jsonl
  • data/arabic-telugu.jsonl
  • data/arabic-somali.jsonl
  • data/bengali-telugu.jsonl
  • data/bengali-chinese_simplified.jsonl
  • data/bengali-serbian_cyrillic.jsonl
  • data/bengali-french.jsonl
  • data/bengali-kirundi.jsonl
  • data/bengali-kyrgyz.jsonl
  • data/arabic-indonesian.jsonl
  • data/bengali-indonesian.jsonl
  • data/arabic-portuguese.jsonl
  • data/arabic-urdu.jsonl
  • data/english-ukrainian.jsonl
  • data/arabic-spanish.jsonl
  • data/arabic-serbian_latin.jsonl
  • data/english-igbo.jsonl
  • data/arabic-uzbek.jsonl
  • data/arabic-gujarati.jsonl
  • data/bengali-oromo.jsonl
  • data/english-japanese.jsonl
  • data/bengali-yoruba.jsonl
  • data/bengali-portuguese.jsonl
  • data/english-marathi.jsonl
  • data/bengali-hausa.jsonl
  • data/arabic-thai.jsonl
  • data/english-tamil.jsonl
  • data/english-serbian_cyrillic.jsonl
  • data/english-swahili.jsonl
  • data/arabic-scottish_gaelic.jsonl
  • data/bengali-burmese.jsonl
  • data/bengali-russian.jsonl
  • data/bengali-vietnamese.jsonl
  • data/bengali-english.jsonl
  • data/english-somali.jsonl
  • data/arabic-hausa.jsonl
  • data/arabic-kirundi.jsonl
  • data/arabic-persian.jsonl
  • data/english-chinese_simplified.jsonl
  • data/bengali-swahili.jsonl
  • data/bengali-somali.jsonl
  • data/english-chinese_traditional.jsonl
  • data/bengali-gujarati.jsonl
  • data/english-scottish_gaelic.jsonl
  • data/arabic-nepali.jsonl
  • data/english-urdu.jsonl
  • data/english-punjabi.jsonl
  • data/arabic-azerbaijani.jsonl
  • data/arabic-serbian_cyrillic.jsonl
  • data/arabic-sinhala.jsonl
  • data/bengali-hindi.jsonl
  • data/english-hausa.jsonl
  • data/bengali-thai.jsonl
  • data/arabic-russian.jsonl
  • data/english-welsh.jsonl
  • data/english-azerbaijani.jsonl
  • data/bengali-ukrainian.jsonl
  • data/bengali-korean.jsonl
  • data/arabic-tamil.jsonl
  • data/arabic-korean.jsonl
  • data/bengali-punjabi.jsonl
  • data/bengali-tigrinya.jsonl
  • data/arabic-hindi.jsonl
  • data/arabic-pidgin.jsonl
  • data/english-french.jsonl
  • data/english-oromo.jsonl
  • data/english-uzbek.jsonl
  • data/bengali-persian.jsonl
  • data/english-serbian_latin.jsonl
  • data/arabic-igbo.jsonl
  • data/arabic-punjabi.jsonl
  • data/bengali-chinese_traditional.jsonl
  • data/english-kirundi.jsonl
  • data/bengali-pidgin.jsonl
  • data/english-korean.jsonl
  • data/arabic-japanese.jsonl
  • data/english-yoruba.jsonl
  • data/english-burmese.jsonl
  • data/bengali-sinhala.jsonl
  • data/english-hindi.jsonl
  • data/bengali-pashto.jsonl
  • data/english-amharic.jsonl
  • data/arabic-turkish.jsonl
  • data/arabic-marathi.jsonl
  • data/bengali-igbo.jsonl
  • data/bengali-nepali.jsonl
  • data/bengali-azerbaijani.jsonl
  • data/bengali-serbian_latin.jsonl
  • data/english-arabic.jsonl
  • data/english-portuguese.jsonl
  • data/english-nepali.jsonl
  • data/bengali-urdu.jsonl
  • data/english-sinhala.jsonl
  • data/bengali-amharic.jsonl
  • data/arabic-welsh.jsonl
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作