five

neurlang/low-quality-multilingual-sentences

收藏
Hugging Face2026-04-09 更新2026-04-12 收录
下载链接:
https://hf-mirror.com/datasets/neurlang/low-quality-multilingual-sentences
下载链接
链接失效反馈
官方服务:
资源简介:
--- configs: - config_name: af data_files: - path: - af.jsonl.zst split: train - config_name: am data_files: - path: - am.jsonl.zst split: train - config_name: an data_files: - path: - an.jsonl.zst split: train - config_name: as data_files: - path: - as.jsonl.zst split: train - config_name: ba data_files: - path: - ba.jsonl.zst split: train - config_name: be data_files: - path: - be.jsonl.zst split: train - config_name: bpy data_files: - path: - bpy.jsonl.zst split: train - config_name: bs data_files: - path: - bs.jsonl.zst split: train - config_name: ce data_files: - path: - ce.jsonl.zst split: train - config_name: ceb data_files: - path: - ceb.jsonl.zst split: train - config_name: chr data_files: - path: - chr.jsonl.zst split: train - config_name: cv data_files: - path: - cv.jsonl.zst split: train - config_name: dz data_files: - path: - dz.jsonl.zst split: train - config_name: eo data_files: - path: - eo.jsonl.zst split: train - config_name: eu data_files: - path: - eu.jsonl.zst split: train - config_name: gl data_files: - path: - gl.jsonl.zst split: train - config_name: gn data_files: - path: - gn.jsonl.zst split: train - config_name: gu data_files: - path: - gu.jsonl.zst split: train - config_name: ha data_files: - path: - ha.jsonl.zst split: train - config_name: haw data_files: - path: - haw.jsonl.zst split: train - config_name: he_nikud data_files: - path: - he_nikud.jsonl.zst split: train - config_name: hr data_files: - path: - hr.jsonl.zst split: train - config_name: ht data_files: - path: - ht.jsonl.zst split: train - config_name: ia data_files: - path: - ia.jsonl.zst split: train - config_name: io data_files: - path: - io.jsonl.zst split: train - config_name: kl data_files: - path: - kl.jsonl.zst split: train - config_name: ms_arab data_files: - path: - ms_arab.jsonl.zst split: train - config_name: my data_files: - path: - my.jsonl.zst split: train - config_name: ny data_files: - path: - ny.jsonl.zst split: train - config_name: tts data_files: - path: - 
tts.jsonl.zst split: train - config_name: yue data_files: - path: - yue.jsonl.zst split: train task_categories: - text-generation - text-classification - text-retrieval size_categories: - n<1K license: cc-by-4.0 --- # Low Quality Multilingual Sentences - This dataset is a complement to agentlans/high-quality-multilingual-sentences to extend it to more languages. - The new sentences in this dataset are low quality; proceed with caution.
提供机构:
neurlang
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作