five

ufal/npfl147

收藏
Hugging Face2025-11-11 更新2026-01-03 收录
下载链接:
https://hf-mirror.com/datasets/ufal/npfl147
下载链接
链接失效反馈
官方服务:
资源简介:
---
license: cc-by-sa-4.0
configs:
- config_name: cs
  data_files:
  - split: train
    path: cs/train-*
- config_name: de
  data_files:
  - split: train
    path: de/train-*
- config_name: en
  data_files:
  - split: train
    path: en/train-*
- config_name: eu
  data_files:
  - split: train
    path: eu/train-*
- config_name: ga
  data_files:
  - split: train
    path: ga/train-*
- config_name: 'no'
  data_files:
  - split: train
    path: no/train-*
- config_name: sk
  data_files:
  - split: train
    path: sk/train-*
- config_name: tr
  data_files:
  - split: train
    path: tr/train-*
dataset_info:
- config_name: cs
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: wikiname
    dtype: string
  - name: page_id
    dtype: int64
  - name: title
    dtype: string
  - name: url
    dtype: string
  - name: date_modified
    dtype: string
  - name: in_language
    dtype: string
  - name: wikidata_id
    dtype: string
  - name: bytes_html
    dtype: int64
  - name: wikitext
    dtype: string
  - name: version
    dtype: int64
  - name: infoboxes
    dtype: string
  - name: has_math
    dtype: bool
  splits:
  - name: train
    num_bytes: 11694725
    num_examples: 1000
  download_size: 6282043
  dataset_size: 11694725
- config_name: de
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: wikiname
    dtype: string
  - name: page_id
    dtype: int64
  - name: title
    dtype: string
  - name: url
    dtype: string
  - name: date_modified
    dtype: string
  - name: in_language
    dtype: string
  - name: wikidata_id
    dtype: string
  - name: bytes_html
    dtype: int64
  - name: wikitext
    dtype: string
  - name: version
    dtype: int64
  - name: infoboxes
    dtype: string
  - name: has_math
    dtype: bool
  splits:
  - name: train
    num_bytes: 14360755
    num_examples: 1000
  download_size: 7258099
  dataset_size: 14360755
- config_name: en
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: wikiname
    dtype: string
  - name: page_id
    dtype: int64
  - name: title
    dtype: string
  - name: url
    dtype: string
  - name: date_modified
    dtype: string
  - name: in_language
    dtype: string
  - name: wikidata_id
    dtype: string
  - name: bytes_html
    dtype: int64
  - name: wikitext
    dtype: string
  - name: version
    dtype: int64
  - name: infoboxes
    dtype: string
  - name: has_math
    dtype: bool
  splits:
  - name: train
    num_bytes: 16891985
    num_examples: 1000
  download_size: 8072990
  dataset_size: 16891985
- config_name: eu
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: wikiname
    dtype: string
  - name: page_id
    dtype: int64
  - name: title
    dtype: string
  - name: url
    dtype: string
  - name: date_modified
    dtype: string
  - name: in_language
    dtype: string
  - name: wikidata_id
    dtype: string
  - name: bytes_html
    dtype: int64
  - name: wikitext
    dtype: string
  - name: version
    dtype: int64
  - name: infoboxes
    dtype: string
  - name: has_math
    dtype: bool
  splits:
  - name: train
    num_bytes: 5444095
    num_examples: 1000
  download_size: 2799626
  dataset_size: 5444095
- config_name: ga
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: wikiname
    dtype: string
  - name: page_id
    dtype: int64
  - name: title
    dtype: string
  - name: url
    dtype: string
  - name: date_modified
    dtype: string
  - name: in_language
    dtype: string
  - name: wikidata_id
    dtype: string
  - name: bytes_html
    dtype: int64
  - name: wikitext
    dtype: string
  - name: version
    dtype: int64
  - name: infoboxes
    dtype: string
  - name: has_math
    dtype: bool
  splits:
  - name: train
    num_bytes: 4156261
    num_examples: 1000
  download_size: 1988378
  dataset_size: 4156261
- config_name: 'no'
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: wikiname
    dtype: string
  - name: page_id
    dtype: int64
  - name: title
    dtype: string
  - name: url
    dtype: string
  - name: date_modified
    dtype: string
  - name: in_language
    dtype: string
  - name: wikidata_id
    dtype: string
  - name: bytes_html
    dtype: int64
  - name: wikitext
    dtype: string
  - name: version
    dtype: int64
  - name: infoboxes
    dtype: string
  - name: has_math
    dtype: bool
  splits:
  - name: train
    num_bytes: 6189359
    num_examples: 1000
  download_size: 2934307
  dataset_size: 6189359
- config_name: sk
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: wikiname
    dtype: string
  - name: page_id
    dtype: int64
  - name: title
    dtype: string
  - name: url
    dtype: string
  - name: date_modified
    dtype: string
  - name: in_language
    dtype: string
  - name: wikidata_id
    dtype: string
  - name: bytes_html
    dtype: int64
  - name: wikitext
    dtype: string
  - name: version
    dtype: int64
  - name: infoboxes
    dtype: string
  - name: has_math
    dtype: bool
  splits:
  - name: train
    num_bytes: 7505596
    num_examples: 1000
  download_size: 4015604
  dataset_size: 7505596
- config_name: tr
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: wikiname
    dtype: string
  - name: page_id
    dtype: int64
  - name: title
    dtype: string
  - name: url
    dtype: string
  - name: date_modified
    dtype: string
  - name: in_language
    dtype: string
  - name: wikidata_id
    dtype: string
  - name: bytes_html
    dtype: int64
  - name: wikitext
    dtype: string
  - name: version
    dtype: int64
  - name: infoboxes
    dtype: string
  - name: has_math
    dtype: bool
  splits:
  - name: train
    num_bytes: 8693739
    num_examples: 1000
  download_size: 3841243
  dataset_size: 8693739
---
提供机构:
ufal
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作