five

crystina-z/beir-corpus

收藏
Hugging Face2025-12-02 更新2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/crystina-z/beir-corpus
下载链接
链接失效反馈
官方服务:
资源简介:
--- dataset_info: - config_name: arguana features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 9388094 num_examples: 8674 download_size: 5090573 dataset_size: 9388094 - config_name: cqadupstack-android features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 14044469 num_examples: 22998 download_size: 8469185 dataset_size: 14044469 - config_name: cqadupstack-english features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 20194221 num_examples: 40221 download_size: 12765386 dataset_size: 20194221 - config_name: cqadupstack-gaming features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 22932704 num_examples: 45301 download_size: 14582609 dataset_size: 22932704 - config_name: cqadupstack-gis features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 38750755 num_examples: 37637 download_size: 21469906 dataset_size: 38750755 - config_name: cqadupstack-mathematica features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 19568620 num_examples: 16705 download_size: 10936387 dataset_size: 19568620 - config_name: cqadupstack-physics features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 32038422 num_examples: 38316 download_size: 19263056 dataset_size: 32038422 - config_name: cqadupstack-programmers features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 34546412 num_examples: 32176 download_size: 20610369 dataset_size: 34546412 - config_name: cqadupstack-stats features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 45347600 num_examples: 42269 download_size: 26223522 dataset_size: 45347600 - config_name: cqadupstack-tex features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 89853366 num_examples: 68184 download_size: 45628853 dataset_size: 89853366 - config_name: cqadupstack-unix features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 48471433 num_examples: 47382 download_size: 26280626 dataset_size: 48471433 - config_name: cqadupstack-webmasters features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 12609627 num_examples: 17405 download_size: 7528650 dataset_size: 12609627 - config_name: cqadupstack-wordpress features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 55433096 num_examples: 48605 download_size: 28164980 dataset_size: 55433096 - config_name: dbpedia-entity features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 1639231635 num_examples: 4635922 download_size: 1034114659 dataset_size: 1639231635 - config_name: fever features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 3095105800 num_examples: 5416568 download_size: 2015570437 dataset_size: 3095105800 - config_name: fiqa features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 45303212 num_examples: 57638 download_size: 28359752 dataset_size: 45303212 - config_name: hotpotqa features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 1621286119 num_examples: 5233329 download_size: 1025375847 dataset_size: 1621286119 - config_name: nfcorpus features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 5856698 num_examples: 3633 download_size: 3203776 dataset_size: 5856698 - config_name: nq features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 1381417863 num_examples: 2681468 download_size: 787119917 dataset_size: 1381417863 - config_name: quora features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 41829670 num_examples: 522931 download_size: 25319921 dataset_size: 41829670 - config_name: scidocs features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 32262487 num_examples: 25657 download_size: 19040960 dataset_size: 32262487 - config_name: scifact features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 7874970 num_examples: 5183 download_size: 4575513 dataset_size: 7874970 - config_name: trec-covid features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 195185777 num_examples: 171332 download_size: 113062344 dataset_size: 195185777 - config_name: webis-touche2020 features: - name: docid dtype: string - name: title dtype: string - name: text dtype: string splits: - name: train num_bytes: 678068503 num_examples: 382545 download_size: 363323305 dataset_size: 678068503 configs: - config_name: arguana data_files: - split: train path: arguana/train-* - config_name: cqadupstack-android data_files: - split: train path: cqadupstack-android/train-* - config_name: cqadupstack-english data_files: - split: train path: cqadupstack-english/train-* - config_name: cqadupstack-gaming data_files: - split: train path: cqadupstack-gaming/train-* - config_name: cqadupstack-gis data_files: - split: train path: cqadupstack-gis/train-* - config_name: cqadupstack-mathematica data_files: - split: train path: cqadupstack-mathematica/train-* - config_name: cqadupstack-physics data_files: - split: train path: cqadupstack-physics/train-* - config_name: cqadupstack-programmers data_files: - split: train path: cqadupstack-programmers/train-* - config_name: cqadupstack-stats data_files: - split: train path: cqadupstack-stats/train-* - config_name: cqadupstack-tex data_files: - split: train path: cqadupstack-tex/train-* - config_name: cqadupstack-unix data_files: - split: train path: cqadupstack-unix/train-* - config_name: cqadupstack-webmasters data_files: - split: train path: cqadupstack-webmasters/train-* - config_name: cqadupstack-wordpress data_files: - split: train path: cqadupstack-wordpress/train-* - config_name: dbpedia-entity data_files: - split: train path: dbpedia-entity/train-* - config_name: fever data_files: - split: train path: fever/train-* - config_name: fiqa data_files: - split: train path: fiqa/train-* - config_name: hotpotqa data_files: - split: train path: hotpotqa/train-* - config_name: nfcorpus data_files: - split: train path: nfcorpus/train-* - config_name: nq data_files: - split: train path: nq/train-* - config_name: quora data_files: - split: train path: quora/train-* - config_name: scidocs data_files: - split: train path: scidocs/train-* - config_name: scifact data_files: - split: train path: scifact/train-* - config_name: trec-covid data_files: - split: train path: trec-covid/train-* - config_name: webis-touche2020 data_files: - split: train path: webis-touche2020/train-* ---
提供机构:
crystina-z
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作