crystina-z/beir-corpus
收藏Hugging Face2025-12-02 更新2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/crystina-z/beir-corpus
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: arguana
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 9388094
num_examples: 8674
download_size: 5090573
dataset_size: 9388094
- config_name: cqadupstack-android
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 14044469
num_examples: 22998
download_size: 8469185
dataset_size: 14044469
- config_name: cqadupstack-english
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 20194221
num_examples: 40221
download_size: 12765386
dataset_size: 20194221
- config_name: cqadupstack-gaming
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 22932704
num_examples: 45301
download_size: 14582609
dataset_size: 22932704
- config_name: cqadupstack-gis
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 38750755
num_examples: 37637
download_size: 21469906
dataset_size: 38750755
- config_name: cqadupstack-mathematica
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 19568620
num_examples: 16705
download_size: 10936387
dataset_size: 19568620
- config_name: cqadupstack-physics
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 32038422
num_examples: 38316
download_size: 19263056
dataset_size: 32038422
- config_name: cqadupstack-programmers
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 34546412
num_examples: 32176
download_size: 20610369
dataset_size: 34546412
- config_name: cqadupstack-stats
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 45347600
num_examples: 42269
download_size: 26223522
dataset_size: 45347600
- config_name: cqadupstack-tex
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 89853366
num_examples: 68184
download_size: 45628853
dataset_size: 89853366
- config_name: cqadupstack-unix
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 48471433
num_examples: 47382
download_size: 26280626
dataset_size: 48471433
- config_name: cqadupstack-webmasters
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 12609627
num_examples: 17405
download_size: 7528650
dataset_size: 12609627
- config_name: cqadupstack-wordpress
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 55433096
num_examples: 48605
download_size: 28164980
dataset_size: 55433096
- config_name: dbpedia-entity
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 1639231635
num_examples: 4635922
download_size: 1034114659
dataset_size: 1639231635
- config_name: fever
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 3095105800
num_examples: 5416568
download_size: 2015570437
dataset_size: 3095105800
- config_name: fiqa
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 45303212
num_examples: 57638
download_size: 28359752
dataset_size: 45303212
- config_name: hotpotqa
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 1621286119
num_examples: 5233329
download_size: 1025375847
dataset_size: 1621286119
- config_name: nfcorpus
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 5856698
num_examples: 3633
download_size: 3203776
dataset_size: 5856698
- config_name: nq
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 1381417863
num_examples: 2681468
download_size: 787119917
dataset_size: 1381417863
- config_name: quora
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 41829670
num_examples: 522931
download_size: 25319921
dataset_size: 41829670
- config_name: scidocs
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 32262487
num_examples: 25657
download_size: 19040960
dataset_size: 32262487
- config_name: scifact
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 7874970
num_examples: 5183
download_size: 4575513
dataset_size: 7874970
- config_name: trec-covid
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 195185777
num_examples: 171332
download_size: 113062344
dataset_size: 195185777
- config_name: webis-touche2020
features:
- name: docid
dtype: string
- name: title
dtype: string
- name: text
dtype: string
splits:
- name: train
num_bytes: 678068503
num_examples: 382545
download_size: 363323305
dataset_size: 678068503
configs:
- config_name: arguana
data_files:
- split: train
path: arguana/train-*
- config_name: cqadupstack-android
data_files:
- split: train
path: cqadupstack-android/train-*
- config_name: cqadupstack-english
data_files:
- split: train
path: cqadupstack-english/train-*
- config_name: cqadupstack-gaming
data_files:
- split: train
path: cqadupstack-gaming/train-*
- config_name: cqadupstack-gis
data_files:
- split: train
path: cqadupstack-gis/train-*
- config_name: cqadupstack-mathematica
data_files:
- split: train
path: cqadupstack-mathematica/train-*
- config_name: cqadupstack-physics
data_files:
- split: train
path: cqadupstack-physics/train-*
- config_name: cqadupstack-programmers
data_files:
- split: train
path: cqadupstack-programmers/train-*
- config_name: cqadupstack-stats
data_files:
- split: train
path: cqadupstack-stats/train-*
- config_name: cqadupstack-tex
data_files:
- split: train
path: cqadupstack-tex/train-*
- config_name: cqadupstack-unix
data_files:
- split: train
path: cqadupstack-unix/train-*
- config_name: cqadupstack-webmasters
data_files:
- split: train
path: cqadupstack-webmasters/train-*
- config_name: cqadupstack-wordpress
data_files:
- split: train
path: cqadupstack-wordpress/train-*
- config_name: dbpedia-entity
data_files:
- split: train
path: dbpedia-entity/train-*
- config_name: fever
data_files:
- split: train
path: fever/train-*
- config_name: fiqa
data_files:
- split: train
path: fiqa/train-*
- config_name: hotpotqa
data_files:
- split: train
path: hotpotqa/train-*
- config_name: nfcorpus
data_files:
- split: train
path: nfcorpus/train-*
- config_name: nq
data_files:
- split: train
path: nq/train-*
- config_name: quora
data_files:
- split: train
path: quora/train-*
- config_name: scidocs
data_files:
- split: train
path: scidocs/train-*
- config_name: scifact
data_files:
- split: train
path: scifact/train-*
- config_name: trec-covid
data_files:
- split: train
path: trec-covid/train-*
- config_name: webis-touche2020
data_files:
- split: train
path: webis-touche2020/train-*
---
提供机构:
crystina-z



