allenai/scirepeval_test
收藏 | Hugging Face | 2023-10-19 更新 | 2024-03-04 收录
下载链接:
https://hf-mirror.com/datasets/allenai/scirepeval_test
下载链接
链接失效反馈官方服务:
资源简介:
---
# Per-configuration feature schema and split sizes for this dataset card.
# Each list entry is one configuration; sizes are in bytes.
dataset_info:
- config_name: fos
  features:
  - name: paper_id
    dtype: string
  - name: label
    sequence: int32
  splits:
  - name: test
    num_bytes: 51276
    num_examples: 472
  - name: train
    num_bytes: 5873604
    num_examples: 54131
  download_size: 3194762
  dataset_size: 5924880
- config_name: mesh_descriptors
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: int32
  splits:
  - name: test
    num_bytes: 820660
    num_examples: 51738
  - name: train
    num_bytes: 3283053
    num_examples: 206949
  download_size: 3203144
  dataset_size: 4103713
- config_name: cite_count
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: float64
  splits:
  - name: test
    num_bytes: 121260
    num_examples: 6012
  - name: train
    num_bytes: 483822
    num_examples: 24000
  download_size: 477603
  dataset_size: 605082
- config_name: pub_year
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: float64
  splits:
  - name: test
    num_bytes: 123284
    num_examples: 6000
  - name: train
    num_bytes: 493073
    num_examples: 24000
  download_size: 518506
  dataset_size: 616357
- config_name: high_influence_cite
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 1439013
    num_examples: 58255
  download_size: 3477938
  dataset_size: 1439013
- config_name: same_author
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 3144107
    num_examples: 123430
  download_size: 7464157
  dataset_size: 3144107
- config_name: search
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 1283980
    num_examples: 25850
  download_size: 2188731
  dataset_size: 1283980
- config_name: drsm
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: int32
  splits:
  - name: test
    num_bytes: 15277
    num_examples: 955
  - name: train
    num_bytes: 119083
    num_examples: 7520
  download_size: 100492
  dataset_size: 134360
- config_name: feeds_1
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 110997
    num_examples: 4223
  download_size: 258802
  dataset_size: 110997
- config_name: feeds_m
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 2321483
    num_examples: 87528
  download_size: 5384963
  dataset_size: 2321483
- config_name: feeds_title
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 210605
    num_examples: 4233
  download_size: 358760
  dataset_size: 210605
- config_name: peer_review_score
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: float64
  splits:
  - name: test
    num_bytes: 89892
    num_examples: 2043
  - name: train
    num_bytes: 359348
    num_examples: 8167
  download_size: 408432
  dataset_size: 449240
- config_name: hIndex
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: float64
  splits:
  - name: test
    num_bytes: 94864
    num_examples: 2156
  - name: train
    num_bytes: 382756
    num_examples: 8699
  download_size: 434232
  dataset_size: 477620
- config_name: trec_covid
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: int8
  splits:
  - name: test
    num_bytes: 3396582
    num_examples: 69318
  download_size: 5822714
  dataset_size: 3396582
- config_name: tweet_mentions
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: float64
  splits:
  - name: test
    num_bytes: 111212
    num_examples: 5132
  - name: train
    num_bytes: 444784
    num_examples: 20523
  download_size: 454231
  dataset_size: 555996
- config_name: scidocs_mag
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: int32
  splits:
  - name: test
    num_bytes: 180048
    num_examples: 3751
  - name: train
    num_bytes: 840048
    num_examples: 17501
  download_size: 923863
  dataset_size: 1020096
- config_name: scidocs_mesh
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: int32
  splits:
  - name: test
    num_bytes: 169488
    num_examples: 3531
  - name: train
    num_bytes: 790944
    num_examples: 16478
  download_size: 862299
  dataset_size: 960432
- config_name: scidocs_view
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 2668042
    num_examples: 29978
  download_size: 3717272
  dataset_size: 2668042
- config_name: scidocs_cite
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 2663592
    num_examples: 29928
  download_size: 3711072
  dataset_size: 2663592
- config_name: scidocs_cocite
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 2665461
    num_examples: 29949
  download_size: 3713676
  dataset_size: 2665461
- config_name: scidocs_read
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 2667953
    num_examples: 29977
  download_size: 3717148
  dataset_size: 2667953
- config_name: reviewers
  features:
  - name: r_id
    dtype: string
  - name: papers
    sequence: string
  splits:
  - name: metadata
    num_bytes: 3564977
    num_examples: 668
  download_size: 3576339
  dataset_size: 3564977
- config_name: paper_reviewer_matching
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test_hard
    num_bytes: 50603
    num_examples: 1729
  - name: test_soft
    num_bytes: 50603
    num_examples: 1729
  download_size: 222236
  dataset_size: 101206
- config_name: biomimicry
  features:
  - name: paper_id
    dtype: string
  - name: label
    dtype: int32
  splits:
  - name: test
    num_bytes: 44513
    num_examples: 2748
  - name: train
    num_bytes: 133570
    num_examples: 8243
  download_size: 134151
  dataset_size: 178083
- config_name: relish
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 4779565
    num_examples: 191245
  download_size: 11473140
  dataset_size: 4779565
- config_name: nfcorpus
  features:
  - name: query_id
    dtype: string
  - name: cand_id
    dtype: string
  - name: score
    dtype: uint8
  splits:
  - name: test
    num_bytes: 1188859
    num_examples: 44634
  download_size: 2751049
  dataset_size: 1188859
---
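The dataset includes multiple configurations, each with its own features and splits. Features include paper_id, query_id, cand_id, r_id, papers, label, and score, with data types including string, int32, float64, uint8, and int8 (plus sequences of int32 or string). Some configurations provide both train and test splits, while others provide only a test split; the reviewers configuration has a single metadata split, and paper_reviewer_matching has test_hard and test_soft splits. The number of bytes and examples is listed for each split.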
提供机构:
allenai
原始信息汇总
数据集概述
数据集配置
fos
- 特征:
paper_id: 字符串；label: 整数序列
- 分割:
test: 51276 字节, 472 样本；train: 5873604 字节, 54131 样本
- 下载大小: 3194762 字节
- 数据集大小: 5924880 字节
mesh_descriptors
- 特征:
paper_id: 字符串；label: 整数
- 分割:
test: 820660 字节, 51738 样本；train: 3283053 字节, 206949 样本
- 下载大小: 3203144 字节
- 数据集大小: 4103713 字节
cite_count
- 特征:
paper_id: 字符串；label: 浮点数
- 分割:
test: 121260 字节, 6012 样本；train: 483822 字节, 24000 样本
- 下载大小: 477603 字节
- 数据集大小: 605082 字节
pub_year
- 特征:
paper_id: 字符串；label: 浮点数
- 分割:
test: 123284 字节, 6000 样本；train: 493073 字节, 24000 样本
- 下载大小: 518506 字节
- 数据集大小: 616357 字节
high_influence_cite
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 1439013 字节, 58255 样本
- 下载大小: 3477938 字节
- 数据集大小: 1439013 字节
same_author
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 3144107 字节, 123430 样本
- 下载大小: 7464157 字节
- 数据集大小: 3144107 字节
search
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 1283980 字节, 25850 样本
- 下载大小: 2188731 字节
- 数据集大小: 1283980 字节
drsm
- 特征:
paper_id: 字符串；label: 整数
- 分割:
test: 15277 字节, 955 样本；train: 119083 字节, 7520 样本
- 下载大小: 100492 字节
- 数据集大小: 134360 字节
feeds_1
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 110997 字节, 4223 样本
- 下载大小: 258802 字节
- 数据集大小: 110997 字节
feeds_m
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 2321483 字节, 87528 样本
- 下载大小: 5384963 字节
- 数据集大小: 2321483 字节
feeds_title
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 210605 字节, 4233 样本
- 下载大小: 358760 字节
- 数据集大小: 210605 字节
peer_review_score
- 特征:
paper_id: 字符串；label: 浮点数
- 分割:
test: 89892 字节, 2043 样本；train: 359348 字节, 8167 样本
- 下载大小: 408432 字节
- 数据集大小: 449240 字节
hIndex
- 特征:
paper_id: 字符串；label: 浮点数
- 分割:
test: 94864 字节, 2156 样本；train: 382756 字节, 8699 样本
- 下载大小: 434232 字节
- 数据集大小: 477620 字节
trec_covid
- 特征:
query_id: 字符串；cand_id: 字符串；score: 整数
- 分割:
test: 3396582 字节, 69318 样本
- 下载大小: 5822714 字节
- 数据集大小: 3396582 字节
tweet_mentions
- 特征:
paper_id: 字符串；label: 浮点数
- 分割:
test: 111212 字节, 5132 样本；train: 444784 字节, 20523 样本
- 下载大小: 454231 字节
- 数据集大小: 555996 字节
scidocs_mag
- 特征:
paper_id: 字符串；label: 整数
- 分割:
test: 180048 字节, 3751 样本；train: 840048 字节, 17501 样本
- 下载大小: 923863 字节
- 数据集大小: 1020096 字节
scidocs_mesh
- 特征:
paper_id: 字符串；label: 整数
- 分割:
test: 169488 字节, 3531 样本；train: 790944 字节, 16478 样本
- 下载大小: 862299 字节
- 数据集大小: 960432 字节
scidocs_view
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 2668042 字节, 29978 样本
- 下载大小: 3717272 字节
- 数据集大小: 2668042 字节
scidocs_cite
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 2663592 字节, 29928 样本
- 下载大小: 3711072 字节
- 数据集大小: 2663592 字节
scidocs_cocite
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 2665461 字节, 29949 样本
- 下载大小: 3713676 字节
- 数据集大小: 2665461 字节
scidocs_read
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 2667953 字节, 29977 样本
- 下载大小: 3717148 字节
- 数据集大小: 2667953 字节
reviewers
- 特征:
r_id: 字符串；papers: 字符串序列
- 分割:
metadata: 3564977 字节, 668 样本
- 下载大小: 3576339 字节
- 数据集大小: 3564977 字节
paper_reviewer_matching
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test_hard: 50603 字节, 1729 样本；test_soft: 50603 字节, 1729 样本
- 下载大小: 222236 字节
- 数据集大小: 101206 字节
biomimicry
- 特征:
paper_id: 字符串；label: 整数
- 分割:
test: 44513 字节, 2748 样本；train: 133570 字节, 8243 样本
- 下载大小: 134151 字节
- 数据集大小: 178083 字节
relish
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 4779565 字节, 191245 样本
- 下载大小: 11473140 字节
- 数据集大小: 4779565 字节
nfcorpus
- 特征:
query_id: 字符串；cand_id: 字符串；score: 无符号整数
- 分割:
test: 1188859 字节, 44634 样本
- 下载大小: 2751049 字节
- 数据集大小: 1188859 字节



