MLP-Lemma/Evaluation-dataset
收藏 | Hugging Face | 2024-05-15 更新 | 2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/MLP-Lemma/Evaluation-dataset
下载链接
链接失效反馈官方服务:
资源简介:
---
# Per-configuration schema (feature names and types) and size metadata.
# Each list entry describes one configuration and its single train split.
dataset_info:
- config_name: BookSum
  features:
  - name: output
    dtype: string
  - name: context
    dtype: string
  - name: instruction
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 17267998
    num_examples: 288
  download_size: 5970410
  dataset_size: 17267998
- config_name: BoolQ
  features:
  - name: instruction
    dtype: string
  - name: context
    dtype: string
  - name: output
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 2563572
    num_examples: 1000
  download_size: 816586
  dataset_size: 2563572
- config_name: CNN-DM
  features:
  - name: context
    dtype: string
  - name: output
    dtype: string
  - name: instruction
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 3645754
    num_examples: 300
  download_size: 1397132
  dataset_size: 3645754
- config_name: CosmosQA
  features:
  - name: context
    dtype: string
  - name: instruction
    dtype: string
  - name: output
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 879818
    num_examples: 300
  download_size: 273118
  dataset_size: 879818
- config_name: DROP
  features:
  - name: context
    dtype: string
  - name: instruction
    dtype: string
  - name: output
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 2016740
    num_examples: 500
  download_size: 518170
  dataset_size: 2016740
- config_name: GovReport
  features:
  - name: context
    dtype: string
  - name: output
    dtype: string
  - name: instruction
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 127666196
    num_examples: 923
  download_size: 39664285
  dataset_size: 127666196
- config_name: HotpotQA
  features:
  - name: instruction
    dtype: string
  - name: output
    dtype: string
  - name: context
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 17712002
    num_examples: 997
  download_size: 6058864
  dataset_size: 17712002
- config_name: Infbench-choice
  features:
  - name: id
    dtype: int64
  - name: context
    dtype: string
  - name: instruction
    dtype: string
  - name: output
    sequence: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 195101301
    num_examples: 119
  download_size: 38794741
  dataset_size: 195101301
- config_name: Infbench-qa
  features:
  - name: id
    dtype: int64
  - name: context
    dtype: string
  - name: instruction
    dtype: string
  - name: output
    sequence: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 327190419
    num_examples: 190
  download_size: 59896490
  dataset_size: 327190419
- config_name: Infbench-sum
  features:
  - name: id
    dtype: int64
  - name: context
    dtype: string
  - name: instruction
    dtype: string
  - name: output
    sequence: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 96526309
    num_examples: 62
  download_size: 34773252
  dataset_size: 96526309
- config_name: ReCoRD
  features:
  - name: context
    dtype: string
  - name: instruction
    dtype: string
  - name: output
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 2032246
    num_examples: 500
  download_size: 729920
  dataset_size: 2032246
- config_name: SQuAD
  features:
  - name: context
    dtype: string
  - name: instruction
    dtype: string
  - name: output
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 3067944
    num_examples: 998
  download_size: 993185
  dataset_size: 3067944
- config_name: TriviaQA
  features:
  - name: instruction
    dtype: string
  - name: context
    dtype: string
  - name: output
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 121417901
    num_examples: 848
  download_size: 41629964
  dataset_size: 121417901
- config_name: XSum
  features:
  - name: context
    dtype: string
  - name: output
    dtype: string
  - name: instruction
    dtype: string
  - name: input_ids
    sequence: int32
  - name: input_sentences_ids
    sequence:
      sequence: int64
  - name: inst_ids
    sequence:
      sequence: int64
  splits:
  - name: train
    num_bytes: 2264314
    num_examples: 299
  download_size: 857299
  dataset_size: 2264314
# Maps each configuration's train split to the glob pattern of its data files.
configs:
- config_name: BookSum
  data_files:
  - split: train
    path: BookSum/train-*
- config_name: BoolQ
  data_files:
  - split: train
    path: BoolQ/train-*
- config_name: CNN-DM
  data_files:
  - split: train
    path: CNN-DM/train-*
- config_name: CosmosQA
  data_files:
  - split: train
    path: CosmosQA/train-*
- config_name: DROP
  data_files:
  - split: train
    path: DROP/train-*
- config_name: GovReport
  data_files:
  - split: train
    path: GovReport/train-*
- config_name: HotpotQA
  data_files:
  - split: train
    path: HotpotQA/train-*
- config_name: Infbench-choice
  data_files:
  - split: train
    path: Infbench-choice/train-*
- config_name: Infbench-qa
  data_files:
  - split: train
    path: Infbench-qa/train-*
- config_name: Infbench-sum
  data_files:
  - split: train
    path: Infbench-sum/train-*
- config_name: ReCoRD
  data_files:
  - split: train
    path: ReCoRD/train-*
- config_name: SQuAD
  data_files:
  - split: train
    path: SQuAD/train-*
- config_name: TriviaQA
  data_files:
  - split: train
    path: TriviaQA/train-*
- config_name: XSum
  data_files:
  - split: train
    path: XSum/train-*
---
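The dataset comprises 14 configurations: BookSum, BoolQ, CNN-DM, CosmosQA, DROP, GovReport, HotpotQA, Infbench-choice, Infbench-qa, Infbench-sum, ReCoRD, SQuAD, TriviaQA, and XSum. Every configuration provides string-typed `context` and `instruction` fields and an `output` field, which is a single string in most configurations and a sequence of strings in the three Infbench configurations (these also carry an int64 `id`). Each configuration additionally includes three integer ID fields: `input_ids` (a sequence of int32), and `input_sentences_ids` and `inst_ids` (each a sequence of int64 sequences). Each configuration has a single `train` split, and its metadata records the number of examples, the dataset size in bytes, and the download size in bytes.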
提供机构:
MLP-Lemma
原始信息汇总
数据集概述
BookSum
- 特征:
- output: 字符串
- context: 字符串
- instruction: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 17267998字节
- 示例数: 288
- 下载大小: 5970410字节
BoolQ
- 特征:
- instruction: 字符串
- context: 字符串
- output: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 2563572字节
- 示例数: 1000
- 下载大小: 816586字节
CNN-DM
- 特征:
- context: 字符串
- output: 字符串
- instruction: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 3645754字节
- 示例数: 300
- 下载大小: 1397132字节
CosmosQA
- 特征:
- context: 字符串
- instruction: 字符串
- output: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 879818字节
- 示例数: 300
- 下载大小: 273118字节
DROP
- 特征:
- context: 字符串
- instruction: 字符串
- output: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 2016740字节
- 示例数: 500
- 下载大小: 518170字节
GovReport
- 特征:
- context: 字符串
- output: 字符串
- instruction: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 127666196字节
- 示例数: 923
- 下载大小: 39664285字节
HotpotQA
- 特征:
- instruction: 字符串
- output: 字符串
- context: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 17712002字节
- 示例数: 997
- 下载大小: 6058864字节
Infbench-choice
- 特征:
- id: 整数(int64)
- context: 字符串
- instruction: 字符串
- output: 字符串序列
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 195101301字节
- 示例数: 119
- 下载大小: 38794741字节
Infbench-qa
- 特征:
- id: 整数(int64)
- context: 字符串
- instruction: 字符串
- output: 字符串序列
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 327190419字节
- 示例数: 190
- 下载大小: 59896490字节
Infbench-sum
- 特征:
- id: 整数(int64)
- context: 字符串
- instruction: 字符串
- output: 字符串序列
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 96526309字节
- 示例数: 62
- 下载大小: 34773252字节
ReCoRD
- 特征:
- context: 字符串
- instruction: 字符串
- output: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 2032246字节
- 示例数: 500
- 下载大小: 729920字节
SQuAD
- 特征:
- context: 字符串
- instruction: 字符串
- output: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 3067944字节
- 示例数: 998
- 下载大小: 993185字节
TriviaQA
- 特征:
- instruction: 字符串
- context: 字符串
- output: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 121417901字节
- 示例数: 848
- 下载大小: 41629964字节
XSum
- 特征:
- context: 字符串
- output: 字符串
- instruction: 字符串
- input_ids: 整数序列(int32)
- input_sentences_ids: 嵌套整数序列(int64 序列的序列)
- inst_ids: 嵌套整数序列(int64 序列的序列)
- 训练集:
- 大小: 2264314字节
- 示例数: 299
- 下载大小: 857299字节



