MLP-Lemma/Instruct-datasets
收藏 | Hugging Face | 2024-05-13 更新 | 2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/MLP-Lemma/Instruct-datasets
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: BigPatent
features:
- name: context
dtype: string
- name: output
dtype: string
- name: instruction
dtype: string
splits:
- name: train
num_bytes: 1601754641
num_examples: 50000
download_size: 641124435
dataset_size: 1601754641
- config_name: BookSum
features:
- name: output
dtype: string
- name: context
dtype: string
- name: instruction
dtype: string
splits:
- name: train
num_bytes: 254212252
num_examples: 9600
- name: validation
num_bytes: 34236979
num_examples: 1484
- name: test
num_bytes: 37939574
num_examples: 1431
download_size: 168552307
dataset_size: 326388805
- config_name: BoolQ
features:
- name: instruction
dtype: string
- name: context
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 7114238
num_examples: 9427
- name: validation
num_bytes: 2442220
num_examples: 3270
- name: test
num_bytes: 2427795
num_examples: 3245
download_size: 6580560
dataset_size: 11984253
- config_name: CNN-DM
features:
- name: context
dtype: string
- name: output
dtype: string
- name: instruction
dtype: string
splits:
- name: train
num_bytes: 444284494.2583582
num_examples: 100000
- name: validation
num_bytes: 58380052
num_examples: 13368
- name: test
num_bytes: 50480704
num_examples: 11490
download_size: 333656188
dataset_size: 553145250.2583582
- config_name: CosmosQA
features:
- name: context
dtype: string
- name: instruction
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 18371124
num_examples: 25262
- name: test
num_bytes: 5500433
num_examples: 6963
- name: validation
num_bytes: 2357043
num_examples: 2985
download_size: 10385528
dataset_size: 26228600
- config_name: DROP
features:
- name: context
dtype: string
- name: instruction
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 13787339.92248062
num_examples: 10000
- name: validation
num_bytes: 11717286
num_examples: 9535
download_size: 7737038
dataset_size: 25504625.92248062
- config_name: GovReport
features:
- name: context
dtype: string
- name: output
dtype: string
- name: instruction
dtype: string
splits:
- name: train
num_bytes: 954459414
num_examples: 17517
- name: validation
num_bytes: 55883493
num_examples: 973
- name: test
num_bytes: 51654524
num_examples: 973
download_size: 506640732
dataset_size: 1061997431
- config_name: HotpotQA
features:
- name: instruction
dtype: string
- name: output
dtype: string
- name: context
sequence: string
splits:
- name: train
num_bytes: 535762851
num_examples: 90447
- name: validation
num_bytes: 44301953
num_examples: 7405
download_size: 341058415
dataset_size: 580064804
- config_name: LongAlpaca
features:
- name: context
dtype: string
- name: instruction
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 432722520
num_examples: 8937
download_size: 150560798
dataset_size: 432722520
- config_name: MultiNews
features:
- name: context
dtype: string
- name: output
dtype: string
- name: instruction
dtype: string
splits:
- name: train
num_bytes: 560992073
num_examples: 44972
download_size: 323577415
dataset_size: 560992073
- config_name: MultiRC
features:
- name: context
dtype: string
- name: instruction
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 20144535.745879676
num_examples: 12025
download_size: 1073538
dataset_size: 20144535.745879676
- config_name: NarrativeQA
features:
- name: context
dtype: string
- name: instruction
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 11205217155
num_examples: 32747
download_size: 219398608
dataset_size: 11205217155
- config_name: QMsum
features:
- name: context
dtype: string
- name: output
dtype: string
- name: instruction
dtype: string
splits:
- name: train
num_bytes: 64540456
num_examples: 1257
download_size: 4106286
dataset_size: 64540456
- config_name: Qasper
features:
- name: context
dtype: string
- name: instruction
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 66059761
num_examples: 2567
download_size: 11200886
dataset_size: 66059761
- config_name: Quality
features:
- name: instruction
dtype: string
- name: output
dtype: string
- name: context
dtype: string
splits:
- name: train
num_bytes: 63062210
num_examples: 2523
download_size: 3354424
dataset_size: 63062210
- config_name: ReCoRD
features:
- name: context
dtype: string
- name: instruction
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 130993405
num_examples: 100730
- name: validation
num_bytes: 12866110
num_examples: 10000
- name: test
num_bytes: 12754476
num_examples: 10000
download_size: 65519961
dataset_size: 156613991
- config_name: SQuAD
features:
- name: context
dtype: string
- name: instruction
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 80852411
num_examples: 87599
- name: validation
num_bytes: 10506372
num_examples: 10570
download_size: 16200360
dataset_size: 91358783
- config_name: TriviaQA
features:
- name: instruction
dtype: string
- name: context
dtype: string
- name: output
dtype: string
splits:
- name: train
num_bytes: 3299873628
num_examples: 61888
- name: validation
num_bytes: 424762536
num_examples: 7993
- name: test
num_bytes: 405293513
num_examples: 7701
download_size: 2414090463
dataset_size: 4129929677
- config_name: XSum
features:
- name: context
dtype: string
- name: output
dtype: string
- name: instruction
dtype: string
splits:
- name: train
num_bytes: 242713691.097552
num_examples: 100000
- name: validation
num_bytes: 27183291
num_examples: 11332
- name: test
num_bytes: 27646656
num_examples: 11334
download_size: 182239672
dataset_size: 297543638.097552
configs:
- config_name: BigPatent
data_files:
- split: train
path: BigPatent/train-*
- config_name: BookSum
data_files:
- split: train
path: BookSum/train-*
- split: validation
path: BookSum/validation-*
- split: test
path: BookSum/test-*
- config_name: BoolQ
data_files:
- split: train
path: BoolQ/train-*
- split: validation
path: BoolQ/validation-*
- split: test
path: BoolQ/test-*
- config_name: CNN-DM
data_files:
- split: train
path: CNN-DM/train-*
- split: validation
path: CNN-DM/validation-*
- split: test
path: CNN-DM/test-*
- config_name: CosmosQA
data_files:
- split: train
path: CosmosQA/train-*
- split: test
path: CosmosQA/test-*
- split: validation
path: CosmosQA/validation-*
- config_name: DROP
data_files:
- split: train
path: DROP/train-*
- split: validation
path: DROP/validation-*
- config_name: GovReport
data_files:
- split: train
path: GovReport/train-*
- split: validation
path: GovReport/validation-*
- split: test
path: GovReport/test-*
- config_name: HotpotQA
data_files:
- split: train
path: HotpotQA/train-*
- split: validation
path: HotpotQA/validation-*
- config_name: LongAlpaca
data_files:
- split: train
path: LongAlpaca/train-*
- config_name: MultiNews
data_files:
- split: train
path: MultiNews/train-*
- config_name: MultiRC
data_files:
- split: train
path: MultiRC/train-*
- config_name: NarrativeQA
data_files:
- split: train
path: NarrativeQA/train-*
- config_name: QMsum
data_files:
- split: train
path: QMsum/train-*
- config_name: Qasper
data_files:
- split: train
path: Qasper/train-*
- config_name: Quality
data_files:
- split: train
path: Quality/train-*
- config_name: ReCoRD
data_files:
- split: train
path: ReCoRD/train-*
- split: validation
path: ReCoRD/validation-*
- split: test
path: ReCoRD/test-*
- config_name: SQuAD
data_files:
- split: train
path: SQuAD/train-*
- split: validation
path: SQuAD/validation-*
- config_name: TriviaQA
data_files:
- split: train
path: TriviaQA/train-*
- split: validation
path: TriviaQA/validation-*
- split: test
path: TriviaQA/test-*
- config_name: XSum
data_files:
- split: train
path: XSum/train-*
- split: validation
path: XSum/validation-*
- split: test
path: XSum/test-*
---
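This dataset card describes 19 instruction-tuning configurations: BigPatent, BookSum, BoolQ, CNN-DM, CosmosQA, DROP, GovReport, HotpotQA, LongAlpaca, MultiNews, MultiRC, NarrativeQA, QMsum, Qasper, Quality, ReCoRD, SQuAD, TriviaQA, and XSum. Every configuration exposes the same three features (`context`, `output`, `instruction`); all are strings except HotpotQA's `context`, which is a sequence of strings. For each configuration the metadata lists the available splits (train, and where present validation and test) with their size in bytes and number of examples, together with the configuration's download size and total dataset size. The `configs` section then maps each split of each configuration to its data-file path pattern (for example, `BookSum/train-*`).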
提供机构:
MLP-Lemma
原始信息汇总
数据集概述
BigPatent
- 特征:
- context: string
- output: string
- instruction: string
- 分割:
- train: 50000 examples, 1601754641 bytes
- 下载大小: 641124435 bytes
- 数据集大小: 1601754641 bytes
BookSum
- 特征:
- output: string
- context: string
- instruction: string
- 分割:
- train: 9600 examples, 254212252 bytes
- validation: 1484 examples, 34236979 bytes
- test: 1431 examples, 37939574 bytes
- 下载大小: 168552307 bytes
- 数据集大小: 326388805 bytes
BoolQ
- 特征:
- instruction: string
- context: string
- output: string
- 分割:
- train: 9427 examples, 7114238 bytes
- validation: 3270 examples, 2442220 bytes
- test: 3245 examples, 2427795 bytes
- 下载大小: 6580560 bytes
- 数据集大小: 11984253 bytes
CNN-DM
- 特征:
- context: string
- output: string
- instruction: string
- 分割:
- train: 100000 examples, 444284494.2583582 bytes
- validation: 13368 examples, 58380052 bytes
- test: 11490 examples, 50480704 bytes
- 下载大小: 333656188 bytes
- 数据集大小: 553145250.2583582 bytes
CosmosQA
- 特征:
- context: string
- instruction: string
- output: string
- 分割:
- train: 25262 examples, 18371124 bytes
- validation: 2985 examples, 2357043 bytes
- test: 6963 examples, 5500433 bytes
- 下载大小: 10385528 bytes
- 数据集大小: 26228600 bytes
DROP
- 特征:
- context: string
- instruction: string
- output: string
- 分割:
- train: 10000 examples, 13787339.92248062 bytes
- validation: 9535 examples, 11717286 bytes
- 下载大小: 7737038 bytes
- 数据集大小: 25504625.92248062 bytes
GovReport
- 特征:
- context: string
- output: string
- instruction: string
- 分割:
- train: 17517 examples, 954459414 bytes
- validation: 973 examples, 55883493 bytes
- test: 973 examples, 51654524 bytes
- 下载大小: 506640732 bytes
- 数据集大小: 1061997431 bytes
HotpotQA
- 特征:
- instruction: string
- output: string
- context: sequence of string
- 分割:
- train: 90447 examples, 535762851 bytes
- validation: 7405 examples, 44301953 bytes
- 下载大小: 341058415 bytes
- 数据集大小: 580064804 bytes
LongAlpaca
- 特征:
- context: string
- instruction: string
- output: string
- 分割:
- train: 8937 examples, 432722520 bytes
- 下载大小: 150560798 bytes
- 数据集大小: 432722520 bytes
MultiNews
- 特征:
- context: string
- output: string
- instruction: string
- 分割:
- train: 44972 examples, 560992073 bytes
- 下载大小: 323577415 bytes
- 数据集大小: 560992073 bytes
MultiRC
- 特征:
- context: string
- instruction: string
- output: string
- 分割:
- train: 12025 examples, 20144535.745879676 bytes
- 下载大小: 1073538 bytes
- 数据集大小: 20144535.745879676 bytes
NarrativeQA
- 特征:
- context: string
- instruction: string
- output: string
- 分割:
- train: 32747 examples, 11205217155 bytes
- 下载大小: 219398608 bytes
- 数据集大小: 11205217155 bytes
QMsum
- 特征:
- context: string
- output: string
- instruction: string
- 分割:
- train: 1257 examples, 64540456 bytes
- 下载大小: 4106286 bytes
- 数据集大小: 64540456 bytes
Qasper
- 特征:
- context: string
- instruction: string
- output: string
- 分割:
- train: 2567 examples, 66059761 bytes
- 下载大小: 11200886 bytes
- 数据集大小: 66059761 bytes
Quality
- 特征:
- instruction: string
- output: string
- context: string
- 分割:
- train: 2523 examples, 63062210 bytes
- 下载大小: 3354424 bytes
- 数据集大小: 63062210 bytes
ReCoRD
- 特征:
- context: string
- instruction: string
- output: string
- 分割:
- train: 100730 examples, 130993405 bytes
- validation: 10000 examples, 12866110 bytes
- test: 10000 examples, 12754476 bytes
- 下载大小: 65519961 bytes
- 数据集大小: 156613991 bytes
SQuAD
- 特征:
- context: string
- instruction: string
- output: string
- 分割:
- train: 87599 examples, 80852411 bytes
- validation: 10570 examples, 10506372 bytes
- 下载大小: 16200360 bytes
- 数据集大小: 91358783 bytes
TriviaQA
- 特征:
- instruction: string
- context: string
- output: string
- 分割:
- train: 61888 examples, 3299873628 bytes
- validation: 7993 examples, 424762536 bytes
- test: 7701 examples, 405293513 bytes
- 下载大小: 2414090463 bytes
- 数据集大小: 4129929677 bytes
XSum
- 特征:
- context: string
- output: string
- instruction: string
- 分割:
- train: 100000 examples, 242713691.097552 bytes
- validation: 11332 examples, 27183291 bytes
- test: 11334 examples, 27646656 bytes
- 下载大小: 182239672 bytes
- 数据集大小: 297543638.097552 bytes



