huy-nh-2000/instructUIE
收藏 | Hugging Face | 2024-05-08 更新 | 2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/huy-nh-2000/instructUIE
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
# Config "EE": feature schema, then split statistics (train split only).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: EE
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 20189069
    num_examples: 25014
  download_size: 4399272
  dataset_size: 20189069
# Config "EEA": feature schema, then split statistics
# (train, validation, test).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: EEA
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 7021834
    num_examples: 9991
  - name: validation
    num_bytes: 1645543
    num_examples: 2076
  - name: test
    num_bytes: 2207530
    num_examples: 2761
  download_size: 2765025
  dataset_size: 10874907
# Config "EET": feature schema, then split statistics
# (train, validation, test).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: EET
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 5465589
    num_examples: 9991
  - name: validation
    num_bytes: 1020359
    num_examples: 2076
  - name: test
    num_bytes: 1343998
    num_examples: 2761
  download_size: 1682269
  dataset_size: 7829946
# Config "EP": feature schema, then split statistics (train, validation).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: EP
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 34708167
    num_examples: 44377
  - name: validation
    num_bytes: 2607084
    num_examples: 3220
  download_size: 8806588
  dataset_size: 37315251
# Config "EPR": feature schema, then split statistics (train, validation).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: EPR
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 40193520
    num_examples: 44342
  - name: validation
    num_bytes: 2992986
    num_examples: 3206
  download_size: 22665880
  dataset_size: 43186506
# Config "ES": feature schema, then split statistics (train, validation).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: ES
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 106827307
    num_examples: 255852
  - name: validation
    num_bytes: 17352441
    num_examples: 38923
  download_size: 73239008
  dataset_size: 124179748
# Config "ET": feature schema, then split statistics (train, validation).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: ET
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 130028891
    num_examples: 255763
  - name: validation
    num_bytes: 21584246
    num_examples: 38645
  download_size: 91560010
  dataset_size: 151613137
# Config "NER": feature schema, then split statistics
# (train, validation, test).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: NER
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 402710463
    num_examples: 862638
  - name: validation
    num_bytes: 65625327
    num_examples: 128623
  - name: test
    num_bytes: 65778404
    num_examples: 128651
  download_size: 265187111
  dataset_size: 534114194
# Config "RE": feature schema, then split statistics
# (train, validation, test).
# dataset_size equals the sum of the splits' num_bytes.
- config_name: RE
  features:
  - name: Task
    dtype: string
  - name: Dataset
    dtype: string
  - name: subset
    dtype: string
  - name: Samples
    sequence: 'null'
  - name: Instance
    struct:
    - name: id
      dtype: string
    - name: sentence
      dtype: string
    - name: label
      dtype: string
    - name: instruction
      dtype: string
    - name: ground_truth
      dtype: string
  splits:
  - name: train
    num_bytes: 104596538
    num_examples: 120950
  - name: validation
    num_bytes: 9981918
    num_examples: 10628
  - name: test
    num_bytes: 14975259
    num_examples: 16911
  download_size: 28416114
  dataset_size: 129553715
# Maps every config to its splits and the file glob backing each split.
# Paths follow the pattern <config_name>/<split>-*.
configs:
- config_name: EE
  data_files:
  - split: train
    path: EE/train-*
- config_name: EEA
  data_files:
  - split: train
    path: EEA/train-*
  - split: validation
    path: EEA/validation-*
  - split: test
    path: EEA/test-*
- config_name: EET
  data_files:
  - split: train
    path: EET/train-*
  - split: validation
    path: EET/validation-*
  - split: test
    path: EET/test-*
- config_name: EP
  data_files:
  - split: train
    path: EP/train-*
  - split: validation
    path: EP/validation-*
- config_name: EPR
  data_files:
  - split: train
    path: EPR/train-*
  - split: validation
    path: EPR/validation-*
- config_name: ES
  data_files:
  - split: train
    path: ES/train-*
  - split: validation
    path: ES/validation-*
- config_name: ET
  data_files:
  - split: train
    path: ET/train-*
  - split: validation
    path: ET/validation-*
- config_name: NER
  data_files:
  - split: train
    path: NER/train-*
  - split: validation
    path: NER/validation-*
  - split: test
    path: NER/test-*
- config_name: RE
  data_files:
  - split: train
    path: RE/train-*
  - split: validation
    path: RE/validation-*
  - split: test
    path: RE/test-*
---
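The dataset consists of nine configurations (EE, EEA, EET, EP, EPR, ES, ET, NER, and RE), each targeting a different natural language processing task, such as Event Extraction (EE), Named Entity Recognition (NER), and Relation Extraction (RE). All configurations share the same schema: the task type (Task), the source dataset name (Dataset), the subset, a Samples field (a sequence of null values), and an Instance struct containing an ID, sentence, label, instruction, and ground truth. Every configuration provides a train split, but validation and test splits exist only for some of them: EE has train only; EP, EPR, ES, and ET have train and validation; EEA, EET, NER, and RE have train, validation, and test. The data file path pattern for each split is listed in the metadata above.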
提供机构:
huy-nh-2000
原始信息汇总
数据集概述
配置名称:EE
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 20189069
- 样本数: 25014
- 总计:
- 下载大小:4399272
- 数据集大小:20189069
配置名称:EEA
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 7021834
- 样本数: 9991
- validation:
- 字节数: 1645543
- 样本数: 2076
- test:
- 字节数: 2207530
- 样本数: 2761
- 总计:
- 下载大小:2765025
- 数据集大小:10874907
配置名称:EET
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 5465589
- 样本数: 9991
- validation:
- 字节数: 1020359
- 样本数: 2076
- test:
- 字节数: 1343998
- 样本数: 2761
- 总计:
- 下载大小:1682269
- 数据集大小:7829946
配置名称:EP
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 34708167
- 样本数: 44377
- validation:
- 字节数: 2607084
- 样本数: 3220
- 总计:
- 下载大小:8806588
- 数据集大小:37315251
配置名称:EPR
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 40193520
- 样本数: 44342
- validation:
- 字节数: 2992986
- 样本数: 3206
- 总计:
- 下载大小:22665880
- 数据集大小:43186506
配置名称:ES
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 106827307
- 样本数: 255852
- validation:
- 字节数: 17352441
- 样本数: 38923
- 总计:
- 下载大小:73239008
- 数据集大小:124179748
配置名称:ET
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 130028891
- 样本数: 255763
- validation:
- 字节数: 21584246
- 样本数: 38645
- 总计:
- 下载大小:91560010
- 数据集大小:151613137
配置名称:NER
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 402710463
- 样本数: 862638
- validation:
- 字节数: 65625327
- 样本数: 128623
- test:
- 字节数: 65778404
- 样本数: 128651
- 总计:
- 下载大小:265187111
- 数据集大小:534114194
配置名称:RE
- 特征:
- Task: string
- Dataset: string
- subset: string
- Samples: null
- Instance:
- id: string
- sentence: string
- label: string
- instruction: string
- ground_truth: string
- 分割:
- train:
- 字节数: 104596538
- 样本数: 120950
- validation:
- 字节数: 9981918
- 样本数: 10628
- test:
- 字节数: 14975259
- 样本数: 16911
- 总计:
- 下载大小:28416114
- 数据集大小:129553715



