Xnhyacinth/Image
收藏 | Hugging Face | 2023-12-08 更新 | 2024-03-04 收录
下载链接:
https://hf-mirror.com/datasets/Xnhyacinth/Image
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: NQ
features:
- name: id
dtype: int64
- name: question
dtype: string
- name: answers
sequence: string
- name: ctxs
list:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
- name: compressed_ctxs_1
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_5
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_10
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_20
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_50
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_100
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
splits:
- name: train
num_bytes: 6106425228
num_examples: 79168
- name: eval
num_bytes: 675422872
num_examples: 8757
- name: test
num_bytes: 279441134
num_examples: 3610
download_size: 3931027405
dataset_size: 7061289234
- config_name: TQA
features:
- name: id
dtype: int64
- name: question
dtype: string
- name: answers
sequence: string
- name: target
dtype: string
- name: ctxs
list:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
- name: compressed_ctxs_1
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_5
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_10
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_20
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_50
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_100
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
splits:
- name: train
num_bytes: 6116069275
num_examples: 78785
- name: eval
num_bytes: 685921423
num_examples: 8837
- name: test
num_bytes: 878592842
num_examples: 11313
download_size: 4438699237
dataset_size: 7680583540
- config_name: WQ
features:
- name: id
dtype: string
- name: question
dtype: string
- name: answers
sequence: string
- name: ctxs
list:
- name: hasanswer
dtype: bool
- name: id
dtype: string
- name: score
dtype: string
- name: text
dtype: string
- name: title
dtype: string
- name: compressed_ctxs_5
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_10
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_20
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_50
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
- name: compressed_ctxs_100
struct:
- name: compressed_prompt
dtype: string
- name: compressed_tokens
dtype: int64
- name: origin_tokens
dtype: int64
- name: ratio
dtype: string
- name: saving
dtype: string
splits:
- name: train
num_bytes: 268644771
num_examples: 3478
- name: eval
num_bytes: 23143123
num_examples: 300
- name: test
num_bytes: 157146882
num_examples: 2032
download_size: 254281138
dataset_size: 448934776
configs:
- config_name: NQ
data_files:
- split: train
path: NQ/train-*
- split: eval
path: NQ/eval-*
- split: test
path: NQ/test-*
- config_name: TQA
data_files:
- split: train
path: TQA/train-*
- split: eval
path: TQA/eval-*
- split: test
path: TQA/test-*
- config_name: WQ
data_files:
- split: train
path: WQ/train-*
- split: eval
path: WQ/eval-*
- split: test
path: WQ/test-*
---
提供机构:
Xnhyacinth
原始信息汇总
数据集概述
数据集配置
-
NQ
- 特征:
id: 类型 int64;question: 类型 string;answers: 序列类型 string;ctxs: 列表类型(id: 类型 string;text: 类型 string;title: 类型 string)
compressed_ctxs_1: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_5: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_10: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_20: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_50: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_100: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
- 分割:
train: 字节数 6106425228, 样本数 79168;eval: 字节数 675422872, 样本数 8757;test: 字节数 279441134, 样本数 3610
- 下载大小: 3931027405
- 数据集大小: 7061289234
- 特征:
-
TQA
- 特征:
id: 类型 int64;question: 类型 string;answers: 序列类型 string;target: 类型 string;ctxs: 列表类型(id: 类型 string;text: 类型 string;title: 类型 string)
compressed_ctxs_1: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_5: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_10: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_20: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_50: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_100: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
- 分割:
train: 字节数 6116069275, 样本数 78785;eval: 字节数 685921423, 样本数 8837;test: 字节数 878592842, 样本数 11313
- 下载大小: 4438699237
- 数据集大小: 7680583540
- 特征:
-
WQ
- 特征:
id: 类型 string;question: 类型 string;answers: 序列类型 string;ctxs: 列表类型(hasanswer: 类型 bool;id: 类型 string;score: 类型 string;text: 类型 string;title: 类型 string)
compressed_ctxs_5: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_10: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_20: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_50: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
compressed_ctxs_100: 结构类型(compressed_prompt: 类型 string;compressed_tokens: 类型 int64;origin_tokens: 类型 int64;ratio: 类型 string;saving: 类型 string)
- 分割:
train: 字节数 268644771, 样本数 3478;eval: 字节数 23143123, 样本数 300;test: 字节数 157146882, 样本数 2032
- 下载大小: 254281138
- 数据集大小: 448934776
- 特征:



