toilaluan/sanitize_vl
收藏 | Hugging Face | 2026-03-18 更新 | 2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/toilaluan/sanitize_vl
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
dataset_info:
# Config `train-ocr`: feature schema followed by the recorded size statistics.
# NOTE(review): the nesting below is reconstructed from a flattened copy in
# which all indentation was lost; `user`/`assistant` are assumed to be the
# fields of each `texts` entry and `source` a top-level feature — confirm
# against the upstream dataset card.
- config_name: train-ocr
  features:
  - name: images
    list: image
  - name: texts
    list:
    - name: user
      dtype: string
    - name: assistant
      dtype: string
  - name: source
    dtype: string
  # Each rating dimension has a list-valued `*_ratings` column and a scalar
  # `*_min` column (presumably the minimum of the ratings — TODO confirm).
  - name: visual_dependency_ratings
    list: int64
  - name: visual_dependency_min
    dtype: int64
  - name: formatting_ratings
    list: int64
  - name: formatting_min
    dtype: int64
  - name: image_correspondence_ratings
    list: int64
  - name: image_correspondence_min
    dtype: int64
  - name: relevance_ratings
    list: int64
  - name: relevance_min
    dtype: int64
  splits:
  - name: train
    num_bytes: 7861047853
    num_examples: 16041
  download_size: 7864497675
  # Equals `num_bytes` of the only split listed above.
  dataset_size: 7861047853
# Config `train-qa`: feature schema followed by the recorded size statistics.
# NOTE(review): the nesting below is reconstructed from a flattened copy in
# which all indentation was lost; `user`/`assistant` are assumed to be the
# fields of each `texts` entry and `source` a top-level feature — confirm
# against the upstream dataset card.
- config_name: train-qa
  features:
  - name: images
    list: image
  - name: texts
    list:
    - name: user
      dtype: string
    - name: assistant
      dtype: string
  - name: source
    dtype: string
  # Each rating dimension has a list-valued `*_ratings` column and a scalar
  # `*_min` column (presumably the minimum of the ratings — TODO confirm).
  - name: relevance_ratings
    list: int64
  - name: relevance_min
    dtype: int64
  - name: visual_dependency_ratings
    list: int64
  - name: visual_dependency_min
    dtype: int64
  - name: image_correspondence_ratings
    list: int64
  - name: image_correspondence_min
    dtype: int64
  - name: formatting_ratings
    list: int64
  - name: formatting_min
    dtype: int64
  splits:
  - name: train
    num_bytes: 2376717390
    num_examples: 46159
  download_size: 2392726650
  # Equals `num_bytes` of the only split listed above.
  dataset_size: 2376717390
# Config `val-ocr`: feature schema followed by the recorded size statistics.
# NOTE(review): the nesting below is reconstructed from a flattened copy in
# which all indentation was lost; `user`/`assistant` are assumed to be the
# fields of each `texts` entry and `source` a top-level feature — confirm
# against the upstream dataset card.
- config_name: val-ocr
  features:
  - name: images
    list: image
  - name: texts
    list:
    - name: user
      dtype: string
    - name: assistant
      dtype: string
  - name: source
    dtype: string
  # Each rating dimension has a list-valued `*_ratings` column and a scalar
  # `*_min` column (presumably the minimum of the ratings — TODO confirm).
  - name: visual_dependency_ratings
    list: int64
  - name: visual_dependency_min
    dtype: int64
  - name: formatting_ratings
    list: int64
  - name: formatting_min
    dtype: int64
  - name: image_correspondence_ratings
    list: int64
  - name: image_correspondence_min
    dtype: int64
  - name: relevance_ratings
    list: int64
  - name: relevance_min
    dtype: int64
  splits:
  # The only split is named `train` even though the config name is `val-ocr`.
  - name: train
    num_bytes: 62727125
    num_examples: 128
  download_size: 65401902
  # Equals `num_bytes` of the only split listed above.
  dataset_size: 62727125
# Config `val-qa`: feature schema followed by the recorded size statistics.
# NOTE(review): the nesting below is reconstructed from a flattened copy in
# which all indentation was lost; `user`/`assistant` are assumed to be the
# fields of each `texts` entry and `source` a top-level feature — confirm
# against the upstream dataset card.
- config_name: val-qa
  features:
  - name: images
    list: image
  - name: texts
    list:
    - name: user
      dtype: string
    - name: assistant
      dtype: string
  - name: source
    dtype: string
  # Each rating dimension has a list-valued `*_ratings` column and a scalar
  # `*_min` column (presumably the minimum of the ratings — TODO confirm).
  - name: relevance_ratings
    list: int64
  - name: relevance_min
    dtype: int64
  - name: visual_dependency_ratings
    list: int64
  - name: visual_dependency_min
    dtype: int64
  - name: image_correspondence_ratings
    list: int64
  - name: image_correspondence_min
    dtype: int64
  - name: formatting_ratings
    list: int64
  - name: formatting_min
    dtype: int64
  splits:
  # The only split is named `train` even though the config name is `val-qa`.
  - name: train
    num_bytes: 6590536
    num_examples: 128
  download_size: 6742838
  # Equals `num_bytes` of the only split listed above.
  dataset_size: 6590536
# Maps each config name to the data files of its single `train` split.
# NOTE(review): nesting reconstructed from a flattened copy in which all
# indentation was lost — confirm against the upstream dataset card.
configs:
- config_name: train-ocr
  data_files:
  - split: train
    path: train-ocr/train-*
- config_name: train-qa
  data_files:
  - split: train
    path: train-qa/train-*
# The `val-*` configs also expose their data under a split named `train`.
- config_name: val-ocr
  data_files:
  - split: train
    path: val-ocr/train-*
- config_name: val-qa
  data_files:
  - split: train
    path: val-qa/train-*
---
提供机构:
toilaluan



