Moenupa/Vision-Flan-191-1k
收藏Hugging Face2026-04-15 更新2026-04-26 收录
下载链接:
https://hf-mirror.com/datasets/Moenupa/Vision-Flan-191-1k
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: 300w+human_portrait_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 2140527041
num_examples: 600
download_size: 2140342754
dataset_size: 2140527041
- config_name: A-OKVQA+answer_rationales_matching
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 167256468
num_examples: 1000
download_size: 167139150
dataset_size: 167256468
- config_name: A-OKVQA+rationales_generation
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 168723202
num_examples: 1000
download_size: 168526755
dataset_size: 168723202
- config_name: A-OKVQA+visual_question_answering
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 168570401
num_examples: 1000
download_size: 168486846
dataset_size: 168570401
- config_name: AI2D+visual_question_answering
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 218699854
num_examples: 1000
download_size: 218508988
dataset_size: 218699854
- config_name: AID+aerial_scene_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 268750581
num_examples: 1000
download_size: 268668745
dataset_size: 268750581
- config_name: CHART2TEXT+chart_caption
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 126300035
num_examples: 1000
download_size: 125939575
dataset_size: 126300035
- config_name: CLEVR+question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 190012271
num_examples: 1000
download_size: 189904999
dataset_size: 190012271
- config_name: CLEVR_CoGenT+Multiple_Question_Answering
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 192156151
num_examples: 1000
download_size: 191093245
dataset_size: 192156151
- config_name: CLEVR_CoGenT+Question_Answer_Matching
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 191501595
num_examples: 1000
download_size: 190475382
dataset_size: 191501595
- config_name: CLEVR_CoGenT+Question_Answering
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 190599780
num_examples: 1000
download_size: 190281922
dataset_size: 190599780
- config_name: CLEVR_CoGenT+VQA_context
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 191129398
num_examples: 1000
download_size: 190403427
dataset_size: 191129398
- config_name: CONCADIA+image_caption_context_1
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 16444826
num_examples: 1000
download_size: 16039874
dataset_size: 16444826
- config_name: CONCADIA+image_caption_context_2
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 16605352
num_examples: 1000
download_size: 16167722
dataset_size: 16605352
- config_name: CONCADIA+image_description
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 14945703
num_examples: 1000
download_size: 14851561
dataset_size: 14945703
- config_name: CUB-200-2011+Bird_Classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 98059117
num_examples: 1000
download_size: 97961698
dataset_size: 98059117
- config_name: Caltech-256+image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 40049835
num_examples: 1000
download_size: 39697993
dataset_size: 40049835
- config_name: Caltech101+Image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 15757226
num_examples: 1000
download_size: 15343607
dataset_size: 15757226
- config_name: Caltech101+Living_Thing_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 16243889
num_examples: 1000
download_size: 15754483
dataset_size: 16243889
- config_name: Cars+car_brand_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 61862134
num_examples: 1000
download_size: 61476229
dataset_size: 61862134
- config_name: Cars+car_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 64980837
num_examples: 1000
download_size: 64481154
dataset_size: 64980837
- config_name: Clevr+Multiple_Question_Answering
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 191946525
num_examples: 1000
download_size: 190881022
dataset_size: 191946525
- config_name: Clevr+Question_Answer_Matching
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 191271744
num_examples: 1000
download_size: 190371881
dataset_size: 191271744
- config_name: Clevr+Question_Answering
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 190621536
num_examples: 1000
download_size: 190302158
dataset_size: 190621536
- config_name: Clevr+VQA_context
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 191284570
num_examples: 1000
download_size: 190565601
dataset_size: 191284570
- config_name: CoVA+webpage_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 511112611
num_examples: 1000
download_size: 511044465
dataset_size: 511112611
- config_name: ConceptualCaptions+image_captioning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 89718015
num_examples: 1000
download_size: 89336668
dataset_size: 89718015
- config_name: Core50+Object_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 162176459
num_examples: 1000
download_size: 161888941
dataset_size: 162176459
- config_name: DAQUAR+object_question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 298180676
num_examples: 1000
download_size: 298085580
dataset_size: 298180676
- config_name: DOCVQA+question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 695700817
num_examples: 1000
download_size: 695665056
dataset_size: 695700817
- config_name: DOMAIN_NET+clipart_image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 28317929
num_examples: 1000
download_size: 28151698
dataset_size: 28317929
- config_name: DOMAIN_NET+image_category_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 32428012
num_examples: 1000
download_size: 32132428
dataset_size: 32428012
- config_name: DOMAIN_NET+infograph_image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 90911231
num_examples: 1000
download_size: 90703288
dataset_size: 90911231
- config_name: DOMAIN_NET+painting_image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 50314907
num_examples: 1000
download_size: 50223295
dataset_size: 50314907
- config_name: DOMAIN_NET+quickdraw_image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 2992102
num_examples: 1000
download_size: 2869778
dataset_size: 2992102
- config_name: DOMAIN_NET+real_image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 36006608
num_examples: 1000
download_size: 35953236
dataset_size: 36006608
- config_name: DTD+all_texture_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 118665334
num_examples: 1000
download_size: 118458759
dataset_size: 118665334
- config_name: DTD+coarse_grained_texture_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 108515096
num_examples: 1000
download_size: 108325917
dataset_size: 108515096
- config_name: DVQA+charts_question_answer_1
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 22484393
num_examples: 1000
download_size: 22339818
dataset_size: 22484393
- config_name: DVQA+charts_question_answer_2
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 21860607
num_examples: 1000
download_size: 21721436
dataset_size: 21860607
- config_name: Dark-Zurich+time_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 1981788216
num_examples: 1000
download_size: 1981760079
dataset_size: 1981788216
- config_name: DeepFashion_highres_Attribute_and_Category+Cloth_Classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 280608052
num_examples: 1000
download_size: 280509567
dataset_size: 280608052
- config_name: DeepWeeds+weed_species_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 28290802
num_examples: 1000
download_size: 28154358
dataset_size: 28290802
- config_name: ExDark+object_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 155199853
num_examples: 1000
download_size: 155082815
dataset_size: 155199853
- config_name: FFHQ-Text+text-to-face_generation
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 1051358216
num_examples: 760
download_size: 1050852239
dataset_size: 1051358216
- config_name: FGVC_Aircraft+Aircraft_Classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 278918556
num_examples: 1000
download_size: 278161976
dataset_size: 278918556
- config_name: FGVC_Aircraft+Aircraft_Classification_Family
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 279238830
num_examples: 1000
download_size: 278998902
dataset_size: 279238830
- config_name: FGVC_Aircraft+Aircraft_Classification_Manufacturer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 281163612
num_examples: 1000
download_size: 280919172
dataset_size: 281163612
- config_name: FGVC_Aircraft+Aircraft_Classification_Variant
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 271244587
num_examples: 1000
download_size: 270971993
dataset_size: 271244587
- config_name: FLICKR30K+caption_image
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 141253316
num_examples: 1000
download_size: 140953888
dataset_size: 141253316
- config_name: FUNSD+text_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 16843280
num_examples: 199
download_size: 16743083
dataset_size: 16843280
- config_name: FlickrLogos-27+logo_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 86424148
num_examples: 944
download_size: 86359502
dataset_size: 86424148
- config_name: FoodLogoDet-1500+food_logo_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 67972950
num_examples: 1000
download_size: 67903063
dataset_size: 67972950
- config_name: GEOMETRY3K+geometry_question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 20837619
num_examples: 1000
download_size: 20659992
dataset_size: 20837619
- config_name: GQA
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 142450996
num_examples: 1000
download_size: 142421911
dataset_size: 142450996
- config_name: GTSRB+image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 3383640
num_examples: 1000
download_size: 2791215
dataset_size: 3383640
- config_name: HICO+human_activity_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 182323353
num_examples: 1000
download_size: 182152526
dataset_size: 182323353
- config_name: HICO+object_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 175989137
num_examples: 1000
download_size: 175948073
dataset_size: 175989137
- config_name: ITM
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 164192526
num_examples: 1000
download_size: 164064240
dataset_size: 164192526
- config_name: ImageNet-A+image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 93696415
num_examples: 1000
download_size: 93243074
dataset_size: 93696415
- config_name: ImageNet-C+image_classification_blur
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 8279596
num_examples: 1000
download_size: 7840627
dataset_size: 8279596
- config_name: ImageNet-C+image_classification_general
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 14285415
num_examples: 1000
download_size: 13961493
dataset_size: 14285415
- config_name: ImageNet-C+image_classification_noise
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 20114021
num_examples: 1000
download_size: 19616935
dataset_size: 20114021
- config_name: ImageNet-C+image_classification_weather
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 13449860
num_examples: 1000
download_size: 13132431
dataset_size: 13449860
- config_name: ImageNet-R+image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 75079951
num_examples: 1000
download_size: 74745913
dataset_size: 75079951
- config_name: ImageNet-R+image_domain_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 75727085
num_examples: 1000
download_size: 75404119
dataset_size: 75727085
- config_name: ImageNet-Sketch+image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 149100004
num_examples: 1000
download_size: 148993977
dataset_size: 149100004
- config_name: KVQA+image_captioning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 1120730565
num_examples: 998
download_size: 1120635873
dataset_size: 1120730565
- config_name: KVQA+image_question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 1040644785
num_examples: 1000
download_size: 1040403190
dataset_size: 1040644785
- config_name: LAD+object_detection_details
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 87160367
num_examples: 1000
download_size: 86726090
dataset_size: 87160367
- config_name: LFW+face_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 16281050
num_examples: 1000
download_size: 15358367
dataset_size: 16281050
- config_name: LOC_NARRATIVES+ade20k_images_caption
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 171951596
num_examples: 1000
download_size: 171768674
dataset_size: 171951596
- config_name: LOC_NARRATIVES+coco_images_caption
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 162043144
num_examples: 1000
download_size: 161863110
dataset_size: 162043144
- config_name: LOC_NARRATIVES+flickr30k_images_caption
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 140212542
num_examples: 1000
download_size: 139992440
dataset_size: 140212542
- config_name: LOC_NARRATIVES+open_images_caption
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 324817003
num_examples: 1000
download_size: 324695184
dataset_size: 324817003
- config_name: LSUN+Image_Classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 19499449
num_examples: 1000
download_size: 19124933
dataset_size: 19499449
- config_name: MEMOTION+sentiment_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 118742396
num_examples: 1000
download_size: 118653648
dataset_size: 118742396
- config_name: MNIST-M+number_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 2457499
num_examples: 1000
download_size: 2269541
dataset_size: 2457499
- config_name: MVTecAD+anomaly_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 985978274
num_examples: 1000
download_size: 985189959
dataset_size: 985978274
- config_name: MVTecAD+image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 973906108
num_examples: 1000
download_size: 973352845
dataset_size: 973906108
- config_name: MemeCap+image_captioning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 250269010
num_examples: 1000
download_size: 250143363
dataset_size: 250269010
- config_name: MemeCap+meme_captioning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 274003198
num_examples: 1000
download_size: 273927945
dataset_size: 274003198
- config_name: NOCAPS+image_caption
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 316223570
num_examples: 1000
download_size: 315826207
dataset_size: 316223570
- config_name: NUS-WIDE+Animal_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 10744584
num_examples: 1000
download_size: 10463634
dataset_size: 10744584
- config_name: ObjectNet+Object_classfication
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 351965136
num_examples: 1000
download_size: 351701354
dataset_size: 351965136
- config_name: Office-Home+Image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 91918082
num_examples: 1000
download_size: 91640584
dataset_size: 91918082
- config_name: Office_31+Image_Classification_Category
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 21853213
num_examples: 1000
download_size: 21322252
dataset_size: 21853213
- config_name: Office_31+Image_Classification_Object
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 21059363
num_examples: 1000
download_size: 20720682
dataset_size: 21059363
- config_name: Office_31+Image_Classification_ObjectAndCategory
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 21437032
num_examples: 1000
download_size: 20846606
dataset_size: 21437032
- config_name: PACS+art_painting_object_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 26254622
num_examples: 1000
download_size: 26142745
dataset_size: 26254622
- config_name: PACS+cartoon_object_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 18516101
num_examples: 1000
download_size: 18412419
dataset_size: 18516101
- config_name: PACS+dog_image_category_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 19490841
num_examples: 1000
download_size: 19246725
dataset_size: 19490841
- config_name: PACS+elephant_image_category_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 19916638
num_examples: 1000
download_size: 19670980
dataset_size: 19916638
- config_name: PACS+giraffe_image_category_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 18611096
num_examples: 1000
download_size: 18364570
dataset_size: 18611096
- config_name: PACS+guitar_image_category_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 16091076
num_examples: 1000
download_size: 15848215
dataset_size: 16091076
- config_name: PACS+horse_image_category_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 19192699
num_examples: 1000
download_size: 18947892
dataset_size: 19192699
- config_name: PACS+house_image_category_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 23562841
num_examples: 943
download_size: 23325806
dataset_size: 23562841
- config_name: PACS+person_image_category_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 21139131
num_examples: 1000
download_size: 20958986
dataset_size: 21139131
- config_name: PACS+photo_object_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 25192664
num_examples: 1000
download_size: 25094922
dataset_size: 25192664
- config_name: PICKAPIC+image_short_description
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 698218691
num_examples: 1000
download_size: 698209097
dataset_size: 698218691
- config_name: Places205+Image_env_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 15828514
num_examples: 1000
download_size: 15329823
dataset_size: 15828514
- config_name: PlotQA+visual_question_answering
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 34046467
num_examples: 1000
download_size: 33839728
dataset_size: 34046467
- config_name: RAVEN+next_pattern
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 48337137
num_examples: 1000
download_size: 48130196
dataset_size: 48337137
- config_name: REDCAPS+reddit_caption_1
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 997786749
num_examples: 1000
download_size: 997726481
dataset_size: 997786749
- config_name: REDCAPS+reddit_caption_2
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 1009819614
num_examples: 1000
download_size: 1009650771
dataset_size: 1009819614
- config_name: Road-Anomaly+anomaly_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 22349998
num_examples: 60
download_size: 22349718
dataset_size: 22349998
- config_name: SCUT-CTW1500+text_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 51064615
num_examples: 500
download_size: 51035103
dataset_size: 51064615
- config_name: SKETCH+living_organism_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 30855770
num_examples: 1000
download_size: 30547472
dataset_size: 30855770
- config_name: SKETCH+object_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 30234277
num_examples: 1000
download_size: 30123163
dataset_size: 30234277
- config_name: STANFORD_DOGS+dog_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 38942453
num_examples: 1000
download_size: 38856716
dataset_size: 38942453
- config_name: STL-10+Image_Classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 18126414
num_examples: 1000
download_size: 17919095
dataset_size: 18126414
- config_name: STVQA+image_question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 102353565
num_examples: 1000
download_size: 102259114
dataset_size: 102353565
- config_name: SentiCap+image_sentiment_captioning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 159595072
num_examples: 1000
download_size: 159311925
dataset_size: 159595072
- config_name: Set5+image_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 454977
num_examples: 15
download_size: 457254
dataset_size: 454977
- config_name: Total-Text+text_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 283410772
num_examples: 1000
download_size: 283219940
dataset_size: 283410772
- config_name: VIQUAE+question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 93788382
num_examples: 1000
download_size: 93704092
dataset_size: 93788382
- config_name: VIZWIZ+image_captioning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 474948969
num_examples: 1000
download_size: 474768122
dataset_size: 474948969
- config_name: VOC2007+object_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 87808821
num_examples: 1000
download_size: 87690937
dataset_size: 87808821
- config_name: VQA
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 49089960
num_examples: 1000
download_size: 49028665
dataset_size: 49089960
- config_name: VQA-E+image_captioning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 159521243
num_examples: 1000
download_size: 159449910
dataset_size: 159521243
- config_name: VQA-E+visual_question_answering
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 155418254
num_examples: 1000
download_size: 155302856
dataset_size: 155418254
- config_name: VQARAD+question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 54255085
num_examples: 1000
download_size: 54110459
dataset_size: 54255085
- config_name: VQA_activity_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 186898154
num_examples: 1000
download_size: 186811909
dataset_size: 186898154
- config_name: VQA_attribute
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 164746926
num_examples: 1000
download_size: 164670819
dataset_size: 164746926
- config_name: VQA_color
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 167578937
num_examples: 1000
download_size: 167524543
dataset_size: 167578937
- config_name: VQA_counting
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 167373291
num_examples: 1000
download_size: 167307517
dataset_size: 167373291
- config_name: VQA_object_presence
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 165550901
num_examples: 1000
download_size: 165484012
dataset_size: 165550901
- config_name: VQA_object_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 170587163
num_examples: 1000
download_size: 170497921
dataset_size: 170587163
- config_name: VQA_positional_reasoning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 158174051
num_examples: 1000
download_size: 158089189
dataset_size: 158174051
- config_name: VQA_scene_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 158043107
num_examples: 1000
download_size: 157967337
dataset_size: 158043107
- config_name: VQA_sentiment_understanding
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 157646518
num_examples: 1000
download_size: 157579458
dataset_size: 157646518
- config_name: VQA_sport_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 158605283
num_examples: 1000
download_size: 158525889
dataset_size: 158605283
- config_name: VQA_utility_affordance
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 45628502
num_examples: 291
download_size: 45598029
dataset_size: 45628502
- config_name: VQAv2
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 157748127
num_examples: 1000
download_size: 157750446
dataset_size: 157748127
- config_name: VQG+caption_generation
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 216168937
num_examples: 995
download_size: 216129859
dataset_size: 216168937
- config_name: VQG+question_generation
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 391602450
num_examples: 998
download_size: 391553702
dataset_size: 391602450
- config_name: VisDA-2017+image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 51273148
num_examples: 1000
download_size: 50743097
dataset_size: 51273148
- config_name: VisDA-2017+object_classification_train
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 50519066
num_examples: 1000
download_size: 50300135
dataset_size: 50519066
- config_name: VisDA-2017+object_classification_validation
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 18623537
num_examples: 1000
download_size: 18387463
dataset_size: 18623537
- config_name: WIKIART+art_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 301364011
num_examples: 1000
download_size: 301185938
dataset_size: 301364011
- config_name: WIT+detailed_description
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 1414632839
num_examples: 1000
download_size: 1414603437
dataset_size: 1414632839
- config_name: Winoground+image_captioning
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 369998705
num_examples: 800
download_size: 369838978
dataset_size: 369998705
- config_name: Yoga-82+yoga_pose_recognition
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 167815353
num_examples: 1000
download_size: 167759777
dataset_size: 167815353
- config_name: ayahoo_test_images+animal_object_vehicle_image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 71305702
num_examples: 1000
download_size: 70536257
dataset_size: 71305702
- config_name: cinic-10+image_classification_animal
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 3421613
num_examples: 1000
download_size: 3173863
dataset_size: 3421613
- config_name: cinic-10+image_classification_shipping
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 3687725
num_examples: 1000
download_size: 3427422
dataset_size: 3687725
- config_name: cinic-10+image_classification_transport
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 3711412
num_examples: 1000
download_size: 3412348
dataset_size: 3711412
- config_name: cinic-10+object_presence_animal
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 3411115
num_examples: 1000
download_size: 3121235
dataset_size: 3411115
- config_name: cinic-10+object_presence_shipping
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 3535280
num_examples: 1000
download_size: 3199376
dataset_size: 3535280
- config_name: coco+image_classification_animal
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 191631455
num_examples: 1000
download_size: 191119516
dataset_size: 191631455
- config_name: coco+image_classification_appliance
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 141057883
num_examples: 1000
download_size: 140676945
dataset_size: 141057883
- config_name: coco+image_classification_furniture
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 165094934
num_examples: 1000
download_size: 164713910
dataset_size: 165094934
- config_name: coco+image_classification_kitchen
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 170069740
num_examples: 1000
download_size: 169660934
dataset_size: 170069740
- config_name: coco+image_classification_sports
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 158848644
num_examples: 1000
download_size: 158278186
dataset_size: 158848644
- config_name: coco+image_classification_vehicle
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 180197579
num_examples: 1000
download_size: 179746189
dataset_size: 180197579
- config_name: crowdhuman+count_numbers
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 611544018
num_examples: 1000
download_size: 611403207
dataset_size: 611544018
- config_name: expw+expression_detection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 85656778
num_examples: 1000
download_size: 85538044
dataset_size: 85656778
- config_name: fairface+image_classification_age
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 21896751
num_examples: 1000
download_size: 21737604
dataset_size: 21896751
- config_name: fairface+image_classification_gender
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 21726421
num_examples: 1000
download_size: 21579415
dataset_size: 21726421
- config_name: fairface+image_classification_race
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 21808878
num_examples: 1000
download_size: 21696282
dataset_size: 21808878
- config_name: iconqa+choose_txt
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 11459348
num_examples: 1000
download_size: 11299983
dataset_size: 11459348
- config_name: iconqa+fill_in_blank
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 13289525
num_examples: 1000
download_size: 13137889
dataset_size: 13289525
- config_name: image_caption
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 163964459
num_examples: 1000
download_size: 163897983
dataset_size: 163964459
- config_name: image_quality
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 481358912
num_examples: 1000
download_size: 481248062
dataset_size: 481358912
- config_name: image_text_selection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 164076095
num_examples: 1000
download_size: 163882340
dataset_size: 164076095
- config_name: infographicvqa+question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 345350673
num_examples: 1000
download_size: 345235481
dataset_size: 345350673
- config_name: infographicvqa+single_document_question
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 671537892
num_examples: 1000
download_size: 671458816
dataset_size: 671537892
- config_name: model-vs-human+image_style_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 41793926
num_examples: 1000
download_size: 41746126
dataset_size: 41793926
- config_name: multimodal_factual_checking
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 111559840
num_examples: 1000
download_size: 111047721
dataset_size: 111559840
- config_name: ok_vqa
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 161970926
num_examples: 1000
download_size: 161955316
dataset_size: 161970926
- config_name: places365+Image_Classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 14441507
num_examples: 1000
download_size: 14168505
dataset_size: 14441507
- config_name: question_image_match
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 158916712
num_examples: 1000
download_size: 158782721
dataset_size: 158916712
- config_name: recipe-qa+visual_coherence
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 23215505
num_examples: 1000
download_size: 23091708
dataset_size: 23215505
- config_name: semart+image_description
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 157995216
num_examples: 1000
download_size: 157666712
dataset_size: 157995216
- config_name: semart+image_school
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 158892165
num_examples: 1000
download_size: 158707995
dataset_size: 158892165
- config_name: semart+image_technique
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 157981645
num_examples: 1000
download_size: 157815627
dataset_size: 157981645
- config_name: semart+image_timeframe
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 155797604
num_examples: 1000
download_size: 155641617
dataset_size: 155797604
- config_name: semart+image_type
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 158020652
num_examples: 1000
download_size: 157842984
dataset_size: 158020652
- config_name: spot-the-diff+image_diff_identification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 193320177
num_examples: 1000
download_size: 193082474
dataset_size: 193320177
- config_name: textcaps+caption_generation
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 282577673
num_examples: 1000
download_size: 282397808
dataset_size: 282577673
- config_name: trainSet+image_classification
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 332445036
num_examples: 1000
download_size: 332315060
dataset_size: 332445036
- config_name: visdial+answer_question_2
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 164725761
num_examples: 1000
download_size: 164425842
dataset_size: 164725761
- config_name: visdial+answer_question_4
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 165974902
num_examples: 1000
download_size: 165681723
dataset_size: 165974902
- config_name: visdial+answer_question_6
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 158647694
num_examples: 1000
download_size: 158265314
dataset_size: 158647694
- config_name: visdial+answer_question_9
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 166319748
num_examples: 1000
download_size: 165860797
dataset_size: 166319748
- config_name: visualgenome_vqa
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 136292917
num_examples: 1000
download_size: 136270349
dataset_size: 136292917
- config_name: vizwiz+question_answer
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 498179964
num_examples: 1000
download_size: 498072710
dataset_size: 498179964
- config_name: wikihow_image_text_step_order
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 29768052
num_examples: 1000
download_size: 29608960
dataset_size: 29768052
- config_name: wikihow_immediate_next_step_selection
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 30843570
num_examples: 1000
download_size: 30631410
dataset_size: 30843570
- config_name: wikihow_next_step
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 30988974
num_examples: 1000
download_size: 30687458
dataset_size: 30988974
- config_name: wikihow_text_image_step_order
features:
- name: conversations
list:
- name: from
dtype: string
- name: value
dtype: string
- name: problem
dtype: string
- name: answer
dtype: string
- name: images
list: image
- name: _qid
dtype: string
splits:
- name: train
num_bytes: 31569946
num_examples: 1000
download_size: 31410571
dataset_size: 31569946
configs:
- config_name: 300w+human_portrait_classification
data_files:
- split: train
path: 300w+human_portrait_classification/train-*
- config_name: A-OKVQA+answer_rationales_matching
data_files:
- split: train
path: A-OKVQA+answer_rationales_matching/train-*
- config_name: A-OKVQA+rationales_generation
data_files:
- split: train
path: A-OKVQA+rationales_generation/train-*
- config_name: A-OKVQA+visual_question_answering
data_files:
- split: train
path: A-OKVQA+visual_question_answering/train-*
- config_name: AI2D+visual_question_answering
data_files:
- split: train
path: AI2D+visual_question_answering/train-*
- config_name: AID+aerial_scene_classification
data_files:
- split: train
path: AID+aerial_scene_classification/train-*
- config_name: CHART2TEXT+chart_caption
data_files:
- split: train
path: CHART2TEXT+chart_caption/train-*
- config_name: CLEVR+question_answer
data_files:
- split: train
path: CLEVR+question_answer/train-*
- config_name: CLEVR_CoGenT+Multiple_Question_Answering
data_files:
- split: train
path: CLEVR_CoGenT+Multiple_Question_Answering/train-*
- config_name: CLEVR_CoGenT+Question_Answer_Matching
data_files:
- split: train
path: CLEVR_CoGenT+Question_Answer_Matching/train-*
- config_name: CLEVR_CoGenT+Question_Answering
data_files:
- split: train
path: CLEVR_CoGenT+Question_Answering/train-*
- config_name: CLEVR_CoGenT+VQA_context
data_files:
- split: train
path: CLEVR_CoGenT+VQA_context/train-*
- config_name: CONCADIA+image_caption_context_1
data_files:
- split: train
path: CONCADIA+image_caption_context_1/train-*
- config_name: CONCADIA+image_caption_context_2
data_files:
- split: train
path: CONCADIA+image_caption_context_2/train-*
- config_name: CONCADIA+image_description
data_files:
- split: train
path: CONCADIA+image_description/train-*
- config_name: CUB-200-2011+Bird_Classification
data_files:
- split: train
path: CUB-200-2011+Bird_Classification/train-*
- config_name: Caltech-256+image_classification
data_files:
- split: train
path: Caltech-256+image_classification/train-*
- config_name: Caltech101+Image_classification
data_files:
- split: train
path: Caltech101+Image_classification/train-*
- config_name: Caltech101+Living_Thing_classification
data_files:
- split: train
path: Caltech101+Living_Thing_classification/train-*
- config_name: Cars+car_brand_classification
data_files:
- split: train
path: Cars+car_brand_classification/train-*
- config_name: Cars+car_classification
data_files:
- split: train
path: Cars+car_classification/train-*
- config_name: Clevr+Multiple_Question_Answering
data_files:
- split: train
path: Clevr+Multiple_Question_Answering/train-*
- config_name: Clevr+Question_Answer_Matching
data_files:
- split: train
path: Clevr+Question_Answer_Matching/train-*
- config_name: Clevr+Question_Answering
data_files:
- split: train
path: Clevr+Question_Answering/train-*
- config_name: Clevr+VQA_context
data_files:
- split: train
path: Clevr+VQA_context/train-*
- config_name: CoVA+webpage_recognition
data_files:
- split: train
path: CoVA+webpage_recognition/train-*
- config_name: ConceptualCaptions+image_captioning
data_files:
- split: train
path: ConceptualCaptions+image_captioning/train-*
- config_name: Core50+Object_detection
data_files:
- split: train
path: Core50+Object_detection/train-*
- config_name: DAQUAR+object_question_answer
data_files:
- split: train
path: DAQUAR+object_question_answer/train-*
- config_name: DOCVQA+question_answer
data_files:
- split: train
path: DOCVQA+question_answer/train-*
- config_name: DOMAIN_NET+clipart_image_classification
data_files:
- split: train
path: DOMAIN_NET+clipart_image_classification/train-*
- config_name: DOMAIN_NET+image_category_classification
data_files:
- split: train
path: DOMAIN_NET+image_category_classification/train-*
- config_name: DOMAIN_NET+infograph_image_classification
data_files:
- split: train
path: DOMAIN_NET+infograph_image_classification/train-*
- config_name: DOMAIN_NET+painting_image_classification
data_files:
- split: train
path: DOMAIN_NET+painting_image_classification/train-*
- config_name: DOMAIN_NET+quickdraw_image_classification
data_files:
- split: train
path: DOMAIN_NET+quickdraw_image_classification/train-*
- config_name: DOMAIN_NET+real_image_classification
data_files:
- split: train
path: DOMAIN_NET+real_image_classification/train-*
- config_name: DTD+all_texture_detection
data_files:
- split: train
path: DTD+all_texture_detection/train-*
- config_name: DTD+coarse_grained_texture_classification
data_files:
- split: train
path: DTD+coarse_grained_texture_classification/train-*
- config_name: DVQA+charts_question_answer_1
data_files:
- split: train
path: DVQA+charts_question_answer_1/train-*
- config_name: DVQA+charts_question_answer_2
data_files:
- split: train
path: DVQA+charts_question_answer_2/train-*
- config_name: Dark-Zurich+time_classification
data_files:
- split: train
path: Dark-Zurich+time_classification/train-*
- config_name: DeepFashion_highres_Attribute_and_Category+Cloth_Classification
data_files:
- split: train
path: DeepFashion_highres_Attribute_and_Category+Cloth_Classification/train-*
- config_name: DeepWeeds+weed_species_recognition
data_files:
- split: train
path: DeepWeeds+weed_species_recognition/train-*
- config_name: ExDark+object_recognition
data_files:
- split: train
path: ExDark+object_recognition/train-*
- config_name: FFHQ-Text+text-to-face_generation
data_files:
- split: train
path: FFHQ-Text+text-to-face_generation/train-*
- config_name: FGVC_Aircraft+Aircraft_Classification
data_files:
- split: train
path: FGVC_Aircraft+Aircraft_Classification/train-*
- config_name: FGVC_Aircraft+Aircraft_Classification_Family
data_files:
- split: train
path: FGVC_Aircraft+Aircraft_Classification_Family/train-*
- config_name: FGVC_Aircraft+Aircraft_Classification_Manufacturer
data_files:
- split: train
path: FGVC_Aircraft+Aircraft_Classification_Manufacturer/train-*
- config_name: FGVC_Aircraft+Aircraft_Classification_Variant
data_files:
- split: train
path: FGVC_Aircraft+Aircraft_Classification_Variant/train-*
- config_name: FLICKR30K+caption_image
data_files:
- split: train
path: FLICKR30K+caption_image/train-*
- config_name: FUNSD+text_detection
data_files:
- split: train
path: FUNSD+text_detection/train-*
- config_name: FlickrLogos-27+logo_detection
data_files:
- split: train
path: FlickrLogos-27+logo_detection/train-*
- config_name: FoodLogoDet-1500+food_logo_recognition
data_files:
- split: train
path: FoodLogoDet-1500+food_logo_recognition/train-*
- config_name: GEOMETRY3K+geometry_question_answer
data_files:
- split: train
path: GEOMETRY3K+geometry_question_answer/train-*
- config_name: GQA
data_files:
- split: train
path: GQA/train-*
- config_name: GTSRB+image_classification
data_files:
- split: train
path: GTSRB+image_classification/train-*
- config_name: HICO+human_activity_detection
data_files:
- split: train
path: HICO+human_activity_detection/train-*
- config_name: HICO+object_classification
data_files:
- split: train
path: HICO+object_classification/train-*
- config_name: ITM
data_files:
- split: train
path: ITM/train-*
- config_name: ImageNet-A+image_classification
data_files:
- split: train
path: ImageNet-A+image_classification/train-*
- config_name: ImageNet-C+image_classification_blur
data_files:
- split: train
path: ImageNet-C+image_classification_blur/train-*
- config_name: ImageNet-C+image_classification_general
data_files:
- split: train
path: ImageNet-C+image_classification_general/train-*
- config_name: ImageNet-C+image_classification_noise
data_files:
- split: train
path: ImageNet-C+image_classification_noise/train-*
- config_name: ImageNet-C+image_classification_weather
data_files:
- split: train
path: ImageNet-C+image_classification_weather/train-*
- config_name: ImageNet-R+image_classification
data_files:
- split: train
path: ImageNet-R+image_classification/train-*
- config_name: ImageNet-R+image_domain_classification
data_files:
- split: train
path: ImageNet-R+image_domain_classification/train-*
- config_name: ImageNet-Sketch+image_classification
data_files:
- split: train
path: ImageNet-Sketch+image_classification/train-*
- config_name: KVQA+image_captioning
data_files:
- split: train
path: KVQA+image_captioning/train-*
- config_name: KVQA+image_question_answer
data_files:
- split: train
path: KVQA+image_question_answer/train-*
- config_name: LAD+object_detection_details
data_files:
- split: train
path: LAD+object_detection_details/train-*
- config_name: LFW+face_recognition
data_files:
- split: train
path: LFW+face_recognition/train-*
- config_name: LOC_NARRATIVES+ade20k_images_caption
data_files:
- split: train
path: LOC_NARRATIVES+ade20k_images_caption/train-*
- config_name: LOC_NARRATIVES+coco_images_caption
data_files:
- split: train
path: LOC_NARRATIVES+coco_images_caption/train-*
- config_name: LOC_NARRATIVES+flickr30k_images_caption
data_files:
- split: train
path: LOC_NARRATIVES+flickr30k_images_caption/train-*
- config_name: LOC_NARRATIVES+open_images_caption
data_files:
- split: train
path: LOC_NARRATIVES+open_images_caption/train-*
- config_name: LSUN+Image_Classification
data_files:
- split: train
path: LSUN+Image_Classification/train-*
- config_name: MEMOTION+sentiment_detection
data_files:
- split: train
path: MEMOTION+sentiment_detection/train-*
- config_name: MNIST-M+number_recognition
data_files:
- split: train
path: MNIST-M+number_recognition/train-*
- config_name: MVTecAD+anomaly_detection
data_files:
- split: train
path: MVTecAD+anomaly_detection/train-*
- config_name: MVTecAD+image_classification
data_files:
- split: train
path: MVTecAD+image_classification/train-*
- config_name: MemeCap+image_captioning
data_files:
- split: train
path: MemeCap+image_captioning/train-*
- config_name: MemeCap+meme_captioning
data_files:
- split: train
path: MemeCap+meme_captioning/train-*
- config_name: NOCAPS+image_caption
data_files:
- split: train
path: NOCAPS+image_caption/train-*
- config_name: NUS-WIDE+Animal_classification
data_files:
- split: train
path: NUS-WIDE+Animal_classification/train-*
- config_name: ObjectNet+Object_classfication
data_files:
- split: train
path: ObjectNet+Object_classfication/train-*
- config_name: Office-Home+Image_classification
data_files:
- split: train
path: Office-Home+Image_classification/train-*
- config_name: Office_31+Image_Classification_Category
data_files:
- split: train
path: Office_31+Image_Classification_Category/train-*
- config_name: Office_31+Image_Classification_Object
data_files:
- split: train
path: Office_31+Image_Classification_Object/train-*
- config_name: Office_31+Image_Classification_ObjectAndCategory
data_files:
- split: train
path: Office_31+Image_Classification_ObjectAndCategory/train-*
- config_name: PACS+art_painting_object_classification
data_files:
- split: train
path: PACS+art_painting_object_classification/train-*
- config_name: PACS+cartoon_object_classification
data_files:
- split: train
path: PACS+cartoon_object_classification/train-*
- config_name: PACS+dog_image_category_classification
data_files:
- split: train
path: PACS+dog_image_category_classification/train-*
- config_name: PACS+elephant_image_category_classification
data_files:
- split: train
path: PACS+elephant_image_category_classification/train-*
- config_name: PACS+giraffe_image_category_classification
data_files:
- split: train
path: PACS+giraffe_image_category_classification/train-*
- config_name: PACS+guitar_image_category_classification
data_files:
- split: train
path: PACS+guitar_image_category_classification/train-*
- config_name: PACS+horse_image_category_classification
data_files:
- split: train
path: PACS+horse_image_category_classification/train-*
- config_name: PACS+house_image_category_classification
data_files:
- split: train
path: PACS+house_image_category_classification/train-*
- config_name: PACS+person_image_category_classification
data_files:
- split: train
path: PACS+person_image_category_classification/train-*
- config_name: PACS+photo_object_classification
data_files:
- split: train
path: PACS+photo_object_classification/train-*
- config_name: PICKAPIC+image_short_description
data_files:
- split: train
path: PICKAPIC+image_short_description/train-*
- config_name: Places205+Image_env_classification
data_files:
- split: train
path: Places205+Image_env_classification/train-*
- config_name: PlotQA+visual_question_answering
data_files:
- split: train
path: PlotQA+visual_question_answering/train-*
- config_name: RAVEN+next_pattern
data_files:
- split: train
path: RAVEN+next_pattern/train-*
- config_name: REDCAPS+reddit_caption_1
data_files:
- split: train
path: REDCAPS+reddit_caption_1/train-*
- config_name: REDCAPS+reddit_caption_2
data_files:
- split: train
path: REDCAPS+reddit_caption_2/train-*
- config_name: Road-Anomaly+anomaly_detection
data_files:
- split: train
path: Road-Anomaly+anomaly_detection/train-*
- config_name: SCUT-CTW1500+text_detection
data_files:
- split: train
path: SCUT-CTW1500+text_detection/train-*
- config_name: SKETCH+living_organism_detection
data_files:
- split: train
path: SKETCH+living_organism_detection/train-*
- config_name: SKETCH+object_detection
data_files:
- split: train
path: SKETCH+object_detection/train-*
- config_name: STANFORD_DOGS+dog_classification
data_files:
- split: train
path: STANFORD_DOGS+dog_classification/train-*
- config_name: STL-10+Image_Classification
data_files:
- split: train
path: STL-10+Image_Classification/train-*
- config_name: STVQA+image_question_answer
data_files:
- split: train
path: STVQA+image_question_answer/train-*
- config_name: SentiCap+image_sentiment_captioning
data_files:
- split: train
path: SentiCap+image_sentiment_captioning/train-*
- config_name: Set5+image_recognition
data_files:
- split: train
path: Set5+image_recognition/train-*
- config_name: Total-Text+text_detection
data_files:
- split: train
path: Total-Text+text_detection/train-*
- config_name: VIQUAE+question_answer
data_files:
- split: train
path: VIQUAE+question_answer/train-*
- config_name: VIZWIZ+image_captioning
data_files:
- split: train
path: VIZWIZ+image_captioning/train-*
- config_name: VOC2007+object_detection
data_files:
- split: train
path: VOC2007+object_detection/train-*
- config_name: VQA
data_files:
- split: train
path: VQA/train-*
- config_name: VQA-E+image_captioning
data_files:
- split: train
path: VQA-E+image_captioning/train-*
- config_name: VQA-E+visual_question_answering
data_files:
- split: train
path: VQA-E+visual_question_answering/train-*
- config_name: VQARAD+question_answer
data_files:
- split: train
path: VQARAD+question_answer/train-*
- config_name: VQA_activity_recognition
data_files:
- split: train
path: VQA_activity_recognition/train-*
- config_name: VQA_attribute
data_files:
- split: train
path: VQA_attribute/train-*
- config_name: VQA_color
data_files:
- split: train
path: VQA_color/train-*
- config_name: VQA_counting
data_files:
- split: train
path: VQA_counting/train-*
- config_name: VQA_object_presence
data_files:
- split: train
path: VQA_object_presence/train-*
- config_name: VQA_object_recognition
data_files:
- split: train
path: VQA_object_recognition/train-*
- config_name: VQA_positional_reasoning
data_files:
- split: train
path: VQA_positional_reasoning/train-*
- config_name: VQA_scene_recognition
data_files:
- split: train
path: VQA_scene_recognition/train-*
- config_name: VQA_sentiment_understanding
data_files:
- split: train
path: VQA_sentiment_understanding/train-*
- config_name: VQA_sport_recognition
data_files:
- split: train
path: VQA_sport_recognition/train-*
- config_name: VQA_utility_affordance
data_files:
- split: train
path: VQA_utility_affordance/train-*
- config_name: VQAv2
data_files:
- split: train
path: VQAv2/train-*
- config_name: VQG+caption_generation
data_files:
- split: train
path: VQG+caption_generation/train-*
- config_name: VQG+question_generation
data_files:
- split: train
path: VQG+question_generation/train-*
- config_name: VisDA-2017+image_classification
data_files:
- split: train
path: VisDA-2017+image_classification/train-*
- config_name: VisDA-2017+object_classification_train
data_files:
- split: train
path: VisDA-2017+object_classification_train/train-*
- config_name: VisDA-2017+object_classification_validation
data_files:
- split: train
path: VisDA-2017+object_classification_validation/train-*
- config_name: WIKIART+art_classification
data_files:
- split: train
path: WIKIART+art_classification/train-*
- config_name: WIT+detailed_description
data_files:
- split: train
path: WIT+detailed_description/train-*
- config_name: Winoground+image_captioning
data_files:
- split: train
path: Winoground+image_captioning/train-*
- config_name: Yoga-82+yoga_pose_recognition
data_files:
- split: train
path: Yoga-82+yoga_pose_recognition/train-*
- config_name: ayahoo_test_images+animal_object_vehicle_image_classification
data_files:
- split: train
path: ayahoo_test_images+animal_object_vehicle_image_classification/train-*
- config_name: cinic-10+image_classification_animal
data_files:
- split: train
path: cinic-10+image_classification_animal/train-*
- config_name: cinic-10+image_classification_shipping
data_files:
- split: train
path: cinic-10+image_classification_shipping/train-*
- config_name: cinic-10+image_classification_transport
data_files:
- split: train
path: cinic-10+image_classification_transport/train-*
- config_name: cinic-10+object_presence_animal
data_files:
- split: train
path: cinic-10+object_presence_animal/train-*
- config_name: cinic-10+object_presence_shipping
data_files:
- split: train
path: cinic-10+object_presence_shipping/train-*
- config_name: coco+image_classification_animal
data_files:
- split: train
path: coco+image_classification_animal/train-*
- config_name: coco+image_classification_appliance
data_files:
- split: train
path: coco+image_classification_appliance/train-*
- config_name: coco+image_classification_furniture
data_files:
- split: train
path: coco+image_classification_furniture/train-*
- config_name: coco+image_classification_kitchen
data_files:
- split: train
path: coco+image_classification_kitchen/train-*
- config_name: coco+image_classification_sports
data_files:
- split: train
path: coco+image_classification_sports/train-*
- config_name: coco+image_classification_vehicle
data_files:
- split: train
path: coco+image_classification_vehicle/train-*
- config_name: crowdhuman+count_numbers
data_files:
- split: train
path: crowdhuman+count_numbers/train-*
- config_name: expw+expression_detection
data_files:
- split: train
path: expw+expression_detection/train-*
- config_name: fairface+image_classification_age
data_files:
- split: train
path: fairface+image_classification_age/train-*
- config_name: fairface+image_classification_gender
data_files:
- split: train
path: fairface+image_classification_gender/train-*
- config_name: fairface+image_classification_race
data_files:
- split: train
path: fairface+image_classification_race/train-*
- config_name: iconqa+choose_txt
data_files:
- split: train
path: iconqa+choose_txt/train-*
- config_name: iconqa+fill_in_blank
data_files:
- split: train
path: iconqa+fill_in_blank/train-*
- config_name: image_caption
data_files:
- split: train
path: image_caption/train-*
- config_name: image_quality
data_files:
- split: train
path: image_quality/train-*
- config_name: image_text_selection
data_files:
- split: train
path: image_text_selection/train-*
- config_name: infographicvqa+question_answer
data_files:
- split: train
path: infographicvqa+question_answer/train-*
- config_name: infographicvqa+single_document_question
data_files:
- split: train
path: infographicvqa+single_document_question/train-*
- config_name: model-vs-human+image_style_classification
data_files:
- split: train
path: model-vs-human+image_style_classification/train-*
- config_name: multimodal_factual_checking
data_files:
- split: train
path: multimodal_factual_checking/train-*
- config_name: ok_vqa
data_files:
- split: train
path: ok_vqa/train-*
- config_name: places365+Image_Classification
data_files:
- split: train
path: places365+Image_Classification/train-*
- config_name: question_image_match
data_files:
- split: train
path: question_image_match/train-*
- config_name: recipe-qa+visual_coherence
data_files:
- split: train
path: recipe-qa+visual_coherence/train-*
- config_name: semart+image_description
data_files:
- split: train
path: semart+image_description/train-*
- config_name: semart+image_school
data_files:
- split: train
path: semart+image_school/train-*
- config_name: semart+image_technique
data_files:
- split: train
path: semart+image_technique/train-*
- config_name: semart+image_timeframe
data_files:
- split: train
path: semart+image_timeframe/train-*
- config_name: semart+image_type
data_files:
- split: train
path: semart+image_type/train-*
- config_name: spot-the-diff+image_diff_identification
data_files:
- split: train
path: spot-the-diff+image_diff_identification/train-*
- config_name: textcaps+caption_generation
data_files:
- split: train
path: textcaps+caption_generation/train-*
- config_name: trainSet+image_classification
data_files:
- split: train
path: trainSet+image_classification/train-*
- config_name: visdial+answer_question_2
data_files:
- split: train
path: visdial+answer_question_2/train-*
- config_name: visdial+answer_question_4
data_files:
- split: train
path: visdial+answer_question_4/train-*
- config_name: visdial+answer_question_6
data_files:
- split: train
path: visdial+answer_question_6/train-*
- config_name: visdial+answer_question_9
data_files:
- split: train
path: visdial+answer_question_9/train-*
- config_name: visualgenome_vqa
data_files:
- split: train
path: visualgenome_vqa/train-*
- config_name: vizwiz+question_answer
data_files:
- split: train
path: vizwiz+question_answer/train-*
- config_name: wikihow_image_text_step_order
data_files:
- split: train
path: wikihow_image_text_step_order/train-*
- config_name: wikihow_immediate_next_step_selection
data_files:
- split: train
path: wikihow_immediate_next_step_selection/train-*
- config_name: wikihow_next_step
data_files:
- split: train
path: wikihow_next_step/train-*
- config_name: wikihow_text_image_step_order
data_files:
- split: train
path: wikihow_text_image_step_order/train-*
---
提供机构:
Moenupa



