five

ifx-pse-sys-ml/FineVisionConcatShuffleIFX

收藏
Hugging Face2026-02-07 更新2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/ifx-pse-sys-ml/FineVisionConcatShuffleIFX
下载链接
链接失效反馈
官方服务:
资源简介:
--- dataset_info: - config_name: CoSyn_400k_chart_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 25619864642 num_examples: 116814 download_size: 25225445972 dataset_size: 25619864642 - config_name: CoSyn_400k_chemical_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 284198825 num_examples: 8942 download_size: 271906883 dataset_size: 284198825 - config_name: CoSyn_400k_circuit_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 395789878 num_examples: 10470 download_size: 381016569 dataset_size: 395789878 - config_name: CoSyn_400k_diagram_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 7305290487 num_examples: 34963 download_size: 7230953565 dataset_size: 7305290487 - config_name: CoSyn_400k_document_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 24180800351 num_examples: 71282 download_size: 23999725924 dataset_size: 24180800351 - config_name: CoSyn_400k_graphic_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 335696720 num_examples: 26968 download_size: 309517786 dataset_size: 335696720 - config_name: CoSyn_400k_math_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 6107902067 num_examples: 66714 download_size: 6050179921 dataset_size: 6107902067 - config_name: CoSyn_400k_music_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 405064951 num_examples: 11969 download_size: 376544240 dataset_size: 405064951 - config_name: CoSyn_400k_nutrition_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 1508995196 num_examples: 6931 download_size: 1484258654 dataset_size: 1508995196 - config_name: CoSyn_400k_table_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 7684057562 num_examples: 46518 download_size: 7555591572 dataset_size: 7684057562 - config_name: LLaVA_Instruct_150K_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 76726976305 num_examples: 157710 download_size: 76639741025 dataset_size: 76726976305 - config_name: SynthChartNet_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 17908210474 num_examples: 500000 download_size: 17681999342 dataset_size: 17908210474 - config_name: SynthCodeNet_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 61998944805 num_examples: 499983 download_size: 61417487293 dataset_size: 61998944805 - config_name: SynthFormulaNet_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 2640399650 num_examples: 499997 download_size: 2507242506 dataset_size: 2640399650 - config_name: Unichart_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 18177702837 num_examples: 611925 download_size: 16997663269 dataset_size: 18177702837 - config_name: a_okvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 22759813096 num_examples: 54602 download_size: 22755477780 dataset_size: 22759813096 - config_name: aguvis-stage-1_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 234712403450 num_examples: 458957 download_size: 233162291145 dataset_size: 234712403450 - config_name: ai2d_merged_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 867183687 num_examples: 4858 download_size: 860207729 dataset_size: 867183687 - config_name: alfworldgpt_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 3890916013 num_examples: 45073 download_size: 3827725886 dataset_size: 3890916013 - config_name: allava_laion_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 366924100118 num_examples: 468664 download_size: 366489610327 dataset_size: 366924100118 - config_name: allava_vflan_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 92699924830 num_examples: 177078 download_size: 92523508108 dataset_size: 92699924830 - config_name: aokvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 896746995 num_examples: 16539 download_size: 892525732 dataset_size: 896746995 - config_name: art_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 5141087027 num_examples: 5492 download_size: 5140877121 dataset_size: 5141087027 - config_name: arxivqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 81923891145 num_examples: 100000 download_size: 81864015544 dataset_size: 81923891145 - config_name: bentham_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 1450160157 num_examples: 10843 download_size: 1448627831 dataset_size: 1450160157 - config_name: blockdiagramcomputerized_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 28792412 num_examples: 502 download_size: 28521247 dataset_size: 28792412 - config_name: blockdiagramhandwritten_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 146651427 num_examples: 1029 download_size: 146122850 dataset_size: 146651427 - config_name: cambrian(filtered)_processed_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 37993961967 num_examples: 83123 download_size: 37971082563 dataset_size: 37993961967 - config_name: captcha_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1108385680 num_examples: 113062 download_size: 1089909799 dataset_size: 1108385680 - config_name: chart2text_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 1129185232 num_examples: 26961 download_size: 1107099979 dataset_size: 1129185232 - config_name: chartqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 815177636 num_examples: 18265 download_size: 803305616 dataset_size: 815177636 - config_name: chinesememe_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 14244178797 num_examples: 54212 download_size: 14224220241 dataset_size: 14244178797 - config_name: chrome_writting_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 80740219 num_examples: 8825 download_size: 78758190 dataset_size: 80740219 - config_name: clevr_math(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 708070313 num_examples: 5280 download_size: 706821176 dataset_size: 708070313 - config_name: clevr_math_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 9394753179 num_examples: 70000 download_size: 9342159966 dataset_size: 9394753179 - config_name: clevr_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 10557164232 num_examples: 70000 download_size: 10463967812 dataset_size: 10557164232 - config_name: coco_colors_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 55374513761 num_examples: 118287 download_size: 55345321245 dataset_size: 55374513761 - config_name: cocoqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 2402176651 num_examples: 46287 download_size: 2392213818 dataset_size: 2402176651 - config_name: cocotext_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 7930321113 num_examples: 16169 download_size: 7929075702 dataset_size: 7930321113 - config_name: ctw_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 109319005809 num_examples: 24290 download_size: 109311132658 dataset_size: 109319005809 - config_name: datik_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 3646550814 num_examples: 220537 download_size: 3466511674 dataset_size: 3646550814 - config_name: datikz_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 642401206 num_examples: 47441 download_size: 586668154 dataset_size: 642401206 - config_name: densefusion_1m_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 146400498630 num_examples: 1058751 download_size: 144313602922 dataset_size: 146400498630 - config_name: diagram_image_to_text_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 18704652 num_examples: 300 download_size: 18503975 dataset_size: 18704652 - config_name: docvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 12018085455 num_examples: 10189 download_size: 12005172399 dataset_size: 12018085455 - config_name: drivelm_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 37226086574 num_examples: 4072 download_size: 36798256034 dataset_size: 37226086574 - config_name: dvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 4581122677 num_examples: 200000 download_size: 4278433755 dataset_size: 4581122677 - config_name: est_vqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 18902347737 num_examples: 19358 download_size: 18902667141 dataset_size: 18902347737 - config_name: face_emotion_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 15127430 num_examples: 797 download_size: 14958324 dataset_size: 15127430 - config_name: figureqa(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 419233308 num_examples: 17587 download_size: 414199711 dataset_size: 419233308 - config_name: figureqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 2346521984 num_examples: 100000 download_size: 2215216124 dataset_size: 2346521984 - config_name: finqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 138085294 num_examples: 5276 download_size: 123097812 dataset_size: 138085294 - config_name: funsd_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 35306126 num_examples: 194 download_size: 35076254 dataset_size: 35306126 - config_name: geo170k(align)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 197478419 num_examples: 35297 download_size: 186291816 dataset_size: 197478419 - config_name: geo170k(qa)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 87625239 num_examples: 12101 download_size: 79047392 dataset_size: 87625239 - config_name: geo3k_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 38756450 num_examples: 2091 download_size: 38229173 dataset_size: 38756450 - config_name: geometry3k(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 197867040 num_examples: 9724 download_size: 195123654 dataset_size: 197867040 - config_name: geomverse_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 1183898574 num_examples: 9303 download_size: 1060049351 dataset_size: 1183898574 - config_name: geoqa+(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 94213385 num_examples: 17162 download_size: 89279244 dataset_size: 94213385 - config_name: geos(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 3936551 num_examples: 498 download_size: 540527 dataset_size: 3936551 - config_name: google_landmarks_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 189460877583 num_examples: 299993 download_size: 189299764214 dataset_size: 189460877583 - config_name: groundui_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 6044684493 num_examples: 13531 download_size: 6037461163 dataset_size: 6044684493 - config_name: handwriting_forms_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 168001320 num_examples: 1400 download_size: 167149784 dataset_size: 168001320 - config_name: hateful_memes_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 3059106936 num_examples: 8500 download_size: 3058114975 dataset_size: 3059106936 - config_name: hitab_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 163933811 num_examples: 2500 download_size: 160097022 dataset_size: 163933811 - config_name: hme100k_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 1547322238 num_examples: 74492 download_size: 1532531288 dataset_size: 1547322238 - config_name: hw_squad_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 21637654124 num_examples: 20457 download_size: 21634501523 dataset_size: 21637654124 - config_name: iam_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 1138240375 num_examples: 5663 download_size: 1134309500 dataset_size: 1138240375 - config_name: iconqa(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 209363819 num_examples: 22589 download_size: 203019936 dataset_size: 209363819 - config_name: iconqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 330918684 num_examples: 27307 download_size: 324811645 dataset_size: 330918684 - config_name: idk_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 5197458667 num_examples: 11123 download_size: 5194514073 dataset_size: 5197458667 - config_name: iiit5k_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 21788299 num_examples: 1990 download_size: 21421890 dataset_size: 21788299 - config_name: image_textualization(filtered)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 39882386265 num_examples: 99573 download_size: 39828174060 dataset_size: 39882386265 - config_name: imgur5k_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 12591193687 num_examples: 5934 download_size: 12591187824 dataset_size: 12591193687 - config_name: indoor_qa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 797864338 num_examples: 3350 download_size: 797373158 dataset_size: 797864338 - config_name: infographic(gpt4v)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 2014741001 num_examples: 1982 download_size: 2011061494 dataset_size: 2014741001 - config_name: infographic_vqa_llava_format_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 1765450705 num_examples: 2113 download_size: 1764450113 dataset_size: 1765450705 - config_name: infographic_vqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 4467478769 num_examples: 4394 download_size: 4465203822 dataset_size: 4467478769 - config_name: intergps_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 25159137 num_examples: 1280 download_size: 24842900 dataset_size: 25159137 - config_name: invoices_receipts_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1925658173 num_examples: 3013 download_size: 1923266233 dataset_size: 1925658173 - config_name: k12_printing_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 4587776492 num_examples: 256636 download_size: 4527784643 dataset_size: 4587776492 - config_name: laion_gpt4v_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 3021991358 num_examples: 9301 download_size: 3016547452 dataset_size: 3021991358 - config_name: latex_handwritten_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 12665391123 num_examples: 39583 download_size: 12654390241 dataset_size: 12665391123 - config_name: latexformulas_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 5604066571 num_examples: 552340 download_size: 5478333282 dataset_size: 5604066571 - config_name: llavar_gpt4_20k_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 4235161138 num_examples: 19790 download_size: 4228404343 dataset_size: 4235161138 - config_name: lnqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 266234624450 num_examples: 302780 download_size: 266097025627 dataset_size: 266234624450 - config_name: localized_narratives_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 21346039622 num_examples: 199998 download_size: 21278750115 dataset_size: 21346039622 - config_name: lrv_chart_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 86443747 num_examples: 1776 download_size: 85267361 dataset_size: 86443747 - config_name: lrv_normal(filtered)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 2985153090 num_examples: 10489 download_size: 2967131358 dataset_size: 2985153090 - config_name: lvis_instruct4v_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 107372117408 num_examples: 222711 download_size: 107201751503 dataset_size: 107372117408 - config_name: mapqa(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 351523293 num_examples: 5225 download_size: 349652980 dataset_size: 351523293 - config_name: mapqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 3371567796 num_examples: 37417 download_size: 3305418797 dataset_size: 3371567796 - config_name: maptext_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1504185688 num_examples: 200 download_size: 1504233154 dataset_size: 1504185688 - config_name: mathwriting-google_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 12299849132 num_examples: 300000 download_size: 12198579303 dataset_size: 12299849132 - config_name: mavis_math_metagen_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 3975743055 num_examples: 87348 download_size: 3933300199 dataset_size: 3975743055 - config_name: mavis_math_rule_geo_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 20031473556 num_examples: 99986 download_size: 19762014933 dataset_size: 20031473556 - config_name: memotion_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 2530732269 num_examples: 6991 download_size: 2528539182 dataset_size: 2530732269 - config_name: mimic_cgd_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 13184046229 num_examples: 70939 download_size: 13158485573 dataset_size: 13184046229 - config_name: mmc_instruct_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 16504686675 num_examples: 168178 download_size: 16208763930 dataset_size: 16504686675 - config_name: mmevol_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 25742258957 num_examples: 160215 download_size: 25488522892 dataset_size: 25742258957 - config_name: mmra_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1289479228 num_examples: 1024 download_size: 1289214223 dataset_size: 1289479228 - config_name: mmsoc_memotion_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 2531495346 num_examples: 6991 download_size: 2529040189 dataset_size: 2531495346 - config_name: multihiertt_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 1378939893 num_examples: 7619 download_size: 1373206402 dataset_size: 1378939893 - config_name: nlvr2_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 23552939006 num_examples: 50426 download_size: 23498638908 dataset_size: 23552939006 - config_name: objects365_qa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 202609182505 num_examples: 1665847 download_size: 199422864779 dataset_size: 202609182505 - config_name: ocrvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 6148694690 num_examples: 165746 download_size: 6042448366 dataset_size: 6148694690 - config_name: olmOCR-mix-0225-books_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 6633513593 num_examples: 15194 download_size: 6617041289 dataset_size: 6633513593 - config_name: olmOCR-mix-0225-documents_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 97945411922 num_examples: 228858 download_size: 97270682161 dataset_size: 97945411922 - config_name: oodvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 4694216488 num_examples: 8488 download_size: 4693055221 dataset_size: 4694216488 - config_name: orand_car_a_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 23695905 num_examples: 1999 download_size: 23261960 dataset_size: 23695905 - config_name: pathvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 18649764291 num_examples: 32632 download_size: 18646476625 dataset_size: 18649764291 - config_name: pdfvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1663171974 num_examples: 8593 download_size: 1649858023 dataset_size: 1663171974 - config_name: plotqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 8939469653 num_examples: 157070 download_size: 5318074899 dataset_size: 8939469653 - config_name: pmc_vqa(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 3445695928 num_examples: 35948 download_size: 3436152785 dataset_size: 3445695928 - config_name: raven_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1734015245 num_examples: 42000 download_size: 1718269443 dataset_size: 1734015245 - config_name: rendered_text_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 11087694683 num_examples: 10000 download_size: 11087735420 dataset_size: 11087694683 - config_name: robut_sqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 691189224 num_examples: 8514 download_size: 682584203 dataset_size: 691189224 - config_name: robut_wikisql_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 6319666999 num_examples: 74989 download_size: 6288703680 dataset_size: 6319666999 - config_name: robut_wtq_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 4150731032 num_examples: 38246 download_size: 4122580618 dataset_size: 4150731032 - config_name: scienceqa(nona_context)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 1431447437 num_examples: 19208 download_size: 1424133076 dataset_size: 1431447437 - config_name: scienceqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 287033525 num_examples: 4976 download_size: 282831669 dataset_size: 287033525 - config_name: screen2words_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 1693100008 num_examples: 15730 download_size: 1345859583 dataset_size: 1693100008 - config_name: screenqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 44877713633 num_examples: 80761 download_size: 44816803742 dataset_size: 44877713633 - config_name: sharegpt4o_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 39874537033 num_examples: 57284 download_size: 39795997476 dataset_size: 39874537033 - config_name: sharegpt4v(coco)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 20028717363 num_examples: 50017 download_size: 20003659353 dataset_size: 20028717363 - config_name: sharegpt4v(knowledge)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 2405545785 num_examples: 1988 download_size: 2404938446 dataset_size: 2405545785 - config_name: sharegpt4v(llava)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 5644424725 num_examples: 29986 download_size: 5626275629 dataset_size: 5644424725 - config_name: sharegpt4v(sam)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 31591489062 num_examples: 8990 download_size: 31590918139 dataset_size: 31591489062 - config_name: sketchyvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 462161568 num_examples: 8000 download_size: 453990099 dataset_size: 462161568 - config_name: slidevqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 4252723452 num_examples: 1919 download_size: 4193090766 dataset_size: 4252723452 - config_name: spark_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 1062709498 num_examples: 3904 download_size: 1061969529 dataset_size: 1062709498 - config_name: spatialsense_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 3539733377 num_examples: 10440 download_size: 3536888182 dataset_size: 3539733377 - config_name: spot_the_diff_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 1656404738 num_examples: 8566 download_size: 1654716690 dataset_size: 1656404738 - config_name: sroie_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 382776993 num_examples: 33616 download_size: 376277892 dataset_size: 382776993 - config_name: st_vqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 878508564 num_examples: 17247 download_size: 875109273 dataset_size: 878508564 - config_name: sujet_finance_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 4870772171 num_examples: 9801 download_size: 4857913142 dataset_size: 4870772171 - config_name: super_clevr(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 2494651973 num_examples: 8642 download_size: 2492739175 dataset_size: 2494651973 - config_name: svrd_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 4770519535 num_examples: 4396 download_size: 4768018864 dataset_size: 4770519535 - config_name: synthdog_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 312184613844 num_examples: 500000 download_size: 312084977571 dataset_size: 312184613844 - config_name: tabmwp(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 308654365 num_examples: 22452 download_size: 302949811 dataset_size: 308654365 - config_name: tabmwp_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 311512614 num_examples: 22722 download_size: 303652245 dataset_size: 311512614 - config_name: tal_ocr_eng_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 4597914663 num_examples: 256646 download_size: 4552095862 dataset_size: 4597914663 - config_name: tallyqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 4683118093 num_examples: 98680 download_size: 4658835679 dataset_size: 4683118093 - config_name: tat_dqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 56046180 num_examples: 2207 download_size: 52316261 dataset_size: 56046180 - config_name: tat_qa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 74053928 num_examples: 2199 download_size: 70738269 dataset_size: 74053928 - config_name: text_OpenMathInstruct-2_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1397157730 num_examples: 1000000 download_size: 623924776 dataset_size: 1397157730 - config_name: text_code_feedback_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 395467554 num_examples: 66383 download_size: 171585140 dataset_size: 395467554 - config_name: text_codefeedback_filtered_instruction_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 366717666 num_examples: 156525 download_size: 176676147 dataset_size: 366717666 - config_name: text_infinitymath_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 36914765 num_examples: 101380 download_size: 17184905 dataset_size: 36914765 - config_name: text_mathinstruct_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 199703065 num_examples: 262039 download_size: 97981905 dataset_size: 199703065 - config_name: text_mathqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 309234514 num_examples: 394996 download_size: 139444290 dataset_size: 309234514 - config_name: text_mathstepdpo10k_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 5594152 num_examples: 10795 download_size: 2550939 dataset_size: 5594152 - config_name: text_numinamath_cot_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1292196153 num_examples: 859494 download_size: 620076783 dataset_size: 1292196153 - config_name: text_openhermes_2_5_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 1573997409 num_examples: 1001551 download_size: 868338197 dataset_size: 1573997409 - config_name: text_openorca_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 6918913888 num_examples: 4233853 download_size: 4075448386 dataset_size: 6918913888 - config_name: text_orcamath_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 238325136 num_examples: 200035 download_size: 108031725 dataset_size: 238325136 - config_name: text_pythoncode25k_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 26382364 num_examples: 49626 download_size: 12283293 dataset_size: 26382364 - config_name: text_pythoncodealpaca_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 12659736 num_examples: 18612 download_size: 5549081 dataset_size: 12659736 - config_name: text_ruozhiba_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 624832 num_examples: 1496 download_size: 360638 dataset_size: 624832 - config_name: text_theoremqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 221207 num_examples: 800 download_size: 108484 dataset_size: 221207 - config_name: text_wizardlm_evol_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 135800299 num_examples: 69999 download_size: 68714788 dataset_size: 135800299 - config_name: textcaps_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 18032413337 num_examples: 21906 download_size: 18030747675 dataset_size: 18032413337 - config_name: textocr(gpt4v)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 20681375215 num_examples: 25060 download_size: 20677064872 dataset_size: 20681375215 - config_name: textvqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 2145630071 num_examples: 21943 download_size: 2138920572 dataset_size: 2145630071 - config_name: tqa_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 659074113 num_examples: 2749 download_size: 657357804 dataset_size: 659074113 - config_name: unigeo(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 68744641 num_examples: 11949 download_size: 65310796 dataset_size: 68744641 - config_name: ureader_cap_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 75068378076 num_examples: 91215 download_size: 75061115061 dataset_size: 75068378076 - config_name: ureader_ie_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 9192940809 num_examples: 17320 download_size: 9126327120 dataset_size: 9192940809 - config_name: ureader_kg_processed_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 19867187002 num_examples: 37550 download_size: 19843543468 dataset_size: 19867187002 - config_name: ureader_qa_processed_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 150865727147 num_examples: 252953 download_size: 150762201366 dataset_size: 150865727147 - config_name: vision_flan(filtered)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 82861381228 num_examples: 175964 download_size: 82817085515 dataset_size: 82861381228 - config_name: vistext_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 551952773 num_examples: 9969 download_size: 543840221 dataset_size: 551952773 - config_name: visual7w_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 4452152278 num_examples: 14366 download_size: 4443335051 dataset_size: 4452152278 - config_name: visualmrc_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 1765924200 num_examples: 3027 download_size: 1762162367 dataset_size: 1765924200 - config_name: visualwebinstruct(filtered)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 36324449027 num_examples: 263581 download_size: 36226672977 dataset_size: 36324449027 - config_name: vizwiz(mathv360k)_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 10145612649 num_examples: 6604 download_size: 10145487057 dataset_size: 10145612649 - config_name: vqaonbd_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 7937425918 num_examples: 39986 download_size: 7748184032 dataset_size: 7937425918 - config_name: vqarad_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 17413769 num_examples: 313 download_size: 16981770 dataset_size: 17413769 - config_name: vqav2_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 4295913718 num_examples: 82772 download_size: 4258255315 dataset_size: 4295913718 - config_name: vsr_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 108075841 num_examples: 2157 download_size: 107510141 dataset_size: 108075841 - config_name: websight_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 splits: - name: train num_bytes: 8465469926 num_examples: 10000 download_size: 8421543880 dataset_size: 8465469926 - config_name: wildvision_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 splits: - name: train num_bytes: 365457306 num_examples: 333 download_size: 365198936 dataset_size: 365457306 - config_name: wordart_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 splits: - name: train num_bytes: 2837621085 num_examples: 4804 download_size: 2837202622 dataset_size: 2837621085 - config_name: yesbut_shuffled features: - name: images list: image - name: texts list: - name: user dtype: string - name: assistant dtype: string - name: source dtype: string - name: image_correspondence_ratings list: int64 - name: image_correspondence_min dtype: int64 - name: formatting_ratings list: int64 - name: formatting_min dtype: int64 - name: visual_dependency_ratings list: int64 - name: visual_dependency_min dtype: int64 - name: relevance_ratings list: int64 - name: relevance_min dtype: int64 splits: - name: train num_bytes: 3172969719 num_examples: 4318 download_size: 3170240874 dataset_size: 3172969719 configs: - config_name: CoSyn_400k_chart_shuffled data_files: - split: train path: CoSyn_400k_chart_shuffled/train-* - config_name: CoSyn_400k_chemical_shuffled data_files: - split: train path: CoSyn_400k_chemical_shuffled/train-* - config_name: CoSyn_400k_circuit_shuffled data_files: - split: train path: CoSyn_400k_circuit_shuffled/train-* - config_name: CoSyn_400k_diagram_shuffled data_files: - split: train path: CoSyn_400k_diagram_shuffled/train-* - config_name: CoSyn_400k_document_shuffled data_files: - split: train path: CoSyn_400k_document_shuffled/train-* - config_name: CoSyn_400k_graphic_shuffled data_files: - split: train path: CoSyn_400k_graphic_shuffled/train-* - config_name: CoSyn_400k_math_shuffled data_files: - split: train path: CoSyn_400k_math_shuffled/train-* - config_name: CoSyn_400k_music_shuffled data_files: - split: train path: CoSyn_400k_music_shuffled/train-* - config_name: CoSyn_400k_nutrition_shuffled data_files: - split: train path: CoSyn_400k_nutrition_shuffled/train-* - config_name: CoSyn_400k_table_shuffled data_files: - split: train path: CoSyn_400k_table_shuffled/train-* - config_name: LLaVA_Instruct_150K_shuffled data_files: - split: train path: LLaVA_Instruct_150K_shuffled/train-* - config_name: SynthChartNet_shuffled data_files: - split: train path: SynthChartNet_shuffled/train-* - config_name: SynthCodeNet_shuffled data_files: - split: train path: SynthCodeNet_shuffled/train-* - config_name: SynthFormulaNet_shuffled data_files: - split: train path: SynthFormulaNet_shuffled/train-* - config_name: Unichart_shuffled data_files: - split: train path: Unichart_shuffled/train-* - config_name: a_okvqa_shuffled data_files: - split: train path: a_okvqa_shuffled/train-* - config_name: aguvis-stage-1_shuffled data_files: - split: train path: aguvis-stage-1_shuffled/train-* - config_name: ai2d_merged_shuffled data_files: - split: train path: ai2d_merged_shuffled/train-* - config_name: alfworldgpt_shuffled data_files: - split: train path: alfworldgpt_shuffled/train-* - config_name: allava_laion_shuffled data_files: - split: train path: allava_laion_shuffled/train-* - config_name: allava_vflan_shuffled data_files: - split: train path: allava_vflan_shuffled/train-* - config_name: aokvqa_shuffled data_files: - split: train path: aokvqa_shuffled/train-* - config_name: art_shuffled data_files: - split: train path: art_shuffled/train-* - config_name: arxivqa_shuffled data_files: - split: train path: arxivqa_shuffled/train-* - config_name: bentham_shuffled data_files: - split: train path: bentham_shuffled/train-* - config_name: blockdiagramcomputerized_shuffled data_files: - split: train path: blockdiagramcomputerized_shuffled/train-* - config_name: blockdiagramhandwritten_shuffled data_files: - split: train path: blockdiagramhandwritten_shuffled/train-* - config_name: cambrian(filtered)_processed_shuffled data_files: - split: train path: cambrian(filtered)_processed_shuffled/train-* - config_name: captcha_shuffled data_files: - split: train path: captcha_shuffled/train-* - config_name: chart2text_shuffled data_files: - split: train path: chart2text_shuffled/train-* - config_name: chartqa_shuffled data_files: - split: train path: chartqa_shuffled/train-* - config_name: chinesememe_shuffled data_files: - split: train path: chinesememe_shuffled/train-* - config_name: chrome_writting_shuffled data_files: - split: train path: chrome_writting_shuffled/train-* - config_name: clevr_math(mathv360k)_shuffled data_files: - split: train path: clevr_math(mathv360k)_shuffled/train-* - config_name: clevr_math_shuffled data_files: - split: train path: clevr_math_shuffled/train-* - config_name: clevr_shuffled data_files: - split: train path: clevr_shuffled/train-* - config_name: coco_colors_shuffled data_files: - split: train path: coco_colors_shuffled/train-* - config_name: cocoqa_shuffled data_files: - split: train path: cocoqa_shuffled/train-* - config_name: cocotext_shuffled data_files: - split: train path: cocotext_shuffled/train-* - config_name: ctw_shuffled data_files: - split: train path: ctw_shuffled/train-* - config_name: datik_shuffled data_files: - split: train path: datik_shuffled/train-* - config_name: datikz_shuffled data_files: - split: train path: datikz_shuffled/train-* - config_name: densefusion_1m_shuffled data_files: - split: train path: densefusion_1m_shuffled/train-* - config_name: diagram_image_to_text_shuffled data_files: - split: train path: diagram_image_to_text_shuffled/train-* - config_name: docvqa_shuffled data_files: - split: train path: docvqa_shuffled/train-* - config_name: drivelm_shuffled data_files: - split: train path: drivelm_shuffled/train-* - config_name: dvqa_shuffled data_files: - split: train path: dvqa_shuffled/train-* - config_name: est_vqa_shuffled data_files: - split: train path: est_vqa_shuffled/train-* - config_name: face_emotion_shuffled data_files: - split: train path: face_emotion_shuffled/train-* - config_name: figureqa(mathv360k)_shuffled data_files: - split: train path: figureqa(mathv360k)_shuffled/train-* - config_name: figureqa_shuffled data_files: - split: train path: figureqa_shuffled/train-* - config_name: finqa_shuffled data_files: - split: train path: finqa_shuffled/train-* - config_name: funsd_shuffled data_files: - split: train path: funsd_shuffled/train-* - config_name: geo170k(align)_shuffled data_files: - split: train path: geo170k(align)_shuffled/train-* - config_name: geo170k(qa)_shuffled data_files: - split: train path: geo170k(qa)_shuffled/train-* - config_name: geo3k_shuffled data_files: - split: train path: geo3k_shuffled/train-* - config_name: geometry3k(mathv360k)_shuffled data_files: - split: train path: geometry3k(mathv360k)_shuffled/train-* - config_name: geomverse_shuffled data_files: - split: train path: geomverse_shuffled/train-* - config_name: geoqa+(mathv360k)_shuffled data_files: - split: train path: geoqa+(mathv360k)_shuffled/train-* - config_name: geos(mathv360k)_shuffled data_files: - split: train path: geos(mathv360k)_shuffled/train-* - config_name: google_landmarks_shuffled data_files: - split: train path: google_landmarks_shuffled/train-* - config_name: groundui_shuffled data_files: - split: train path: groundui_shuffled/train-* - config_name: handwriting_forms_shuffled data_files: - split: train path: handwriting_forms_shuffled/train-* - config_name: hateful_memes_shuffled data_files: - split: train path: hateful_memes_shuffled/train-* - config_name: hitab_shuffled data_files: - split: train path: hitab_shuffled/train-* - config_name: hme100k_shuffled data_files: - split: train path: hme100k_shuffled/train-* - config_name: hw_squad_shuffled data_files: - split: train path: hw_squad_shuffled/train-* - config_name: iam_shuffled data_files: - split: train path: iam_shuffled/train-* - config_name: iconqa(mathv360k)_shuffled data_files: - split: train path: iconqa(mathv360k)_shuffled/train-* - config_name: iconqa_shuffled data_files: - split: train path: iconqa_shuffled/train-* - config_name: idk_shuffled data_files: - split: train path: idk_shuffled/train-* - config_name: iiit5k_shuffled data_files: - split: train path: iiit5k_shuffled/train-* - config_name: image_textualization(filtered)_shuffled data_files: - split: train path: image_textualization(filtered)_shuffled/train-* - config_name: imgur5k_shuffled data_files: - split: train path: imgur5k_shuffled/train-* - config_name: indoor_qa_shuffled data_files: - split: train path: indoor_qa_shuffled/train-* - config_name: infographic(gpt4v)_shuffled data_files: - split: train path: infographic(gpt4v)_shuffled/train-* - config_name: infographic_vqa_llava_format_shuffled data_files: - split: train path: infographic_vqa_llava_format_shuffled/train-* - config_name: infographic_vqa_shuffled data_files: - split: train path: infographic_vqa_shuffled/train-* - config_name: intergps_shuffled data_files: - split: train path: intergps_shuffled/train-* - config_name: invoices_receipts_shuffled data_files: - split: train path: invoices_receipts_shuffled/train-* - config_name: k12_printing_shuffled data_files: - split: train path: k12_printing_shuffled/train-* - config_name: laion_gpt4v_shuffled data_files: - split: train path: laion_gpt4v_shuffled/train-* - config_name: latex_handwritten_shuffled data_files: - split: train path: latex_handwritten_shuffled/train-* - config_name: latexformulas_shuffled data_files: - split: train path: latexformulas_shuffled/train-* - config_name: llavar_gpt4_20k_shuffled data_files: - split: train path: llavar_gpt4_20k_shuffled/train-* - config_name: lnqa_shuffled data_files: - split: train path: lnqa_shuffled/train-* - config_name: localized_narratives_shuffled data_files: - split: train path: localized_narratives_shuffled/train-* - config_name: lrv_chart_shuffled data_files: - split: train path: lrv_chart_shuffled/train-* - config_name: lrv_normal(filtered)_shuffled data_files: - split: train path: lrv_normal(filtered)_shuffled/train-* - config_name: lvis_instruct4v_shuffled data_files: - split: train path: lvis_instruct4v_shuffled/train-* - config_name: mapqa(mathv360k)_shuffled data_files: - split: train path: mapqa(mathv360k)_shuffled/train-* - config_name: mapqa_shuffled data_files: - split: train path: mapqa_shuffled/train-* - config_name: maptext_shuffled data_files: - split: train path: maptext_shuffled/train-* - config_name: mathwriting-google_shuffled data_files: - split: train path: mathwriting-google_shuffled/train-* - config_name: mavis_math_metagen_shuffled data_files: - split: train path: mavis_math_metagen_shuffled/train-* - config_name: mavis_math_rule_geo_shuffled data_files: - split: train path: mavis_math_rule_geo_shuffled/train-* - config_name: memotion_shuffled data_files: - split: train path: memotion_shuffled/train-* - config_name: mimic_cgd_shuffled data_files: - split: train path: mimic_cgd_shuffled/train-* - config_name: mmc_instruct_shuffled data_files: - split: train path: mmc_instruct_shuffled/train-* - config_name: mmevol_shuffled data_files: - split: train path: mmevol_shuffled/train-* - config_name: mmra_shuffled data_files: - split: train path: mmra_shuffled/train-* - config_name: mmsoc_memotion_shuffled data_files: - split: train path: mmsoc_memotion_shuffled/train-* - config_name: multihiertt_shuffled data_files: - split: train path: multihiertt_shuffled/train-* - config_name: nlvr2_shuffled data_files: - split: train path: nlvr2_shuffled/train-* - config_name: objects365_qa_shuffled data_files: - split: train path: objects365_qa_shuffled/train-* - config_name: ocrvqa_shuffled data_files: - split: train path: ocrvqa_shuffled/train-* - config_name: olmOCR-mix-0225-books_shuffled data_files: - split: train path: olmOCR-mix-0225-books_shuffled/train-* - config_name: olmOCR-mix-0225-documents_shuffled data_files: - split: train path: olmOCR-mix-0225-documents_shuffled/train-* - config_name: oodvqa_shuffled data_files: - split: train path: oodvqa_shuffled/train-* - config_name: orand_car_a_shuffled data_files: - split: train path: orand_car_a_shuffled/train-* - config_name: pathvqa_shuffled data_files: - split: train path: pathvqa_shuffled/train-* - config_name: pdfvqa_shuffled data_files: - split: train path: pdfvqa_shuffled/train-* - config_name: plotqa_shuffled data_files: - split: train path: plotqa_shuffled/train-* - config_name: pmc_vqa(mathv360k)_shuffled data_files: - split: train path: pmc_vqa(mathv360k)_shuffled/train-* - config_name: raven_shuffled data_files: - split: train path: raven_shuffled/train-* - config_name: rendered_text_shuffled data_files: - split: train path: rendered_text_shuffled/train-* - config_name: robut_sqa_shuffled data_files: - split: train path: robut_sqa_shuffled/train-* - config_name: robut_wikisql_shuffled data_files: - split: train path: robut_wikisql_shuffled/train-* - config_name: robut_wtq_shuffled data_files: - split: train path: robut_wtq_shuffled/train-* - config_name: scienceqa(nona_context)_shuffled data_files: - split: train path: scienceqa(nona_context)_shuffled/train-* - config_name: scienceqa_shuffled data_files: - split: train path: scienceqa_shuffled/train-* - config_name: screen2words_shuffled data_files: - split: train path: screen2words_shuffled/train-* - config_name: screenqa_shuffled data_files: - split: train path: screenqa_shuffled/train-* - config_name: sharegpt4o_shuffled data_files: - split: train path: sharegpt4o_shuffled/train-* - config_name: sharegpt4v(coco)_shuffled data_files: - split: train path: sharegpt4v(coco)_shuffled/train-* - config_name: sharegpt4v(knowledge)_shuffled data_files: - split: train path: sharegpt4v(knowledge)_shuffled/train-* - config_name: sharegpt4v(llava)_shuffled data_files: - split: train path: sharegpt4v(llava)_shuffled/train-* - config_name: sharegpt4v(sam)_shuffled data_files: - split: train path: sharegpt4v(sam)_shuffled/train-* - config_name: sketchyvqa_shuffled data_files: - split: train path: sketchyvqa_shuffled/train-* - config_name: slidevqa_shuffled data_files: - split: train path: slidevqa_shuffled/train-* - config_name: spark_shuffled data_files: - split: train path: spark_shuffled/train-* - config_name: spatialsense_shuffled data_files: - split: train path: spatialsense_shuffled/train-* - config_name: spot_the_diff_shuffled data_files: - split: train path: spot_the_diff_shuffled/train-* - config_name: sroie_shuffled data_files: - split: train path: sroie_shuffled/train-* - config_name: st_vqa_shuffled data_files: - split: train path: st_vqa_shuffled/train-* - config_name: sujet_finance_shuffled data_files: - split: train path: sujet_finance_shuffled/train-* - config_name: super_clevr(mathv360k)_shuffled data_files: - split: train path: super_clevr(mathv360k)_shuffled/train-* - config_name: svrd_shuffled data_files: - split: train path: svrd_shuffled/train-* - config_name: synthdog_shuffled data_files: - split: train path: synthdog_shuffled/train-* - config_name: tabmwp(mathv360k)_shuffled data_files: - split: train path: tabmwp(mathv360k)_shuffled/train-* - config_name: tabmwp_shuffled data_files: - split: train path: tabmwp_shuffled/train-* - config_name: tal_ocr_eng_shuffled data_files: - split: train path: tal_ocr_eng_shuffled/train-* - config_name: tallyqa_shuffled data_files: - split: train path: tallyqa_shuffled/train-* - config_name: tat_dqa_shuffled data_files: - split: train path: tat_dqa_shuffled/train-* - config_name: tat_qa_shuffled data_files: - split: train path: tat_qa_shuffled/train-* - config_name: text_OpenMathInstruct-2_shuffled data_files: - split: train path: text_OpenMathInstruct-2_shuffled/train-* - config_name: text_code_feedback_shuffled data_files: - split: train path: text_code_feedback_shuffled/train-* - config_name: text_codefeedback_filtered_instruction_shuffled data_files: - split: train path: text_codefeedback_filtered_instruction_shuffled/train-* - config_name: text_infinitymath_shuffled data_files: - split: train path: text_infinitymath_shuffled/train-* - config_name: text_mathinstruct_shuffled data_files: - split: train path: text_mathinstruct_shuffled/train-* - config_name: text_mathqa_shuffled data_files: - split: train path: text_mathqa_shuffled/train-* - config_name: text_mathstepdpo10k_shuffled data_files: - split: train path: text_mathstepdpo10k_shuffled/train-* - config_name: text_numinamath_cot_shuffled data_files: - split: train path: text_numinamath_cot_shuffled/train-* - config_name: text_openhermes_2_5_shuffled data_files: - split: train path: text_openhermes_2_5_shuffled/train-* - config_name: text_openorca_shuffled data_files: - split: train path: text_openorca_shuffled/train-* - config_name: text_orcamath_shuffled data_files: - split: train path: text_orcamath_shuffled/train-* - config_name: text_pythoncode25k_shuffled data_files: - split: train path: text_pythoncode25k_shuffled/train-* - config_name: text_pythoncodealpaca_shuffled data_files: - split: train path: text_pythoncodealpaca_shuffled/train-* - config_name: text_ruozhiba_shuffled data_files: - split: train path: text_ruozhiba_shuffled/train-* - config_name: text_theoremqa_shuffled data_files: - split: train path: text_theoremqa_shuffled/train-* - config_name: text_wizardlm_evol_shuffled data_files: - split: train path: text_wizardlm_evol_shuffled/train-* - config_name: textcaps_shuffled data_files: - split: train path: textcaps_shuffled/train-* - config_name: textocr(gpt4v)_shuffled data_files: - split: train path: textocr(gpt4v)_shuffled/train-* - config_name: textvqa_shuffled data_files: - split: train path: textvqa_shuffled/train-* - config_name: tqa_shuffled data_files: - split: train path: tqa_shuffled/train-* - config_name: unigeo(mathv360k)_shuffled data_files: - split: train path: unigeo(mathv360k)_shuffled/train-* - config_name: ureader_cap_shuffled data_files: - split: train path: ureader_cap_shuffled/train-* - config_name: ureader_ie_shuffled data_files: - split: train path: ureader_ie_shuffled/train-* - config_name: ureader_kg_processed_shuffled data_files: - split: train path: ureader_kg_processed_shuffled/train-* - config_name: ureader_qa_processed_shuffled data_files: - split: train path: ureader_qa_processed_shuffled/train-* - config_name: vision_flan(filtered)_shuffled data_files: - split: train path: vision_flan(filtered)_shuffled/train-* - config_name: vistext_shuffled data_files: - split: train path: vistext_shuffled/train-* - config_name: visual7w_shuffled data_files: - split: train path: visual7w_shuffled/train-* - config_name: visualmrc_shuffled data_files: - split: train path: visualmrc_shuffled/train-* - config_name: visualwebinstruct(filtered)_shuffled data_files: - split: train path: visualwebinstruct(filtered)_shuffled/train-* - config_name: vizwiz(mathv360k)_shuffled data_files: - split: train path: vizwiz(mathv360k)_shuffled/train-* - config_name: vqaonbd_shuffled data_files: - split: train path: vqaonbd_shuffled/train-* - config_name: vqarad_shuffled data_files: - split: train path: vqarad_shuffled/train-* - config_name: vqav2_shuffled data_files: - split: train path: vqav2_shuffled/train-* - config_name: vsr_shuffled data_files: - split: train path: vsr_shuffled/train-* - config_name: websight_shuffled data_files: - split: train path: websight_shuffled/train-* - config_name: wildvision_shuffled data_files: - split: train path: wildvision_shuffled/train-* - config_name: wordart_shuffled data_files: - split: train path: wordart_shuffled/train-* - config_name: yesbut_shuffled data_files: - split: train path: yesbut_shuffled/train-* ---
提供机构:
ifx-pse-sys-ml
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作