Name: cyzcz233/bbh-cot
Creator: cyzcz233
Published: 2025-12-11 05:32:16
License: 暂无描述

下载链接：

https://hf-mirror.com/datasets/cyzcz233/bbh-cot

下载链接

链接失效反馈

官方服务：

资源简介：

---
dataset_info:
- config_name: boolean_expressions
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 203913
    num_examples: 250
  download_size: 54373
  dataset_size: 203913
- config_name: causal_judgement
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 482513
    num_examples: 187
  download_size: 197017
  dataset_size: 482513
- config_name: date_understanding
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 226821
    num_examples: 250
  download_size: 87034
  dataset_size: 226821
- config_name: disambiguation_qa
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 404834
    num_examples: 250
  download_size: 140241
  dataset_size: 404834
- config_name: dyck_languages
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 314190
    num_examples: 250
  download_size: 112374
  dataset_size: 314190
- config_name: formal_fallacies
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 569642
    num_examples: 250
  download_size: 165128
  dataset_size: 569642
- config_name: geometric_shapes
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 414655
    num_examples: 250
  download_size: 159841
  dataset_size: 414655
- config_name: hyperbaton
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 337794
    num_examples: 250
  download_size: 95994
  dataset_size: 337794
- config_name: logical_deduction_five_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 518939
    num_examples: 250
  download_size: 154780
  dataset_size: 518939
- config_name: logical_deduction_seven_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 606840
    num_examples: 250
  download_size: 184936
  dataset_size: 606840
- config_name: logical_deduction_three_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 326208
    num_examples: 250
  download_size: 87961
  dataset_size: 326208
- config_name: movie_recommendation
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 487418
    num_examples: 250
  download_size: 205765
  dataset_size: 487418
- config_name: multistep_arithmetic_two
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 221027
    num_examples: 250
  download_size: 77085
  dataset_size: 221027
- config_name: navigate
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 261605
    num_examples: 250
  download_size: 74631
  dataset_size: 261605
- config_name: object_counting
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 144982
    num_examples: 250
  download_size: 52903
  dataset_size: 144982
- config_name: penguins_in_a_table
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 192824
    num_examples: 146
  download_size: 52726
  dataset_size: 192824
- config_name: reasoning_about_colored_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 273177
    num_examples: 250
  download_size: 81884
  dataset_size: 273177
- config_name: ruin_names
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 318465
    num_examples: 250
  download_size: 123836
  dataset_size: 318465
- config_name: salient_translation_error_detection
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 635521
    num_examples: 250
  download_size: 199758
  dataset_size: 635521
- config_name: snarks
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 232381
    num_examples: 178
  download_size: 97225
  dataset_size: 232381
- config_name: sports_understanding
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 370852
    num_examples: 250
  download_size: 163220
  dataset_size: 370852
- config_name: temporal_sequences
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 468207
    num_examples: 250
  download_size: 126152
  dataset_size: 468207
- config_name: tracking_shuffled_objects_five_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 538984
    num_examples: 250
  download_size: 123752
  dataset_size: 538984
- config_name: tracking_shuffled_objects_seven_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 614134
    num_examples: 250
  download_size: 150589
  dataset_size: 614134
- config_name: tracking_shuffled_objects_three_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 385134
    num_examples: 250
  download_size: 88076
  dataset_size: 385134
- config_name: web_of_lies
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 352110
    num_examples: 250
  download_size: 103647
  dataset_size: 352110
- config_name: word_sorting
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 330756
    num_examples: 250
  download_size: 162409
  dataset_size: 330756
configs:
- config_name: boolean_expressions
  data_files:
  - split: test
    path: boolean_expressions/test-*
- config_name: causal_judgement
  data_files:
  - split: test
    path: causal_judgement/test-*
- config_name: date_understanding
  data_files:
  - split: test
    path: date_understanding/test-*
- config_name: disambiguation_qa
  data_files:
  - split: test
    path: disambiguation_qa/test-*
- config_name: dyck_languages
  data_files:
  - split: test
    path: dyck_languages/test-*
- config_name: formal_fallacies
  data_files:
  - split: test
    path: formal_fallacies/test-*
- config_name: geometric_shapes
  data_files:
  - split: test
    path: geometric_shapes/test-*
- config_name: hyperbaton
  data_files:
  - split: test
    path: hyperbaton/test-*
- config_name: logical_deduction_five_objects
  data_files:
  - split: test
    path: logical_deduction_five_objects/test-*
- config_name: logical_deduction_seven_objects
  data_files:
  - split: test
    path: logical_deduction_seven_objects/test-*
- config_name: logical_deduction_three_objects
  data_files:
  - split: test
    path: logical_deduction_three_objects/test-*
- config_name: movie_recommendation
  data_files:
  - split: test
    path: movie_recommendation/test-*
- config_name: multistep_arithmetic_two
  data_files:
  - split: test
    path: multistep_arithmetic_two/test-*
- config_name: navigate
  data_files:
  - split: test
    path: navigate/test-*
- config_name: object_counting
  data_files:
  - split: test
    path: object_counting/test-*
- config_name: penguins_in_a_table
  data_files:
  - split: test
    path: penguins_in_a_table/test-*
- config_name: reasoning_about_colored_objects
  data_files:
  - split: test
    path: reasoning_about_colored_objects/test-*
- config_name: ruin_names
  data_files:
  - split: test
    path: ruin_names/test-*
- config_name: salient_translation_error_detection
  data_files:
  - split: test
    path: salient_translation_error_detection/test-*
- config_name: snarks
  data_files:
  - split: test
    path: snarks/test-*
- config_name: sports_understanding
  data_files:
  - split: test
    path: sports_understanding/test-*
- config_name: temporal_sequences
  data_files:
  - split: test
    path: temporal_sequences/test-*
- config_name: tracking_shuffled_objects_five_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_five_objects/test-*
- config_name: tracking_shuffled_objects_seven_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_seven_objects/test-*
- config_name: tracking_shuffled_objects_three_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_three_objects/test-*
- config_name: web_of_lies
  data_files:
  - split: test
    path: web_of_lies/test-*
- config_name: word_sorting
  data_files:
  - split: test
    path: word_sorting/test-*
---

应用场景：