Name: cyzcz233/bbh-gpt-cot
Creator: cyzcz233
Published: 2025-12-10 06:43:46
License: 暂无描述

下载链接：

https://hf-mirror.com/datasets/cyzcz233/bbh-gpt-cot

下载链接

链接失效反馈

官方服务：

资源简介：

--- dataset_info: - config_name: boolean_expressions features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 203913 num_examples: 250 download_size: 54373 dataset_size: 203913 - config_name: causal_judgement features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 540339 num_examples: 187 download_size: 224963 dataset_size: 540339 - config_name: date_understanding features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 249517 num_examples: 250 download_size: 102564 dataset_size: 249517 - config_name: disambiguation_qa features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 410707 num_examples: 250 download_size: 149298 dataset_size: 410707 - config_name: dyck_languages features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 500348 num_examples: 250 download_size: 186526 dataset_size: 500348 - config_name: formal_fallacies features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 641320 num_examples: 250 download_size: 197495 dataset_size: 641320 - config_name: geometric_shapes features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 484284 num_examples: 250 download_size: 194230 dataset_size: 484284 - config_name: hyperbaton features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 340757 num_examples: 250 download_size: 98685 dataset_size: 340757 - config_name: logical_deduction_five_objects features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 520708 num_examples: 250 download_size: 155657 dataset_size: 520708 - config_name: logical_deduction_seven_objects features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 658846 num_examples: 250 download_size: 211726 dataset_size: 658846 - config_name: logical_deduction_three_objects features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 326331 num_examples: 250 download_size: 87993 dataset_size: 326331 - config_name: movie_recommendation features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 537067 num_examples: 250 download_size: 232620 dataset_size: 537067 - config_name: multistep_arithmetic_two features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 221027 num_examples: 250 download_size: 77085 dataset_size: 221027 - config_name: navigate features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 261605 num_examples: 250 download_size: 74631 dataset_size: 261605 - config_name: object_counting features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 144982 num_examples: 250 download_size: 52903 dataset_size: 144982 - config_name: penguins_in_a_table features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 192824 num_examples: 146 download_size: 52726 dataset_size: 192824 - config_name: reasoning_about_colored_objects features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 273247 num_examples: 250 download_size: 81981 dataset_size: 273247 - config_name: ruin_names features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 323649 num_examples: 250 download_size: 130210 dataset_size: 323649 - config_name: salient_translation_error_detection features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 679551 num_examples: 250 download_size: 219882 dataset_size: 679551 - config_name: snarks features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 234973 num_examples: 178 download_size: 102300 dataset_size: 234973 - config_name: sports_understanding features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 379457 num_examples: 250 download_size: 172266 dataset_size: 379457 - config_name: temporal_sequences features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 468207 num_examples: 250 download_size: 126152 dataset_size: 468207 - config_name: tracking_shuffled_objects_five_objects features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 538984 num_examples: 250 download_size: 123752 dataset_size: 538984 - config_name: tracking_shuffled_objects_seven_objects features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 688425 num_examples: 250 download_size: 163307 dataset_size: 688425 - config_name: tracking_shuffled_objects_three_objects features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 385134 num_examples: 250 download_size: 88076 dataset_size: 385134 - config_name: web_of_lies features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 370423 num_examples: 250 download_size: 110240 dataset_size: 370423 - config_name: word_sorting features: - name: task dtype: string - name: input dtype: string - name: answer dtype: string - name: label dtype: string - name: model_final_answer dtype: string - name: is_correct dtype: bool splits: - name: test num_bytes: 338792 num_examples: 250 download_size: 169870 dataset_size: 338792 configs: - config_name: boolean_expressions data_files: - split: test path: boolean_expressions/test-* - config_name: causal_judgement data_files: - split: test path: causal_judgement/test-* - config_name: date_understanding data_files: - split: test path: date_understanding/test-* - config_name: disambiguation_qa data_files: - split: test path: disambiguation_qa/test-* - config_name: dyck_languages data_files: - split: test path: dyck_languages/test-* - config_name: formal_fallacies data_files: - split: test path: formal_fallacies/test-* - config_name: geometric_shapes data_files: - split: test path: geometric_shapes/test-* - config_name: hyperbaton data_files: - split: test path: hyperbaton/test-* - config_name: logical_deduction_five_objects data_files: - split: test path: logical_deduction_five_objects/test-* - config_name: logical_deduction_seven_objects data_files: - split: test path: logical_deduction_seven_objects/test-* - config_name: logical_deduction_three_objects data_files: - split: test path: logical_deduction_three_objects/test-* - config_name: movie_recommendation data_files: - split: test path: movie_recommendation/test-* - config_name: multistep_arithmetic_two data_files: - split: test path: multistep_arithmetic_two/test-* - config_name: navigate data_files: - split: test path: navigate/test-* - config_name: object_counting data_files: - split: test path: object_counting/test-* - config_name: penguins_in_a_table data_files: - split: test path: penguins_in_a_table/test-* - config_name: reasoning_about_colored_objects data_files: - split: test path: reasoning_about_colored_objects/test-* - config_name: ruin_names data_files: - split: test path: ruin_names/test-* - config_name: salient_translation_error_detection data_files: - split: test path: salient_translation_error_detection/test-* - config_name: snarks data_files: - split: test path: snarks/test-* - config_name: sports_understanding data_files: - split: test path: sports_understanding/test-* - config_name: temporal_sequences data_files: - split: test path: temporal_sequences/test-* - config_name: tracking_shuffled_objects_five_objects data_files: - split: test path: tracking_shuffled_objects_five_objects/test-* - config_name: tracking_shuffled_objects_seven_objects data_files: - split: test path: tracking_shuffled_objects_seven_objects/test-* - config_name: tracking_shuffled_objects_three_objects data_files: - split: test path: tracking_shuffled_objects_three_objects/test-* - config_name: web_of_lies data_files: - split: test path: web_of_lies/test-* - config_name: word_sorting data_files: - split: test path: word_sorting/test-* ---

应用场景：