cyzcz233/bbh-cot
收藏 · Hugging Face · 2025-12-11 更新 · 2025-12-20 收录
下载链接:
https://hf-mirror.com/datasets/cyzcz233/bbh-cot
下载链接
链接失效反馈 · 官方服务:
资源简介:
---
dataset_info:
- config_name: boolean_expressions
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 203913
    num_examples: 250
  download_size: 54373
  dataset_size: 203913
- config_name: causal_judgement
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 482513
    num_examples: 187
  download_size: 197017
  dataset_size: 482513
- config_name: date_understanding
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 226821
    num_examples: 250
  download_size: 87034
  dataset_size: 226821
- config_name: disambiguation_qa
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 404834
    num_examples: 250
  download_size: 140241
  dataset_size: 404834
- config_name: dyck_languages
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 314190
    num_examples: 250
  download_size: 112374
  dataset_size: 314190
- config_name: formal_fallacies
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 569642
    num_examples: 250
  download_size: 165128
  dataset_size: 569642
- config_name: geometric_shapes
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 414655
    num_examples: 250
  download_size: 159841
  dataset_size: 414655
- config_name: hyperbaton
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 337794
    num_examples: 250
  download_size: 95994
  dataset_size: 337794
- config_name: logical_deduction_five_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 518939
    num_examples: 250
  download_size: 154780
  dataset_size: 518939
- config_name: logical_deduction_seven_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 606840
    num_examples: 250
  download_size: 184936
  dataset_size: 606840
- config_name: logical_deduction_three_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 326208
    num_examples: 250
  download_size: 87961
  dataset_size: 326208
- config_name: movie_recommendation
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 487418
    num_examples: 250
  download_size: 205765
  dataset_size: 487418
- config_name: multistep_arithmetic_two
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 221027
    num_examples: 250
  download_size: 77085
  dataset_size: 221027
- config_name: navigate
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 261605
    num_examples: 250
  download_size: 74631
  dataset_size: 261605
- config_name: object_counting
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 144982
    num_examples: 250
  download_size: 52903
  dataset_size: 144982
- config_name: penguins_in_a_table
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 192824
    num_examples: 146
  download_size: 52726
  dataset_size: 192824
- config_name: reasoning_about_colored_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 273177
    num_examples: 250
  download_size: 81884
  dataset_size: 273177
- config_name: ruin_names
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 318465
    num_examples: 250
  download_size: 123836
  dataset_size: 318465
- config_name: salient_translation_error_detection
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 635521
    num_examples: 250
  download_size: 199758
  dataset_size: 635521
- config_name: snarks
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 232381
    num_examples: 178
  download_size: 97225
  dataset_size: 232381
- config_name: sports_understanding
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 370852
    num_examples: 250
  download_size: 163220
  dataset_size: 370852
- config_name: temporal_sequences
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 468207
    num_examples: 250
  download_size: 126152
  dataset_size: 468207
- config_name: tracking_shuffled_objects_five_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 538984
    num_examples: 250
  download_size: 123752
  dataset_size: 538984
- config_name: tracking_shuffled_objects_seven_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 614134
    num_examples: 250
  download_size: 150589
  dataset_size: 614134
- config_name: tracking_shuffled_objects_three_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 385134
    num_examples: 250
  download_size: 88076
  dataset_size: 385134
- config_name: web_of_lies
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 352110
    num_examples: 250
  download_size: 103647
  dataset_size: 352110
- config_name: word_sorting
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 330756
    num_examples: 250
  download_size: 162409
  dataset_size: 330756
configs:
- config_name: boolean_expressions
  data_files:
  - split: test
    path: boolean_expressions/test-*
- config_name: causal_judgement
  data_files:
  - split: test
    path: causal_judgement/test-*
- config_name: date_understanding
  data_files:
  - split: test
    path: date_understanding/test-*
- config_name: disambiguation_qa
  data_files:
  - split: test
    path: disambiguation_qa/test-*
- config_name: dyck_languages
  data_files:
  - split: test
    path: dyck_languages/test-*
- config_name: formal_fallacies
  data_files:
  - split: test
    path: formal_fallacies/test-*
- config_name: geometric_shapes
  data_files:
  - split: test
    path: geometric_shapes/test-*
- config_name: hyperbaton
  data_files:
  - split: test
    path: hyperbaton/test-*
- config_name: logical_deduction_five_objects
  data_files:
  - split: test
    path: logical_deduction_five_objects/test-*
- config_name: logical_deduction_seven_objects
  data_files:
  - split: test
    path: logical_deduction_seven_objects/test-*
- config_name: logical_deduction_three_objects
  data_files:
  - split: test
    path: logical_deduction_three_objects/test-*
- config_name: movie_recommendation
  data_files:
  - split: test
    path: movie_recommendation/test-*
- config_name: multistep_arithmetic_two
  data_files:
  - split: test
    path: multistep_arithmetic_two/test-*
- config_name: navigate
  data_files:
  - split: test
    path: navigate/test-*
- config_name: object_counting
  data_files:
  - split: test
    path: object_counting/test-*
- config_name: penguins_in_a_table
  data_files:
  - split: test
    path: penguins_in_a_table/test-*
- config_name: reasoning_about_colored_objects
  data_files:
  - split: test
    path: reasoning_about_colored_objects/test-*
- config_name: ruin_names
  data_files:
  - split: test
    path: ruin_names/test-*
- config_name: salient_translation_error_detection
  data_files:
  - split: test
    path: salient_translation_error_detection/test-*
- config_name: snarks
  data_files:
  - split: test
    path: snarks/test-*
- config_name: sports_understanding
  data_files:
  - split: test
    path: sports_understanding/test-*
- config_name: temporal_sequences
  data_files:
  - split: test
    path: temporal_sequences/test-*
- config_name: tracking_shuffled_objects_five_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_five_objects/test-*
- config_name: tracking_shuffled_objects_seven_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_seven_objects/test-*
- config_name: tracking_shuffled_objects_three_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_three_objects/test-*
- config_name: web_of_lies
  data_files:
  - split: test
    path: web_of_lies/test-*
- config_name: word_sorting
  data_files:
  - split: test
    path: word_sorting/test-*
---
提供机构:
cyzcz233



