cyzcz233/bbh-gpt-cot
收藏 | Hugging Face | 2025-12-10 更新 | 2025-12-20 收录
下载链接:
https://hf-mirror.com/datasets/cyzcz233/bbh-gpt-cot
下载链接
链接失效反馈官方服务:
资源简介:
---
# Per-config schema and size metadata. Every config below declares the same
# six columns (five strings and one bool) and a single `test` split.
dataset_info:
- config_name: boolean_expressions
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 203913
    num_examples: 250
  download_size: 54373
  dataset_size: 203913
- config_name: causal_judgement
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 540339
    num_examples: 187
  download_size: 224963
  dataset_size: 540339
- config_name: date_understanding
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 249517
    num_examples: 250
  download_size: 102564
  dataset_size: 249517
- config_name: disambiguation_qa
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 410707
    num_examples: 250
  download_size: 149298
  dataset_size: 410707
- config_name: dyck_languages
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 500348
    num_examples: 250
  download_size: 186526
  dataset_size: 500348
- config_name: formal_fallacies
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 641320
    num_examples: 250
  download_size: 197495
  dataset_size: 641320
- config_name: geometric_shapes
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 484284
    num_examples: 250
  download_size: 194230
  dataset_size: 484284
- config_name: hyperbaton
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 340757
    num_examples: 250
  download_size: 98685
  dataset_size: 340757
- config_name: logical_deduction_five_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 520708
    num_examples: 250
  download_size: 155657
  dataset_size: 520708
- config_name: logical_deduction_seven_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 658846
    num_examples: 250
  download_size: 211726
  dataset_size: 658846
- config_name: logical_deduction_three_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 326331
    num_examples: 250
  download_size: 87993
  dataset_size: 326331
- config_name: movie_recommendation
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 537067
    num_examples: 250
  download_size: 232620
  dataset_size: 537067
- config_name: multistep_arithmetic_two
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 221027
    num_examples: 250
  download_size: 77085
  dataset_size: 221027
- config_name: navigate
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 261605
    num_examples: 250
  download_size: 74631
  dataset_size: 261605
- config_name: object_counting
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 144982
    num_examples: 250
  download_size: 52903
  dataset_size: 144982
- config_name: penguins_in_a_table
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 192824
    num_examples: 146
  download_size: 52726
  dataset_size: 192824
- config_name: reasoning_about_colored_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 273247
    num_examples: 250
  download_size: 81981
  dataset_size: 273247
- config_name: ruin_names
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 323649
    num_examples: 250
  download_size: 130210
  dataset_size: 323649
- config_name: salient_translation_error_detection
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 679551
    num_examples: 250
  download_size: 219882
  dataset_size: 679551
- config_name: snarks
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 234973
    num_examples: 178
  download_size: 102300
  dataset_size: 234973
- config_name: sports_understanding
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 379457
    num_examples: 250
  download_size: 172266
  dataset_size: 379457
- config_name: temporal_sequences
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 468207
    num_examples: 250
  download_size: 126152
  dataset_size: 468207
- config_name: tracking_shuffled_objects_five_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 538984
    num_examples: 250
  download_size: 123752
  dataset_size: 538984
- config_name: tracking_shuffled_objects_seven_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 688425
    num_examples: 250
  download_size: 163307
  dataset_size: 688425
- config_name: tracking_shuffled_objects_three_objects
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 385134
    num_examples: 250
  download_size: 88076
  dataset_size: 385134
- config_name: web_of_lies
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 370423
    num_examples: 250
  download_size: 110240
  dataset_size: 370423
- config_name: word_sorting
  features:
  - name: task
    dtype: string
  - name: input
    dtype: string
  - name: answer
    dtype: string
  - name: label
    dtype: string
  - name: model_final_answer
    dtype: string
  - name: is_correct
    dtype: bool
  splits:
  - name: test
    num_bytes: 338792
    num_examples: 250
  download_size: 169870
  dataset_size: 338792
# Maps each config name to the file glob that holds its `test` split.
configs:
- config_name: boolean_expressions
  data_files:
  - split: test
    path: boolean_expressions/test-*
- config_name: causal_judgement
  data_files:
  - split: test
    path: causal_judgement/test-*
- config_name: date_understanding
  data_files:
  - split: test
    path: date_understanding/test-*
- config_name: disambiguation_qa
  data_files:
  - split: test
    path: disambiguation_qa/test-*
- config_name: dyck_languages
  data_files:
  - split: test
    path: dyck_languages/test-*
- config_name: formal_fallacies
  data_files:
  - split: test
    path: formal_fallacies/test-*
- config_name: geometric_shapes
  data_files:
  - split: test
    path: geometric_shapes/test-*
- config_name: hyperbaton
  data_files:
  - split: test
    path: hyperbaton/test-*
- config_name: logical_deduction_five_objects
  data_files:
  - split: test
    path: logical_deduction_five_objects/test-*
- config_name: logical_deduction_seven_objects
  data_files:
  - split: test
    path: logical_deduction_seven_objects/test-*
- config_name: logical_deduction_three_objects
  data_files:
  - split: test
    path: logical_deduction_three_objects/test-*
- config_name: movie_recommendation
  data_files:
  - split: test
    path: movie_recommendation/test-*
- config_name: multistep_arithmetic_two
  data_files:
  - split: test
    path: multistep_arithmetic_two/test-*
- config_name: navigate
  data_files:
  - split: test
    path: navigate/test-*
- config_name: object_counting
  data_files:
  - split: test
    path: object_counting/test-*
- config_name: penguins_in_a_table
  data_files:
  - split: test
    path: penguins_in_a_table/test-*
- config_name: reasoning_about_colored_objects
  data_files:
  - split: test
    path: reasoning_about_colored_objects/test-*
- config_name: ruin_names
  data_files:
  - split: test
    path: ruin_names/test-*
- config_name: salient_translation_error_detection
  data_files:
  - split: test
    path: salient_translation_error_detection/test-*
- config_name: snarks
  data_files:
  - split: test
    path: snarks/test-*
- config_name: sports_understanding
  data_files:
  - split: test
    path: sports_understanding/test-*
- config_name: temporal_sequences
  data_files:
  - split: test
    path: temporal_sequences/test-*
- config_name: tracking_shuffled_objects_five_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_five_objects/test-*
- config_name: tracking_shuffled_objects_seven_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_seven_objects/test-*
- config_name: tracking_shuffled_objects_three_objects
  data_files:
  - split: test
    path: tracking_shuffled_objects_three_objects/test-*
- config_name: web_of_lies
  data_files:
  - split: test
    path: web_of_lies/test-*
- config_name: word_sorting
  data_files:
  - split: test
    path: word_sorting/test-*
---
提供机构:
cyzcz233



