unlearning-cleanslate/generations-simnpo_qwen3-8b_20260428_063109-debug-post-qwen
收藏Hugging Face2026-04-29 更新2026-05-03 收录
下载链接:
https://hf-mirror.com/datasets/unlearning-cleanslate/generations-simnpo_qwen3-8b_20260428_063109-debug-post-qwen
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: arc_challenge
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answerKey
dtype: string
- name: choices
struct:
- name: label
list: string
- name: text
list: string
- name: id
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_4
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1903390
num_examples: 1172
download_size: 1729151
dataset_size: 1903390
- config_name: bbh_cot_fewshot_boolean_expressions
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 693121
num_examples: 250
download_size: 682133
dataset_size: 693121
- config_name: bbh_cot_fewshot_causal_judgement
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1314430
num_examples: 187
download_size: 1309538
dataset_size: 1314430
- config_name: bbh_cot_fewshot_date_understanding
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 603805
num_examples: 250
download_size: 591701
dataset_size: 603805
- config_name: bbh_cot_fewshot_disambiguation_qa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1562042
num_examples: 250
download_size: 1560449
dataset_size: 1562042
- config_name: bbh_cot_fewshot_dyck_languages
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 969767
num_examples: 250
download_size: 967974
dataset_size: 969767
- config_name: bbh_cot_fewshot_formal_fallacies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1787298
num_examples: 250
download_size: 1772155
dataset_size: 1787298
- config_name: bbh_cot_fewshot_geometric_shapes
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1771796
num_examples: 250
download_size: 1757872
dataset_size: 1771796
- config_name: bbh_cot_fewshot_hyperbaton
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1225916
num_examples: 250
download_size: 1229352
dataset_size: 1225916
- config_name: bbh_cot_fewshot_logical_deduction_five_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1302137
num_examples: 250
download_size: 1301978
dataset_size: 1302137
- config_name: bbh_cot_fewshot_logical_deduction_seven_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1613577
num_examples: 250
download_size: 1626796
dataset_size: 1613577
- config_name: bbh_cot_fewshot_logical_deduction_three_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1103917
num_examples: 250
download_size: 1100982
dataset_size: 1103917
- config_name: bbh_cot_fewshot_movie_recommendation
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 884486
num_examples: 250
download_size: 877113
dataset_size: 884486
- config_name: bbh_cot_fewshot_multistep_arithmetic_two
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 876297
num_examples: 250
download_size: 881832
dataset_size: 876297
- config_name: bbh_cot_fewshot_navigate
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 840345
num_examples: 250
download_size: 832992
dataset_size: 840345
- config_name: bbh_cot_fewshot_object_counting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 565515
num_examples: 250
download_size: 554567
dataset_size: 565515
- config_name: bbh_cot_fewshot_penguins_in_a_table
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 591588
num_examples: 146
download_size: 599443
dataset_size: 591588
- config_name: bbh_cot_fewshot_reasoning_about_colored_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 968402
num_examples: 250
download_size: 963372
dataset_size: 968402
- config_name: bbh_cot_fewshot_ruin_names
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1276256
num_examples: 250
download_size: 1275908
dataset_size: 1276256
- config_name: bbh_cot_fewshot_salient_translation_error_detection
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2441147
num_examples: 250
download_size: 2429496
dataset_size: 2441147
- config_name: bbh_cot_fewshot_snarks
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 856659
num_examples: 178
download_size: 861311
dataset_size: 856659
- config_name: bbh_cot_fewshot_sports_understanding
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 379880
num_examples: 250
download_size: 365079
dataset_size: 379880
- config_name: bbh_cot_fewshot_temporal_sequences
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1268294
num_examples: 250
download_size: 1267351
dataset_size: 1268294
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_five_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1305067
num_examples: 250
download_size: 1304720
dataset_size: 1305067
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1530864
num_examples: 250
download_size: 1533886
dataset_size: 1530864
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_three_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1134118
num_examples: 250
download_size: 1131586
dataset_size: 1134118
- config_name: bbh_cot_fewshot_web_of_lies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1099500
num_examples: 250
download_size: 1096545
dataset_size: 1099500
- config_name: bbh_cot_fewshot_word_sorting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1144017
num_examples: 250
download_size: 1151702
dataset_size: 1144017
- config_name: cleanslate_qa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: content_id
dtype: string
- name: content_title
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 9486432
num_examples: 12088
download_size: 8596381
dataset_size: 9486432
- config_name: coqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: additional_answers
struct:
- name: '0'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: '1'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: '2'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: answers
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: id
dtype: string
- name: questions
struct:
- name: input_text
list: string
- name: turn_id
list: int64
- name: source
dtype: string
- name: story
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: float64
- name: score
dtype: float64
splits:
- name: train
num_bytes: 5755014
num_examples: 500
download_size: 5762922
dataset_size: 5755014
- config_name: drop
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
struct:
- name: date
struct:
- name: day
dtype: string
- name: month
dtype: string
- name: year
dtype: string
- name: hit_id
dtype: string
- name: number
dtype: string
- name: spans
list: string
- name: worker_id
dtype: string
- name: answers
list:
list: string
- name: id
dtype: string
- name: passage
dtype: string
- name: query_id
dtype: string
- name: question
dtype: string
- name: section_id
dtype: string
- name: validated_answers
struct:
- name: date
list:
- name: day
dtype: string
- name: month
dtype: string
- name: year
dtype: string
- name: hit_id
list: string
- name: number
list: string
- name: spans
list:
list: string
- name: worker_id
list: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 29459216
num_examples: 9536
download_size: 27715524
dataset_size: 29459216
- config_name: gsm8k
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 11670739
num_examples: 2638
download_size: 10736120
dataset_size: 11670739
- config_name: hellaswag
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: activity_label
dtype: string
- name: choices
list: string
- name: ctx
dtype: string
- name: ctx_a
dtype: string
- name: ctx_b
dtype: string
- name: endings
list: string
- name: gold
dtype: int64
- name: ind
dtype: int64
- name: label
dtype: string
- name: query
dtype: string
- name: source_id
dtype: string
- name: split
dtype: string
- name: split_type
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 39604683
num_examples: 10042
download_size: 38115489
dataset_size: 39604683
- config_name: humaneval_plus
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: canonical_solution
dtype: string
- name: entry_point
dtype: string
- name: prompt
dtype: string
- name: task_id
dtype: string
- name: test
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 22230888
num_examples: 164
download_size: 14225080
dataset_size: 22230888
- config_name: lambada_openai
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: text
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 5117685
num_examples: 5153
download_size: 4755713
dataset_size: 5117685
- config_name: mmlu_abstract_algebra
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 187338
num_examples: 100
download_size: 189057
dataset_size: 187338
- config_name: mmlu_anatomy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 282153
num_examples: 135
download_size: 280327
dataset_size: 282153
- config_name: mmlu_astronomy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 366565
num_examples: 152
download_size: 363697
dataset_size: 366565
- config_name: mmlu_business_ethics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 256516
num_examples: 100
download_size: 259890
dataset_size: 256516
- config_name: mmlu_clinical_knowledge
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 556993
num_examples: 265
download_size: 535260
dataset_size: 556993
- config_name: mmlu_college_biology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 374029
num_examples: 144
download_size: 370925
dataset_size: 374029
- config_name: mmlu_college_chemistry
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 214782
num_examples: 100
download_size: 219109
dataset_size: 214782
- config_name: mmlu_college_computer_science
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 307867
num_examples: 100
download_size: 316707
dataset_size: 307867
- config_name: mmlu_college_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 215723
num_examples: 100
download_size: 217088
dataset_size: 215723
- config_name: mmlu_college_medicine
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 569020
num_examples: 173
download_size: 564487
dataset_size: 569020
- config_name: mmlu_college_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 242907
num_examples: 102
download_size: 245929
dataset_size: 242907
- config_name: mmlu_computer_security
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 226902
num_examples: 100
download_size: 228460
dataset_size: 226902
- config_name: mmlu_conceptual_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 419234
num_examples: 235
download_size: 400245
dataset_size: 419234
- config_name: mmlu_econometrics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 333923
num_examples: 114
download_size: 335236
dataset_size: 333923
- config_name: mmlu_electrical_engineering
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 261635
num_examples: 145
download_size: 255691
dataset_size: 261635
- config_name: mmlu_elementary_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 704796
num_examples: 378
download_size: 661456
dataset_size: 704796
- config_name: mmlu_formal_logic
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 360397
num_examples: 126
download_size: 361550
dataset_size: 360397
- config_name: mmlu_global_facts
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 180713
num_examples: 100
download_size: 181917
dataset_size: 180713
- config_name: mmlu_high_school_biology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 835070
num_examples: 310
download_size: 806822
dataset_size: 835070
- config_name: mmlu_high_school_chemistry
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 481654
num_examples: 203
download_size: 467938
dataset_size: 481654
- config_name: mmlu_high_school_computer_science
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 319176
num_examples: 100
download_size: 325517
dataset_size: 319176
- config_name: mmlu_high_school_european_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1511157
num_examples: 165
download_size: 1524803
dataset_size: 1511157
- config_name: mmlu_high_school_geography
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 395098
num_examples: 198
download_size: 381716
dataset_size: 395098
- config_name: mmlu_high_school_government_and_politics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 524176
num_examples: 193
download_size: 513754
dataset_size: 524176
- config_name: mmlu_high_school_macroeconomics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 962215
num_examples: 390
download_size: 922031
dataset_size: 962215
- config_name: mmlu_high_school_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 528720
num_examples: 270
download_size: 507222
dataset_size: 528720
- config_name: mmlu_high_school_microeconomics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 606605
num_examples: 238
download_size: 589056
dataset_size: 606605
- config_name: mmlu_high_school_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 436950
num_examples: 151
download_size: 439561
dataset_size: 436950
- config_name: mmlu_high_school_psychology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1308898
num_examples: 545
download_size: 1241745
dataset_size: 1308898
- config_name: mmlu_high_school_statistics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 755868
num_examples: 216
download_size: 743207
dataset_size: 755868
- config_name: mmlu_high_school_us_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1674946
num_examples: 204
download_size: 1686094
dataset_size: 1674946
- config_name: mmlu_high_school_world_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2118861
num_examples: 237
download_size: 2123772
dataset_size: 2118861
- config_name: mmlu_human_aging
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 427442
num_examples: 223
download_size: 410339
dataset_size: 427442
- config_name: mmlu_human_sexuality
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 278898
num_examples: 131
download_size: 276066
dataset_size: 278898
- config_name: mmlu_international_law
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 378121
num_examples: 121
download_size: 383771
dataset_size: 378121
- config_name: mmlu_jurisprudence
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 266358
num_examples: 108
download_size: 266244
dataset_size: 266358
- config_name: mmlu_logical_fallacies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 399479
num_examples: 163
download_size: 394205
dataset_size: 399479
- config_name: mmlu_machine_learning
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 271008
num_examples: 112
download_size: 270854
dataset_size: 271008
- config_name: mmlu_management
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 190427
num_examples: 103
download_size: 191179
dataset_size: 190427
- config_name: mmlu_marketing
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 519077
num_examples: 234
download_size: 502415
dataset_size: 519077
- config_name: mmlu_medical_genetics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 195150
num_examples: 100
download_size: 198679
dataset_size: 195150
- config_name: mmlu_miscellaneous
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1438978
num_examples: 783
download_size: 1341327
dataset_size: 1438978
- config_name: mmlu_moral_disputes
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 848420
num_examples: 346
download_size: 813526
dataset_size: 848420
- config_name: mmlu_moral_scenarios
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2677970
num_examples: 895
download_size: 2562209
dataset_size: 2677970
- config_name: mmlu_nutrition
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 727986
num_examples: 306
download_size: 702750
dataset_size: 727986
- config_name: mmlu_philosophy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 673428
num_examples: 311
download_size: 646036
dataset_size: 673428
- config_name: mmlu_prehistory
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 732588
num_examples: 324
download_size: 702652
dataset_size: 732588
- config_name: mmlu_professional_accounting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 888616
num_examples: 282
download_size: 865253
dataset_size: 888616
- config_name: mmlu_professional_law
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 10851374
num_examples: 1534
download_size: 10726688
dataset_size: 10851374
- config_name: mmlu_professional_medicine
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1341475
num_examples: 272
download_size: 1336891
dataset_size: 1341475
- config_name: mmlu_professional_psychology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1706537
num_examples: 612
download_size: 1632943
dataset_size: 1706537
- config_name: mmlu_public_relations
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 243676
num_examples: 110
download_size: 244686
dataset_size: 243676
- config_name: mmlu_security_studies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1246581
num_examples: 245
download_size: 1235332
dataset_size: 1246581
- config_name: mmlu_sociology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 506773
num_examples: 201
download_size: 494100
dataset_size: 506773
- config_name: mmlu_us_foreign_policy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 233581
num_examples: 100
download_size: 235197
dataset_size: 233581
- config_name: mmlu_virology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 337883
num_examples: 166
download_size: 331684
dataset_size: 337883
- config_name: mmlu_world_religions
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 280582
num_examples: 171
download_size: 270810
dataset_size: 280582
- config_name: triviaqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
struct:
- name: aliases
list: string
- name: matched_wiki_entity_name
dtype: string
- name: normalized_aliases
list: string
- name: normalized_matched_wiki_entity_name
dtype: string
- name: normalized_value
dtype: string
- name: type
dtype: string
- name: value
dtype: string
- name: entity_pages
struct:
- name: doc_source
list: 'null'
- name: filename
list: 'null'
- name: title
list: 'null'
- name: wiki_context
list: 'null'
- name: question
dtype: string
- name: question_id
dtype: string
- name: question_source
dtype: string
- name: search_results
struct:
- name: description
list: 'null'
- name: filename
list: 'null'
- name: rank
list: 'null'
- name: search_context
list: 'null'
- name: title
list: 'null'
- name: url
list: 'null'
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: float64
- name: score
dtype: float64
splits:
- name: train
num_bytes: 28087505
num_examples: 17944
download_size: 21365035
dataset_size: 28087505
- config_name: winogrande
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: option1
dtype: string
- name: option2
dtype: string
- name: sentence
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 981765
num_examples: 1267
download_size: 884670
dataset_size: 981765
configs:
- config_name: arc_challenge
data_files:
- split: train
path: arc_challenge/train-*
- config_name: bbh_cot_fewshot_boolean_expressions
data_files:
- split: train
path: bbh_cot_fewshot_boolean_expressions/train-*
- config_name: bbh_cot_fewshot_causal_judgement
data_files:
- split: train
path: bbh_cot_fewshot_causal_judgement/train-*
- config_name: bbh_cot_fewshot_date_understanding
data_files:
- split: train
path: bbh_cot_fewshot_date_understanding/train-*
- config_name: bbh_cot_fewshot_disambiguation_qa
data_files:
- split: train
path: bbh_cot_fewshot_disambiguation_qa/train-*
- config_name: bbh_cot_fewshot_dyck_languages
data_files:
- split: train
path: bbh_cot_fewshot_dyck_languages/train-*
- config_name: bbh_cot_fewshot_formal_fallacies
data_files:
- split: train
path: bbh_cot_fewshot_formal_fallacies/train-*
- config_name: bbh_cot_fewshot_geometric_shapes
data_files:
- split: train
path: bbh_cot_fewshot_geometric_shapes/train-*
- config_name: bbh_cot_fewshot_hyperbaton
data_files:
- split: train
path: bbh_cot_fewshot_hyperbaton/train-*
- config_name: bbh_cot_fewshot_logical_deduction_five_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_five_objects/train-*
- config_name: bbh_cot_fewshot_logical_deduction_seven_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_seven_objects/train-*
- config_name: bbh_cot_fewshot_logical_deduction_three_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_three_objects/train-*
- config_name: bbh_cot_fewshot_movie_recommendation
data_files:
- split: train
path: bbh_cot_fewshot_movie_recommendation/train-*
- config_name: bbh_cot_fewshot_multistep_arithmetic_two
data_files:
- split: train
path: bbh_cot_fewshot_multistep_arithmetic_two/train-*
- config_name: bbh_cot_fewshot_navigate
data_files:
- split: train
path: bbh_cot_fewshot_navigate/train-*
- config_name: bbh_cot_fewshot_object_counting
data_files:
- split: train
path: bbh_cot_fewshot_object_counting/train-*
- config_name: bbh_cot_fewshot_penguins_in_a_table
data_files:
- split: train
path: bbh_cot_fewshot_penguins_in_a_table/train-*
- config_name: bbh_cot_fewshot_reasoning_about_colored_objects
data_files:
- split: train
path: bbh_cot_fewshot_reasoning_about_colored_objects/train-*
- config_name: bbh_cot_fewshot_ruin_names
data_files:
- split: train
path: bbh_cot_fewshot_ruin_names/train-*
- config_name: bbh_cot_fewshot_salient_translation_error_detection
data_files:
- split: train
path: bbh_cot_fewshot_salient_translation_error_detection/train-*
- config_name: bbh_cot_fewshot_snarks
data_files:
- split: train
path: bbh_cot_fewshot_snarks/train-*
- config_name: bbh_cot_fewshot_sports_understanding
data_files:
- split: train
path: bbh_cot_fewshot_sports_understanding/train-*
- config_name: bbh_cot_fewshot_temporal_sequences
data_files:
- split: train
path: bbh_cot_fewshot_temporal_sequences/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_five_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_five_objects/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_three_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_three_objects/train-*
- config_name: bbh_cot_fewshot_web_of_lies
data_files:
- split: train
path: bbh_cot_fewshot_web_of_lies/train-*
- config_name: bbh_cot_fewshot_word_sorting
data_files:
- split: train
path: bbh_cot_fewshot_word_sorting/train-*
- config_name: cleanslate_qa
data_files:
- split: train
path: cleanslate_qa/train-*
- config_name: coqa
data_files:
- split: train
path: coqa/train-*
- config_name: drop
data_files:
- split: train
path: drop/train-*
- config_name: gsm8k
data_files:
- split: train
path: gsm8k/train-*
- config_name: hellaswag
data_files:
- split: train
path: hellaswag/train-*
- config_name: humaneval_plus
data_files:
- split: train
path: humaneval_plus/train-*
- config_name: lambada_openai
data_files:
- split: train
path: lambada_openai/train-*
- config_name: mmlu_abstract_algebra
data_files:
- split: train
path: mmlu_abstract_algebra/train-*
- config_name: mmlu_anatomy
data_files:
- split: train
path: mmlu_anatomy/train-*
- config_name: mmlu_astronomy
data_files:
- split: train
path: mmlu_astronomy/train-*
- config_name: mmlu_business_ethics
data_files:
- split: train
path: mmlu_business_ethics/train-*
- config_name: mmlu_clinical_knowledge
data_files:
- split: train
path: mmlu_clinical_knowledge/train-*
- config_name: mmlu_college_biology
data_files:
- split: train
path: mmlu_college_biology/train-*
- config_name: mmlu_college_chemistry
data_files:
- split: train
path: mmlu_college_chemistry/train-*
- config_name: mmlu_college_computer_science
data_files:
- split: train
path: mmlu_college_computer_science/train-*
- config_name: mmlu_college_mathematics
data_files:
- split: train
path: mmlu_college_mathematics/train-*
- config_name: mmlu_college_medicine
data_files:
- split: train
path: mmlu_college_medicine/train-*
- config_name: mmlu_college_physics
data_files:
- split: train
path: mmlu_college_physics/train-*
- config_name: mmlu_computer_security
data_files:
- split: train
path: mmlu_computer_security/train-*
- config_name: mmlu_conceptual_physics
data_files:
- split: train
path: mmlu_conceptual_physics/train-*
- config_name: mmlu_econometrics
data_files:
- split: train
path: mmlu_econometrics/train-*
- config_name: mmlu_electrical_engineering
data_files:
- split: train
path: mmlu_electrical_engineering/train-*
- config_name: mmlu_elementary_mathematics
data_files:
- split: train
path: mmlu_elementary_mathematics/train-*
- config_name: mmlu_formal_logic
data_files:
- split: train
path: mmlu_formal_logic/train-*
- config_name: mmlu_global_facts
data_files:
- split: train
path: mmlu_global_facts/train-*
- config_name: mmlu_high_school_biology
data_files:
- split: train
path: mmlu_high_school_biology/train-*
- config_name: mmlu_high_school_chemistry
data_files:
- split: train
path: mmlu_high_school_chemistry/train-*
- config_name: mmlu_high_school_computer_science
data_files:
- split: train
path: mmlu_high_school_computer_science/train-*
- config_name: mmlu_high_school_european_history
data_files:
- split: train
path: mmlu_high_school_european_history/train-*
- config_name: mmlu_high_school_geography
data_files:
- split: train
path: mmlu_high_school_geography/train-*
- config_name: mmlu_high_school_government_and_politics
data_files:
- split: train
path: mmlu_high_school_government_and_politics/train-*
- config_name: mmlu_high_school_macroeconomics
data_files:
- split: train
path: mmlu_high_school_macroeconomics/train-*
- config_name: mmlu_high_school_mathematics
data_files:
- split: train
path: mmlu_high_school_mathematics/train-*
- config_name: mmlu_high_school_microeconomics
data_files:
- split: train
path: mmlu_high_school_microeconomics/train-*
- config_name: mmlu_high_school_physics
data_files:
- split: train
path: mmlu_high_school_physics/train-*
- config_name: mmlu_high_school_psychology
data_files:
- split: train
path: mmlu_high_school_psychology/train-*
- config_name: mmlu_high_school_statistics
data_files:
- split: train
path: mmlu_high_school_statistics/train-*
- config_name: mmlu_high_school_us_history
data_files:
- split: train
path: mmlu_high_school_us_history/train-*
- config_name: mmlu_high_school_world_history
data_files:
- split: train
path: mmlu_high_school_world_history/train-*
- config_name: mmlu_human_aging
data_files:
- split: train
path: mmlu_human_aging/train-*
- config_name: mmlu_human_sexuality
data_files:
- split: train
path: mmlu_human_sexuality/train-*
- config_name: mmlu_international_law
data_files:
- split: train
path: mmlu_international_law/train-*
- config_name: mmlu_jurisprudence
data_files:
- split: train
path: mmlu_jurisprudence/train-*
- config_name: mmlu_logical_fallacies
data_files:
- split: train
path: mmlu_logical_fallacies/train-*
- config_name: mmlu_machine_learning
data_files:
- split: train
path: mmlu_machine_learning/train-*
- config_name: mmlu_management
data_files:
- split: train
path: mmlu_management/train-*
- config_name: mmlu_marketing
data_files:
- split: train
path: mmlu_marketing/train-*
- config_name: mmlu_medical_genetics
data_files:
- split: train
path: mmlu_medical_genetics/train-*
- config_name: mmlu_miscellaneous
data_files:
- split: train
path: mmlu_miscellaneous/train-*
- config_name: mmlu_moral_disputes
data_files:
- split: train
path: mmlu_moral_disputes/train-*
- config_name: mmlu_moral_scenarios
data_files:
- split: train
path: mmlu_moral_scenarios/train-*
- config_name: mmlu_nutrition
data_files:
- split: train
path: mmlu_nutrition/train-*
- config_name: mmlu_philosophy
data_files:
- split: train
path: mmlu_philosophy/train-*
- config_name: mmlu_prehistory
data_files:
- split: train
path: mmlu_prehistory/train-*
- config_name: mmlu_professional_accounting
data_files:
- split: train
path: mmlu_professional_accounting/train-*
- config_name: mmlu_professional_law
data_files:
- split: train
path: mmlu_professional_law/train-*
- config_name: mmlu_professional_medicine
data_files:
- split: train
path: mmlu_professional_medicine/train-*
- config_name: mmlu_professional_psychology
data_files:
- split: train
path: mmlu_professional_psychology/train-*
- config_name: mmlu_public_relations
data_files:
- split: train
path: mmlu_public_relations/train-*
- config_name: mmlu_security_studies
data_files:
- split: train
path: mmlu_security_studies/train-*
- config_name: mmlu_sociology
data_files:
- split: train
path: mmlu_sociology/train-*
- config_name: mmlu_us_foreign_policy
data_files:
- split: train
path: mmlu_us_foreign_policy/train-*
- config_name: mmlu_virology
data_files:
- split: train
path: mmlu_virology/train-*
- config_name: mmlu_world_religions
data_files:
- split: train
path: mmlu_world_religions/train-*
- config_name: triviaqa
data_files:
- split: train
path: triviaqa/train-*
- config_name: winogrande
data_files:
- split: train
path: winogrande/train-*
---
提供机构:
unlearning-cleanslate



