unlearning-cleanslate/generations-simnpo_gemma-3-12b-pt_20260416_171305-corpus_sweep_post_eval
收藏Hugging Face2026-04-20 更新2026-04-26 收录
下载链接:
https://hf-mirror.com/datasets/unlearning-cleanslate/generations-simnpo_gemma-3-12b-pt_20260416_171305-corpus_sweep_post_eval
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: arc_challenge
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answerKey
dtype: string
- name: choices
struct:
- name: label
list: string
- name: text
list: string
- name: id
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_4
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1903114
num_examples: 1172
download_size: 1728873
dataset_size: 1903114
- config_name: arc_easy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answerKey
dtype: string
- name: choices
struct:
- name: label
list: string
- name: text
list: string
- name: id
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_4
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 3520361
num_examples: 2376
download_size: 3153710
dataset_size: 3520361
- config_name: bbh_cot_fewshot_boolean_expressions
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 683825
num_examples: 250
download_size: 672583
dataset_size: 683825
- config_name: bbh_cot_fewshot_causal_judgement
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1207775
num_examples: 187
download_size: 1199625
dataset_size: 1207775
- config_name: bbh_cot_fewshot_date_understanding
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 533128
num_examples: 250
download_size: 520724
dataset_size: 533128
- config_name: bbh_cot_fewshot_disambiguation_qa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1346072
num_examples: 250
download_size: 1347002
dataset_size: 1346072
- config_name: bbh_cot_fewshot_dyck_languages
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 939160
num_examples: 250
download_size: 937081
dataset_size: 939160
- config_name: bbh_cot_fewshot_formal_fallacies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1824217
num_examples: 250
download_size: 1810421
dataset_size: 1824217
- config_name: bbh_cot_fewshot_geometric_shapes
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1744480
num_examples: 250
download_size: 1733226
dataset_size: 1744480
- config_name: bbh_cot_fewshot_hyperbaton
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1177269
num_examples: 250
download_size: 1174893
dataset_size: 1177269
- config_name: bbh_cot_fewshot_logical_deduction_five_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1143552
num_examples: 250
download_size: 1140790
dataset_size: 1143552
- config_name: bbh_cot_fewshot_logical_deduction_seven_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1285336
num_examples: 250
download_size: 1285520
dataset_size: 1285336
- config_name: bbh_cot_fewshot_logical_deduction_three_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1016668
num_examples: 250
download_size: 1012422
dataset_size: 1016668
- config_name: bbh_cot_fewshot_movie_recommendation
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 822595
num_examples: 250
download_size: 814353
dataset_size: 822595
- config_name: bbh_cot_fewshot_multistep_arithmetic_two
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 871220
num_examples: 250
download_size: 876444
dataset_size: 871220
- config_name: bbh_cot_fewshot_navigate
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 836675
num_examples: 250
download_size: 829447
dataset_size: 836675
- config_name: bbh_cot_fewshot_object_counting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 565322
num_examples: 250
download_size: 554372
dataset_size: 565322
- config_name: bbh_cot_fewshot_penguins_in_a_table
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 589262
num_examples: 146
download_size: 597399
dataset_size: 589262
- config_name: bbh_cot_fewshot_reasoning_about_colored_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 916236
num_examples: 250
download_size: 910441
dataset_size: 916236
- config_name: bbh_cot_fewshot_ruin_names
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1283986
num_examples: 250
download_size: 1283463
dataset_size: 1283986
- config_name: bbh_cot_fewshot_salient_translation_error_detection
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2431210
num_examples: 250
download_size: 2418323
dataset_size: 2431210
- config_name: bbh_cot_fewshot_snarks
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 782637
num_examples: 178
download_size: 787886
dataset_size: 782637
- config_name: bbh_cot_fewshot_sports_understanding
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 365098
num_examples: 250
download_size: 349841
dataset_size: 365098
- config_name: bbh_cot_fewshot_temporal_sequences
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1203234
num_examples: 250
download_size: 1201382
dataset_size: 1203234
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_five_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1248661
num_examples: 250
download_size: 1247408
dataset_size: 1248661
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1470665
num_examples: 250
download_size: 1472977
dataset_size: 1470665
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_three_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1079801
num_examples: 250
download_size: 1076164
dataset_size: 1079801
- config_name: bbh_cot_fewshot_web_of_lies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1099172
num_examples: 250
download_size: 1096247
dataset_size: 1099172
- config_name: bbh_cot_fewshot_word_sorting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1135173
num_examples: 250
download_size: 1143121
dataset_size: 1135173
- config_name: cleanslate_qa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: content_id
dtype: string
- name: content_title
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 7964656
num_examples: 12088
download_size: 7073707
dataset_size: 7964656
- config_name: coqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: additional_answers
struct:
- name: '0'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: '1'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: '2'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: answers
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: id
dtype: string
- name: questions
struct:
- name: input_text
list: string
- name: turn_id
list: int64
- name: source
dtype: string
- name: story
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: float64
- name: score
dtype: float64
splits:
- name: train
num_bytes: 5702442
num_examples: 500
download_size: 5709767
dataset_size: 5702442
- config_name: drop
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
struct:
- name: date
struct:
- name: day
dtype: string
- name: month
dtype: string
- name: year
dtype: string
- name: hit_id
dtype: string
- name: number
dtype: string
- name: spans
list: string
- name: worker_id
dtype: string
- name: answers
list:
list: string
- name: id
dtype: string
- name: passage
dtype: string
- name: query_id
dtype: string
- name: question
dtype: string
- name: section_id
dtype: string
- name: validated_answers
struct:
- name: date
list:
- name: day
dtype: string
- name: month
dtype: string
- name: year
dtype: string
- name: hit_id
list: string
- name: number
list: string
- name: spans
list:
list: string
- name: worker_id
list: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 29351400
num_examples: 9536
download_size: 27607010
dataset_size: 29351400
- config_name: gsm8k
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 11580124
num_examples: 2638
download_size: 10644685
dataset_size: 11580124
- config_name: hellaswag
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: activity_label
dtype: string
- name: choices
list: string
- name: ctx
dtype: string
- name: ctx_a
dtype: string
- name: ctx_b
dtype: string
- name: endings
list: string
- name: gold
dtype: int64
- name: ind
dtype: int64
- name: label
dtype: string
- name: query
dtype: string
- name: source_id
dtype: string
- name: split
dtype: string
- name: split_type
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 39601549
num_examples: 10042
download_size: 38112388
dataset_size: 39601549
- config_name: humaneval_plus
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: canonical_solution
dtype: string
- name: entry_point
dtype: string
- name: prompt
dtype: string
- name: task_id
dtype: string
- name: test
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 22128772
num_examples: 164
download_size: 14122333
dataset_size: 22128772
- config_name: lambada_openai
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: text
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 5115431
num_examples: 5153
download_size: 4753487
dataset_size: 5115431
- config_name: mmlu_abstract_algebra
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 187262
num_examples: 100
download_size: 188981
dataset_size: 187262
- config_name: mmlu_anatomy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 282021
num_examples: 135
download_size: 280181
dataset_size: 282021
- config_name: mmlu_astronomy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 366307
num_examples: 152
download_size: 363433
dataset_size: 366307
- config_name: mmlu_business_ethics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 256414
num_examples: 100
download_size: 259780
dataset_size: 256414
- config_name: mmlu_clinical_knowledge
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 556747
num_examples: 265
download_size: 535011
dataset_size: 556747
- config_name: mmlu_college_biology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 373769
num_examples: 144
download_size: 370660
dataset_size: 373769
- config_name: mmlu_college_chemistry
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 214784
num_examples: 100
download_size: 219103
dataset_size: 214784
- config_name: mmlu_college_computer_science
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 307941
num_examples: 100
download_size: 316769
dataset_size: 307941
- config_name: mmlu_college_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 215773
num_examples: 100
download_size: 217134
dataset_size: 215773
- config_name: mmlu_college_medicine
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 568854
num_examples: 173
download_size: 564310
dataset_size: 568854
- config_name: mmlu_college_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 242917
num_examples: 102
download_size: 245939
dataset_size: 242917
- config_name: mmlu_computer_security
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 226750
num_examples: 100
download_size: 228290
dataset_size: 226750
- config_name: mmlu_conceptual_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 419004
num_examples: 235
download_size: 400006
dataset_size: 419004
- config_name: mmlu_econometrics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 333921
num_examples: 114
download_size: 335229
dataset_size: 333921
- config_name: mmlu_electrical_engineering
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 261567
num_examples: 145
download_size: 255609
dataset_size: 261567
- config_name: mmlu_elementary_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 704802
num_examples: 378
download_size: 661458
dataset_size: 704802
- config_name: mmlu_formal_logic
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 360451
num_examples: 126
download_size: 361600
dataset_size: 360451
- config_name: mmlu_global_facts
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 180769
num_examples: 100
download_size: 181973
dataset_size: 180769
- config_name: mmlu_high_school_biology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 834352
num_examples: 310
download_size: 806101
dataset_size: 834352
- config_name: mmlu_high_school_chemistry
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 481524
num_examples: 203
download_size: 467802
dataset_size: 481524
- config_name: mmlu_high_school_computer_science
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 319098
num_examples: 100
download_size: 325430
dataset_size: 319098
- config_name: mmlu_high_school_european_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1511009
num_examples: 165
download_size: 1524645
dataset_size: 1511009
- config_name: mmlu_high_school_geography
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 394588
num_examples: 198
download_size: 381194
dataset_size: 394588
- config_name: mmlu_high_school_government_and_politics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 523638
num_examples: 193
download_size: 513203
dataset_size: 523638
- config_name: mmlu_high_school_macroeconomics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 961705
num_examples: 390
download_size: 921514
dataset_size: 961705
- config_name: mmlu_high_school_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 528770
num_examples: 270
download_size: 507268
dataset_size: 528770
- config_name: mmlu_high_school_microeconomics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 606267
num_examples: 238
download_size: 588712
dataset_size: 606267
- config_name: mmlu_high_school_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 436944
num_examples: 151
download_size: 439551
dataset_size: 436944
- config_name: mmlu_high_school_psychology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1307512
num_examples: 545
download_size: 1240340
dataset_size: 1307512
- config_name: mmlu_high_school_statistics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 756004
num_examples: 216
download_size: 743337
dataset_size: 756004
- config_name: mmlu_high_school_us_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1674810
num_examples: 204
download_size: 1685956
dataset_size: 1674810
- config_name: mmlu_high_school_world_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2118763
num_examples: 237
download_size: 2123656
dataset_size: 2118763
- config_name: mmlu_human_aging
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 427180
num_examples: 223
download_size: 410072
dataset_size: 427180
- config_name: mmlu_human_sexuality
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 278690
num_examples: 131
download_size: 275848
dataset_size: 278690
- config_name: mmlu_international_law
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 377993
num_examples: 121
download_size: 383637
dataset_size: 377993
- config_name: mmlu_jurisprudence
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 266202
num_examples: 108
download_size: 266080
dataset_size: 266202
- config_name: mmlu_logical_fallacies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 399169
num_examples: 163
download_size: 393885
dataset_size: 399169
- config_name: mmlu_machine_learning
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 271016
num_examples: 112
download_size: 270854
dataset_size: 271016
- config_name: mmlu_management
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 190353
num_examples: 103
download_size: 191100
dataset_size: 190353
- config_name: mmlu_marketing
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 518557
num_examples: 234
download_size: 501891
dataset_size: 518557
- config_name: mmlu_medical_genetics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 194950
num_examples: 100
download_size: 198473
dataset_size: 194950
- config_name: mmlu_miscellaneous
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1436660
num_examples: 783
download_size: 1338995
dataset_size: 1436660
- config_name: mmlu_moral_disputes
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 848216
num_examples: 346
download_size: 813329
dataset_size: 848216
- config_name: mmlu_moral_scenarios
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2677844
num_examples: 895
download_size: 2562078
dataset_size: 2677844
- config_name: mmlu_nutrition
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 727614
num_examples: 306
download_size: 702361
dataset_size: 727614
- config_name: mmlu_philosophy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 673222
num_examples: 311
download_size: 645829
dataset_size: 673222
- config_name: mmlu_prehistory
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 732096
num_examples: 324
download_size: 702158
dataset_size: 732096
- config_name: mmlu_professional_accounting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 888648
num_examples: 282
download_size: 865275
dataset_size: 888648
- config_name: mmlu_professional_law
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 10851556
num_examples: 1534
download_size: 10726868
dataset_size: 10851556
- config_name: mmlu_professional_medicine
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1341307
num_examples: 272
download_size: 1336724
dataset_size: 1341307
- config_name: mmlu_professional_psychology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1706197
num_examples: 612
download_size: 1632590
dataset_size: 1706197
- config_name: mmlu_public_relations
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 243562
num_examples: 110
download_size: 244566
dataset_size: 243562
- config_name: mmlu_security_studies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1246451
num_examples: 245
download_size: 1235198
dataset_size: 1246451
- config_name: mmlu_sociology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 506429
num_examples: 201
download_size: 493751
dataset_size: 506429
- config_name: mmlu_us_foreign_policy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 233373
num_examples: 100
download_size: 234971
dataset_size: 233373
- config_name: mmlu_virology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 337751
num_examples: 166
download_size: 331544
dataset_size: 337751
- config_name: mmlu_world_religions
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 280154
num_examples: 171
download_size: 270377
dataset_size: 280154
- config_name: piqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: goal
dtype: string
- name: label
dtype: int64
- name: sol1
dtype: string
- name: sol2
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1782450
num_examples: 1838
download_size: 1554928
dataset_size: 1782450
- config_name: triviaqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
struct:
- name: aliases
list: string
- name: matched_wiki_entity_name
dtype: string
- name: normalized_aliases
list: string
- name: normalized_matched_wiki_entity_name
dtype: string
- name: normalized_value
dtype: string
- name: type
dtype: string
- name: value
dtype: string
- name: entity_pages
struct:
- name: doc_source
list: 'null'
- name: filename
list: 'null'
- name: title
list: 'null'
- name: wiki_context
list: 'null'
- name: question
dtype: string
- name: question_id
dtype: string
- name: question_source
dtype: string
- name: search_results
struct:
- name: description
list: 'null'
- name: filename
list: 'null'
- name: rank
list: 'null'
- name: search_context
list: 'null'
- name: title
list: 'null'
- name: url
list: 'null'
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: float64
- name: score
dtype: float64
splits:
- name: train
num_bytes: 27564793
num_examples: 17944
download_size: 20842324
dataset_size: 27564793
- config_name: winogrande
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: option1
dtype: string
- name: option2
dtype: string
- name: sentence
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 981729
num_examples: 1267
download_size: 884648
dataset_size: 981729
configs:
- config_name: arc_challenge
data_files:
- split: train
path: arc_challenge/train-*
- config_name: arc_easy
data_files:
- split: train
path: arc_easy/train-*
- config_name: bbh_cot_fewshot_boolean_expressions
data_files:
- split: train
path: bbh_cot_fewshot_boolean_expressions/train-*
- config_name: bbh_cot_fewshot_causal_judgement
data_files:
- split: train
path: bbh_cot_fewshot_causal_judgement/train-*
- config_name: bbh_cot_fewshot_date_understanding
data_files:
- split: train
path: bbh_cot_fewshot_date_understanding/train-*
- config_name: bbh_cot_fewshot_disambiguation_qa
data_files:
- split: train
path: bbh_cot_fewshot_disambiguation_qa/train-*
- config_name: bbh_cot_fewshot_dyck_languages
data_files:
- split: train
path: bbh_cot_fewshot_dyck_languages/train-*
- config_name: bbh_cot_fewshot_formal_fallacies
data_files:
- split: train
path: bbh_cot_fewshot_formal_fallacies/train-*
- config_name: bbh_cot_fewshot_geometric_shapes
data_files:
- split: train
path: bbh_cot_fewshot_geometric_shapes/train-*
- config_name: bbh_cot_fewshot_hyperbaton
data_files:
- split: train
path: bbh_cot_fewshot_hyperbaton/train-*
- config_name: bbh_cot_fewshot_logical_deduction_five_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_five_objects/train-*
- config_name: bbh_cot_fewshot_logical_deduction_seven_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_seven_objects/train-*
- config_name: bbh_cot_fewshot_logical_deduction_three_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_three_objects/train-*
- config_name: bbh_cot_fewshot_movie_recommendation
data_files:
- split: train
path: bbh_cot_fewshot_movie_recommendation/train-*
- config_name: bbh_cot_fewshot_multistep_arithmetic_two
data_files:
- split: train
path: bbh_cot_fewshot_multistep_arithmetic_two/train-*
- config_name: bbh_cot_fewshot_navigate
data_files:
- split: train
path: bbh_cot_fewshot_navigate/train-*
- config_name: bbh_cot_fewshot_object_counting
data_files:
- split: train
path: bbh_cot_fewshot_object_counting/train-*
- config_name: bbh_cot_fewshot_penguins_in_a_table
data_files:
- split: train
path: bbh_cot_fewshot_penguins_in_a_table/train-*
- config_name: bbh_cot_fewshot_reasoning_about_colored_objects
data_files:
- split: train
path: bbh_cot_fewshot_reasoning_about_colored_objects/train-*
- config_name: bbh_cot_fewshot_ruin_names
data_files:
- split: train
path: bbh_cot_fewshot_ruin_names/train-*
- config_name: bbh_cot_fewshot_salient_translation_error_detection
data_files:
- split: train
path: bbh_cot_fewshot_salient_translation_error_detection/train-*
- config_name: bbh_cot_fewshot_snarks
data_files:
- split: train
path: bbh_cot_fewshot_snarks/train-*
- config_name: bbh_cot_fewshot_sports_understanding
data_files:
- split: train
path: bbh_cot_fewshot_sports_understanding/train-*
- config_name: bbh_cot_fewshot_temporal_sequences
data_files:
- split: train
path: bbh_cot_fewshot_temporal_sequences/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_five_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_five_objects/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_three_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_three_objects/train-*
- config_name: bbh_cot_fewshot_web_of_lies
data_files:
- split: train
path: bbh_cot_fewshot_web_of_lies/train-*
- config_name: bbh_cot_fewshot_word_sorting
data_files:
- split: train
path: bbh_cot_fewshot_word_sorting/train-*
- config_name: cleanslate_qa
data_files:
- split: train
path: cleanslate_qa/train-*
- config_name: coqa
data_files:
- split: train
path: coqa/train-*
- config_name: drop
data_files:
- split: train
path: drop/train-*
- config_name: gsm8k
data_files:
- split: train
path: gsm8k/train-*
- config_name: hellaswag
data_files:
- split: train
path: hellaswag/train-*
- config_name: humaneval_plus
data_files:
- split: train
path: humaneval_plus/train-*
- config_name: lambada_openai
data_files:
- split: train
path: lambada_openai/train-*
- config_name: mmlu_abstract_algebra
data_files:
- split: train
path: mmlu_abstract_algebra/train-*
- config_name: mmlu_anatomy
data_files:
- split: train
path: mmlu_anatomy/train-*
- config_name: mmlu_astronomy
data_files:
- split: train
path: mmlu_astronomy/train-*
- config_name: mmlu_business_ethics
data_files:
- split: train
path: mmlu_business_ethics/train-*
- config_name: mmlu_clinical_knowledge
data_files:
- split: train
path: mmlu_clinical_knowledge/train-*
- config_name: mmlu_college_biology
data_files:
- split: train
path: mmlu_college_biology/train-*
- config_name: mmlu_college_chemistry
data_files:
- split: train
path: mmlu_college_chemistry/train-*
- config_name: mmlu_college_computer_science
data_files:
- split: train
path: mmlu_college_computer_science/train-*
- config_name: mmlu_college_mathematics
data_files:
- split: train
path: mmlu_college_mathematics/train-*
- config_name: mmlu_college_medicine
data_files:
- split: train
path: mmlu_college_medicine/train-*
- config_name: mmlu_college_physics
data_files:
- split: train
path: mmlu_college_physics/train-*
- config_name: mmlu_computer_security
data_files:
- split: train
path: mmlu_computer_security/train-*
- config_name: mmlu_conceptual_physics
data_files:
- split: train
path: mmlu_conceptual_physics/train-*
- config_name: mmlu_econometrics
data_files:
- split: train
path: mmlu_econometrics/train-*
- config_name: mmlu_electrical_engineering
data_files:
- split: train
path: mmlu_electrical_engineering/train-*
- config_name: mmlu_elementary_mathematics
data_files:
- split: train
path: mmlu_elementary_mathematics/train-*
- config_name: mmlu_formal_logic
data_files:
- split: train
path: mmlu_formal_logic/train-*
- config_name: mmlu_global_facts
data_files:
- split: train
path: mmlu_global_facts/train-*
- config_name: mmlu_high_school_biology
data_files:
- split: train
path: mmlu_high_school_biology/train-*
- config_name: mmlu_high_school_chemistry
data_files:
- split: train
path: mmlu_high_school_chemistry/train-*
- config_name: mmlu_high_school_computer_science
data_files:
- split: train
path: mmlu_high_school_computer_science/train-*
- config_name: mmlu_high_school_european_history
data_files:
- split: train
path: mmlu_high_school_european_history/train-*
- config_name: mmlu_high_school_geography
data_files:
- split: train
path: mmlu_high_school_geography/train-*
- config_name: mmlu_high_school_government_and_politics
data_files:
- split: train
path: mmlu_high_school_government_and_politics/train-*
- config_name: mmlu_high_school_macroeconomics
data_files:
- split: train
path: mmlu_high_school_macroeconomics/train-*
- config_name: mmlu_high_school_mathematics
data_files:
- split: train
path: mmlu_high_school_mathematics/train-*
- config_name: mmlu_high_school_microeconomics
data_files:
- split: train
path: mmlu_high_school_microeconomics/train-*
- config_name: mmlu_high_school_physics
data_files:
- split: train
path: mmlu_high_school_physics/train-*
- config_name: mmlu_high_school_psychology
data_files:
- split: train
path: mmlu_high_school_psychology/train-*
- config_name: mmlu_high_school_statistics
data_files:
- split: train
path: mmlu_high_school_statistics/train-*
- config_name: mmlu_high_school_us_history
data_files:
- split: train
path: mmlu_high_school_us_history/train-*
- config_name: mmlu_high_school_world_history
data_files:
- split: train
path: mmlu_high_school_world_history/train-*
- config_name: mmlu_human_aging
data_files:
- split: train
path: mmlu_human_aging/train-*
- config_name: mmlu_human_sexuality
data_files:
- split: train
path: mmlu_human_sexuality/train-*
- config_name: mmlu_international_law
data_files:
- split: train
path: mmlu_international_law/train-*
- config_name: mmlu_jurisprudence
data_files:
- split: train
path: mmlu_jurisprudence/train-*
- config_name: mmlu_logical_fallacies
data_files:
- split: train
path: mmlu_logical_fallacies/train-*
- config_name: mmlu_machine_learning
data_files:
- split: train
path: mmlu_machine_learning/train-*
- config_name: mmlu_management
data_files:
- split: train
path: mmlu_management/train-*
- config_name: mmlu_marketing
data_files:
- split: train
path: mmlu_marketing/train-*
- config_name: mmlu_medical_genetics
data_files:
- split: train
path: mmlu_medical_genetics/train-*
- config_name: mmlu_miscellaneous
data_files:
- split: train
path: mmlu_miscellaneous/train-*
- config_name: mmlu_moral_disputes
data_files:
- split: train
path: mmlu_moral_disputes/train-*
- config_name: mmlu_moral_scenarios
data_files:
- split: train
path: mmlu_moral_scenarios/train-*
- config_name: mmlu_nutrition
data_files:
- split: train
path: mmlu_nutrition/train-*
- config_name: mmlu_philosophy
data_files:
- split: train
path: mmlu_philosophy/train-*
- config_name: mmlu_prehistory
data_files:
- split: train
path: mmlu_prehistory/train-*
- config_name: mmlu_professional_accounting
data_files:
- split: train
path: mmlu_professional_accounting/train-*
- config_name: mmlu_professional_law
data_files:
- split: train
path: mmlu_professional_law/train-*
- config_name: mmlu_professional_medicine
data_files:
- split: train
path: mmlu_professional_medicine/train-*
- config_name: mmlu_professional_psychology
data_files:
- split: train
path: mmlu_professional_psychology/train-*
- config_name: mmlu_public_relations
data_files:
- split: train
path: mmlu_public_relations/train-*
- config_name: mmlu_security_studies
data_files:
- split: train
path: mmlu_security_studies/train-*
- config_name: mmlu_sociology
data_files:
- split: train
path: mmlu_sociology/train-*
- config_name: mmlu_us_foreign_policy
data_files:
- split: train
path: mmlu_us_foreign_policy/train-*
- config_name: mmlu_virology
data_files:
- split: train
path: mmlu_virology/train-*
- config_name: mmlu_world_religions
data_files:
- split: train
path: mmlu_world_religions/train-*
- config_name: piqa
data_files:
- split: train
path: piqa/train-*
- config_name: triviaqa
data_files:
- split: train
path: triviaqa/train-*
- config_name: winogrande
data_files:
- split: train
path: winogrande/train-*
---
提供机构:
unlearning-cleanslate



