unlearning-cleanslate/generations-olmo-3-32b-pre_val
收藏Hugging Face2026-04-28 更新2026-05-03 收录
下载链接:
https://hf-mirror.com/datasets/unlearning-cleanslate/generations-olmo-3-32b-pre_val
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: arc_challenge
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answerKey
dtype: string
- name: choices
struct:
- name: label
list: string
- name: text
list: string
- name: id
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_4
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1903184
num_examples: 1172
download_size: 1728939
dataset_size: 1903184
- config_name: bbh_cot_fewshot_boolean_expressions
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 690451
num_examples: 250
download_size: 679559
dataset_size: 690451
- config_name: bbh_cot_fewshot_causal_judgement
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1220382
num_examples: 187
download_size: 1212242
dataset_size: 1220382
- config_name: bbh_cot_fewshot_date_understanding
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 524986
num_examples: 250
download_size: 512785
dataset_size: 524986
- config_name: bbh_cot_fewshot_disambiguation_qa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1348416
num_examples: 250
download_size: 1349351
dataset_size: 1348416
- config_name: bbh_cot_fewshot_dyck_languages
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 966990
num_examples: 250
download_size: 965136
dataset_size: 966990
- config_name: bbh_cot_fewshot_formal_fallacies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1761896
num_examples: 250
download_size: 1746552
dataset_size: 1761896
- config_name: bbh_cot_fewshot_geometric_shapes
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1694182
num_examples: 250
download_size: 1681157
dataset_size: 1694182
- config_name: bbh_cot_fewshot_hyperbaton
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1170123
num_examples: 250
download_size: 1167776
dataset_size: 1170123
- config_name: bbh_cot_fewshot_logical_deduction_five_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1233976
num_examples: 250
download_size: 1231782
dataset_size: 1233976
- config_name: bbh_cot_fewshot_logical_deduction_seven_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1500477
num_examples: 250
download_size: 1501809
dataset_size: 1500477
- config_name: bbh_cot_fewshot_logical_deduction_three_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1020379
num_examples: 250
download_size: 1016216
dataset_size: 1020379
- config_name: bbh_cot_fewshot_movie_recommendation
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 850797
num_examples: 250
download_size: 842629
dataset_size: 850797
- config_name: bbh_cot_fewshot_multistep_arithmetic_two
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 882156
num_examples: 250
download_size: 887693
dataset_size: 882156
- config_name: bbh_cot_fewshot_navigate
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 835941
num_examples: 250
download_size: 828710
dataset_size: 835941
- config_name: bbh_cot_fewshot_object_counting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 565659
num_examples: 250
download_size: 554669
dataset_size: 565659
- config_name: bbh_cot_fewshot_penguins_in_a_table
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 588412
num_examples: 146
download_size: 596161
dataset_size: 588412
- config_name: bbh_cot_fewshot_reasoning_about_colored_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 925466
num_examples: 250
download_size: 919599
dataset_size: 925466
- config_name: bbh_cot_fewshot_ruin_names
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1234474
num_examples: 250
download_size: 1233076
dataset_size: 1234474
- config_name: bbh_cot_fewshot_salient_translation_error_detection
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2390200
num_examples: 250
download_size: 2377575
dataset_size: 2390200
- config_name: bbh_cot_fewshot_snarks
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 792528
num_examples: 178
download_size: 796823
dataset_size: 792528
- config_name: bbh_cot_fewshot_sports_understanding
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 360873
num_examples: 250
download_size: 345553
dataset_size: 360873
- config_name: bbh_cot_fewshot_temporal_sequences
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1203559
num_examples: 250
download_size: 1201694
dataset_size: 1203559
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_five_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1248613
num_examples: 250
download_size: 1247409
dataset_size: 1248613
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1468412
num_examples: 250
download_size: 1470704
dataset_size: 1468412
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_three_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1080254
num_examples: 250
download_size: 1076563
dataset_size: 1080254
- config_name: bbh_cot_fewshot_web_of_lies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1099486
num_examples: 250
download_size: 1096516
dataset_size: 1099486
- config_name: bbh_cot_fewshot_word_sorting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1104628
num_examples: 250
download_size: 1112501
dataset_size: 1104628
- config_name: cleanslate_qa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: content_id
dtype: string
- name: content_title
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 7892678
num_examples: 12088
download_size: 7001746
dataset_size: 7892678
- config_name: coqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: additional_answers
struct:
- name: '0'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: '1'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: '2'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: answers
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: id
dtype: string
- name: questions
struct:
- name: input_text
list: string
- name: turn_id
list: int64
- name: source
dtype: string
- name: story
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: float64
- name: score
dtype: float64
splits:
- name: train
num_bytes: 5936774
num_examples: 500
download_size: 5944403
dataset_size: 5936774
- config_name: drop
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
struct:
- name: date
struct:
- name: day
dtype: string
- name: month
dtype: string
- name: year
dtype: string
- name: hit_id
dtype: string
- name: number
dtype: string
- name: spans
list: string
- name: worker_id
dtype: string
- name: answers
list:
list: string
- name: id
dtype: string
- name: passage
dtype: string
- name: query_id
dtype: string
- name: question
dtype: string
- name: section_id
dtype: string
- name: validated_answers
struct:
- name: date
list:
- name: day
dtype: string
- name: month
dtype: string
- name: year
dtype: string
- name: hit_id
list: string
- name: number
list: string
- name: spans
list:
list: string
- name: worker_id
list: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 27515636
num_examples: 9536
download_size: 25772420
dataset_size: 27515636
- config_name: gsm8k
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 11661895
num_examples: 2638
download_size: 10728202
dataset_size: 11661895
- config_name: hellaswag
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: activity_label
dtype: string
- name: choices
list: string
- name: ctx
dtype: string
- name: ctx_a
dtype: string
- name: ctx_b
dtype: string
- name: endings
list: string
- name: gold
dtype: int64
- name: ind
dtype: int64
- name: label
dtype: string
- name: query
dtype: string
- name: source_id
dtype: string
- name: split
dtype: string
- name: split_type
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 39600995
num_examples: 10042
download_size: 38111813
dataset_size: 39600995
- config_name: humaneval_plus
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: canonical_solution
dtype: string
- name: entry_point
dtype: string
- name: prompt
dtype: string
- name: task_id
dtype: string
- name: test
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 22083184
num_examples: 164
download_size: 14077134
dataset_size: 22083184
- config_name: lambada_openai
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: text
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 5115949
num_examples: 5153
download_size: 4754062
dataset_size: 5115949
- config_name: mmlu_abstract_algebra
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 187444
num_examples: 100
download_size: 189163
dataset_size: 187444
- config_name: mmlu_anatomy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 282055
num_examples: 135
download_size: 280221
dataset_size: 282055
- config_name: mmlu_astronomy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 366481
num_examples: 152
download_size: 363608
dataset_size: 366481
- config_name: mmlu_business_ethics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 256406
num_examples: 100
download_size: 259770
dataset_size: 256406
- config_name: mmlu_clinical_knowledge
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 557023
num_examples: 265
download_size: 535282
dataset_size: 557023
- config_name: mmlu_college_biology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 373863
num_examples: 144
download_size: 370755
dataset_size: 373863
- config_name: mmlu_college_chemistry
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 214788
num_examples: 100
download_size: 219109
dataset_size: 214788
- config_name: mmlu_college_computer_science
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 307945
num_examples: 100
download_size: 316779
dataset_size: 307945
- config_name: mmlu_college_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 215801
num_examples: 100
download_size: 217162
dataset_size: 215801
- config_name: mmlu_college_medicine
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 569026
num_examples: 173
download_size: 564487
dataset_size: 569026
- config_name: mmlu_college_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 242971
num_examples: 102
download_size: 245997
dataset_size: 242971
- config_name: mmlu_computer_security
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 226830
num_examples: 100
download_size: 228381
dataset_size: 226830
- config_name: mmlu_conceptual_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 419200
num_examples: 235
download_size: 400202
dataset_size: 419200
- config_name: mmlu_econometrics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 333895
num_examples: 114
download_size: 335203
dataset_size: 333895
- config_name: mmlu_electrical_engineering
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 261631
num_examples: 145
download_size: 255677
dataset_size: 261631
- config_name: mmlu_elementary_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 704956
num_examples: 378
download_size: 661612
dataset_size: 704956
- config_name: mmlu_formal_logic
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 360483
num_examples: 126
download_size: 361632
dataset_size: 360483
- config_name: mmlu_global_facts
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 180753
num_examples: 100
download_size: 181961
dataset_size: 180753
- config_name: mmlu_high_school_biology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 834644
num_examples: 310
download_size: 806391
dataset_size: 834644
- config_name: mmlu_high_school_chemistry
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 481714
num_examples: 203
download_size: 467988
dataset_size: 481714
- config_name: mmlu_high_school_computer_science
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 319108
num_examples: 100
download_size: 325442
dataset_size: 319108
- config_name: mmlu_high_school_european_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1510995
num_examples: 165
download_size: 1524632
dataset_size: 1510995
- config_name: mmlu_high_school_geography
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 394784
num_examples: 198
download_size: 381393
dataset_size: 394784
- config_name: mmlu_high_school_government_and_politics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 523904
num_examples: 193
download_size: 513462
dataset_size: 523904
- config_name: mmlu_high_school_macroeconomics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 961933
num_examples: 390
download_size: 921736
dataset_size: 961933
- config_name: mmlu_high_school_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 528962
num_examples: 270
download_size: 507463
dataset_size: 528962
- config_name: mmlu_high_school_microeconomics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 606435
num_examples: 238
download_size: 588883
dataset_size: 606435
- config_name: mmlu_high_school_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 436998
num_examples: 151
download_size: 439605
dataset_size: 436998
- config_name: mmlu_high_school_psychology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1307856
num_examples: 545
download_size: 1240693
dataset_size: 1307856
- config_name: mmlu_high_school_statistics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 756024
num_examples: 216
download_size: 743357
dataset_size: 756024
- config_name: mmlu_high_school_us_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1674826
num_examples: 204
download_size: 1685971
dataset_size: 1674826
- config_name: mmlu_high_school_world_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2118847
num_examples: 237
download_size: 2123747
dataset_size: 2118847
- config_name: mmlu_human_aging
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 427410
num_examples: 223
download_size: 410311
dataset_size: 427410
- config_name: mmlu_human_sexuality
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 278708
num_examples: 131
download_size: 275873
dataset_size: 278708
- config_name: mmlu_international_law
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 377981
num_examples: 121
download_size: 383630
dataset_size: 377981
- config_name: mmlu_jurisprudence
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 266288
num_examples: 108
download_size: 266163
dataset_size: 266288
- config_name: mmlu_logical_fallacies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 399255
num_examples: 163
download_size: 393974
dataset_size: 399255
- config_name: mmlu_machine_learning
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 271058
num_examples: 112
download_size: 270900
dataset_size: 271058
- config_name: mmlu_management
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 190323
num_examples: 103
download_size: 191062
dataset_size: 190323
- config_name: mmlu_marketing
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 518791
num_examples: 234
download_size: 502133
dataset_size: 518791
- config_name: mmlu_medical_genetics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 195036
num_examples: 100
download_size: 198560
dataset_size: 195036
- config_name: mmlu_miscellaneous
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1437282
num_examples: 783
download_size: 1339620
dataset_size: 1437282
- config_name: mmlu_moral_disputes
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 848376
num_examples: 346
download_size: 813489
dataset_size: 848376
- config_name: mmlu_moral_scenarios
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2679030
num_examples: 895
download_size: 2563285
dataset_size: 2679030
- config_name: mmlu_nutrition
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 727926
num_examples: 306
download_size: 702674
dataset_size: 727926
- config_name: mmlu_philosophy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 673292
num_examples: 311
download_size: 645902
dataset_size: 673292
- config_name: mmlu_prehistory
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 732272
num_examples: 324
download_size: 702326
dataset_size: 732272
- config_name: mmlu_professional_accounting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 888830
num_examples: 282
download_size: 865467
dataset_size: 888830
- config_name: mmlu_professional_law
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 10852714
num_examples: 1534
download_size: 10728022
dataset_size: 10852714
- config_name: mmlu_professional_medicine
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1341633
num_examples: 272
download_size: 1337051
dataset_size: 1341633
- config_name: mmlu_professional_psychology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1706431
num_examples: 612
download_size: 1632826
dataset_size: 1706431
- config_name: mmlu_public_relations
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 243636
num_examples: 110
download_size: 244642
dataset_size: 243636
- config_name: mmlu_security_studies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1246451
num_examples: 245
download_size: 1235199
dataset_size: 1246451
- config_name: mmlu_sociology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 506523
num_examples: 201
download_size: 493853
dataset_size: 506523
- config_name: mmlu_us_foreign_policy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 233453
num_examples: 100
download_size: 235065
dataset_size: 233453
- config_name: mmlu_virology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 337833
num_examples: 166
download_size: 331630
dataset_size: 337833
- config_name: mmlu_world_religions
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 280308
num_examples: 171
download_size: 270531
dataset_size: 280308
- config_name: triviaqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
struct:
- name: aliases
list: string
- name: matched_wiki_entity_name
dtype: string
- name: normalized_aliases
list: string
- name: normalized_matched_wiki_entity_name
dtype: string
- name: normalized_value
dtype: string
- name: type
dtype: string
- name: value
dtype: string
- name: entity_pages
struct:
- name: doc_source
list: 'null'
- name: filename
list: 'null'
- name: title
list: 'null'
- name: wiki_context
list: 'null'
- name: question
dtype: string
- name: question_id
dtype: string
- name: question_source
dtype: string
- name: search_results
struct:
- name: description
list: 'null'
- name: filename
list: 'null'
- name: rank
list: 'null'
- name: search_context
list: 'null'
- name: title
list: 'null'
- name: url
list: 'null'
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: float64
- name: score
dtype: float64
splits:
- name: train
num_bytes: 27466496
num_examples: 17944
download_size: 20743986
dataset_size: 27466496
- config_name: winogrande
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: option1
dtype: string
- name: option2
dtype: string
- name: sentence
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 981549
num_examples: 1267
download_size: 884463
dataset_size: 981549
configs:
- config_name: arc_challenge
data_files:
- split: train
path: arc_challenge/train-*
- config_name: bbh_cot_fewshot_boolean_expressions
data_files:
- split: train
path: bbh_cot_fewshot_boolean_expressions/train-*
- config_name: bbh_cot_fewshot_causal_judgement
data_files:
- split: train
path: bbh_cot_fewshot_causal_judgement/train-*
- config_name: bbh_cot_fewshot_date_understanding
data_files:
- split: train
path: bbh_cot_fewshot_date_understanding/train-*
- config_name: bbh_cot_fewshot_disambiguation_qa
data_files:
- split: train
path: bbh_cot_fewshot_disambiguation_qa/train-*
- config_name: bbh_cot_fewshot_dyck_languages
data_files:
- split: train
path: bbh_cot_fewshot_dyck_languages/train-*
- config_name: bbh_cot_fewshot_formal_fallacies
data_files:
- split: train
path: bbh_cot_fewshot_formal_fallacies/train-*
- config_name: bbh_cot_fewshot_geometric_shapes
data_files:
- split: train
path: bbh_cot_fewshot_geometric_shapes/train-*
- config_name: bbh_cot_fewshot_hyperbaton
data_files:
- split: train
path: bbh_cot_fewshot_hyperbaton/train-*
- config_name: bbh_cot_fewshot_logical_deduction_five_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_five_objects/train-*
- config_name: bbh_cot_fewshot_logical_deduction_seven_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_seven_objects/train-*
- config_name: bbh_cot_fewshot_logical_deduction_three_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_three_objects/train-*
- config_name: bbh_cot_fewshot_movie_recommendation
data_files:
- split: train
path: bbh_cot_fewshot_movie_recommendation/train-*
- config_name: bbh_cot_fewshot_multistep_arithmetic_two
data_files:
- split: train
path: bbh_cot_fewshot_multistep_arithmetic_two/train-*
- config_name: bbh_cot_fewshot_navigate
data_files:
- split: train
path: bbh_cot_fewshot_navigate/train-*
- config_name: bbh_cot_fewshot_object_counting
data_files:
- split: train
path: bbh_cot_fewshot_object_counting/train-*
- config_name: bbh_cot_fewshot_penguins_in_a_table
data_files:
- split: train
path: bbh_cot_fewshot_penguins_in_a_table/train-*
- config_name: bbh_cot_fewshot_reasoning_about_colored_objects
data_files:
- split: train
path: bbh_cot_fewshot_reasoning_about_colored_objects/train-*
- config_name: bbh_cot_fewshot_ruin_names
data_files:
- split: train
path: bbh_cot_fewshot_ruin_names/train-*
- config_name: bbh_cot_fewshot_salient_translation_error_detection
data_files:
- split: train
path: bbh_cot_fewshot_salient_translation_error_detection/train-*
- config_name: bbh_cot_fewshot_snarks
data_files:
- split: train
path: bbh_cot_fewshot_snarks/train-*
- config_name: bbh_cot_fewshot_sports_understanding
data_files:
- split: train
path: bbh_cot_fewshot_sports_understanding/train-*
- config_name: bbh_cot_fewshot_temporal_sequences
data_files:
- split: train
path: bbh_cot_fewshot_temporal_sequences/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_five_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_five_objects/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_three_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_three_objects/train-*
- config_name: bbh_cot_fewshot_web_of_lies
data_files:
- split: train
path: bbh_cot_fewshot_web_of_lies/train-*
- config_name: bbh_cot_fewshot_word_sorting
data_files:
- split: train
path: bbh_cot_fewshot_word_sorting/train-*
- config_name: cleanslate_qa
data_files:
- split: train
path: cleanslate_qa/train-*
- config_name: coqa
data_files:
- split: train
path: coqa/train-*
- config_name: drop
data_files:
- split: train
path: drop/train-*
- config_name: gsm8k
data_files:
- split: train
path: gsm8k/train-*
- config_name: hellaswag
data_files:
- split: train
path: hellaswag/train-*
- config_name: humaneval_plus
data_files:
- split: train
path: humaneval_plus/train-*
- config_name: lambada_openai
data_files:
- split: train
path: lambada_openai/train-*
- config_name: mmlu_abstract_algebra
data_files:
- split: train
path: mmlu_abstract_algebra/train-*
- config_name: mmlu_anatomy
data_files:
- split: train
path: mmlu_anatomy/train-*
- config_name: mmlu_astronomy
data_files:
- split: train
path: mmlu_astronomy/train-*
- config_name: mmlu_business_ethics
data_files:
- split: train
path: mmlu_business_ethics/train-*
- config_name: mmlu_clinical_knowledge
data_files:
- split: train
path: mmlu_clinical_knowledge/train-*
- config_name: mmlu_college_biology
data_files:
- split: train
path: mmlu_college_biology/train-*
- config_name: mmlu_college_chemistry
data_files:
- split: train
path: mmlu_college_chemistry/train-*
- config_name: mmlu_college_computer_science
data_files:
- split: train
path: mmlu_college_computer_science/train-*
- config_name: mmlu_college_mathematics
data_files:
- split: train
path: mmlu_college_mathematics/train-*
- config_name: mmlu_college_medicine
data_files:
- split: train
path: mmlu_college_medicine/train-*
- config_name: mmlu_college_physics
data_files:
- split: train
path: mmlu_college_physics/train-*
- config_name: mmlu_computer_security
data_files:
- split: train
path: mmlu_computer_security/train-*
- config_name: mmlu_conceptual_physics
data_files:
- split: train
path: mmlu_conceptual_physics/train-*
- config_name: mmlu_econometrics
data_files:
- split: train
path: mmlu_econometrics/train-*
- config_name: mmlu_electrical_engineering
data_files:
- split: train
path: mmlu_electrical_engineering/train-*
- config_name: mmlu_elementary_mathematics
data_files:
- split: train
path: mmlu_elementary_mathematics/train-*
- config_name: mmlu_formal_logic
data_files:
- split: train
path: mmlu_formal_logic/train-*
- config_name: mmlu_global_facts
data_files:
- split: train
path: mmlu_global_facts/train-*
- config_name: mmlu_high_school_biology
data_files:
- split: train
path: mmlu_high_school_biology/train-*
- config_name: mmlu_high_school_chemistry
data_files:
- split: train
path: mmlu_high_school_chemistry/train-*
- config_name: mmlu_high_school_computer_science
data_files:
- split: train
path: mmlu_high_school_computer_science/train-*
- config_name: mmlu_high_school_european_history
data_files:
- split: train
path: mmlu_high_school_european_history/train-*
- config_name: mmlu_high_school_geography
data_files:
- split: train
path: mmlu_high_school_geography/train-*
- config_name: mmlu_high_school_government_and_politics
data_files:
- split: train
path: mmlu_high_school_government_and_politics/train-*
- config_name: mmlu_high_school_macroeconomics
data_files:
- split: train
path: mmlu_high_school_macroeconomics/train-*
- config_name: mmlu_high_school_mathematics
data_files:
- split: train
path: mmlu_high_school_mathematics/train-*
- config_name: mmlu_high_school_microeconomics
data_files:
- split: train
path: mmlu_high_school_microeconomics/train-*
- config_name: mmlu_high_school_physics
data_files:
- split: train
path: mmlu_high_school_physics/train-*
- config_name: mmlu_high_school_psychology
data_files:
- split: train
path: mmlu_high_school_psychology/train-*
- config_name: mmlu_high_school_statistics
data_files:
- split: train
path: mmlu_high_school_statistics/train-*
- config_name: mmlu_high_school_us_history
data_files:
- split: train
path: mmlu_high_school_us_history/train-*
- config_name: mmlu_high_school_world_history
data_files:
- split: train
path: mmlu_high_school_world_history/train-*
- config_name: mmlu_human_aging
data_files:
- split: train
path: mmlu_human_aging/train-*
- config_name: mmlu_human_sexuality
data_files:
- split: train
path: mmlu_human_sexuality/train-*
- config_name: mmlu_international_law
data_files:
- split: train
path: mmlu_international_law/train-*
- config_name: mmlu_jurisprudence
data_files:
- split: train
path: mmlu_jurisprudence/train-*
- config_name: mmlu_logical_fallacies
data_files:
- split: train
path: mmlu_logical_fallacies/train-*
- config_name: mmlu_machine_learning
data_files:
- split: train
path: mmlu_machine_learning/train-*
- config_name: mmlu_management
data_files:
- split: train
path: mmlu_management/train-*
- config_name: mmlu_marketing
data_files:
- split: train
path: mmlu_marketing/train-*
- config_name: mmlu_medical_genetics
data_files:
- split: train
path: mmlu_medical_genetics/train-*
- config_name: mmlu_miscellaneous
data_files:
- split: train
path: mmlu_miscellaneous/train-*
- config_name: mmlu_moral_disputes
data_files:
- split: train
path: mmlu_moral_disputes/train-*
- config_name: mmlu_moral_scenarios
data_files:
- split: train
path: mmlu_moral_scenarios/train-*
- config_name: mmlu_nutrition
data_files:
- split: train
path: mmlu_nutrition/train-*
- config_name: mmlu_philosophy
data_files:
- split: train
path: mmlu_philosophy/train-*
- config_name: mmlu_prehistory
data_files:
- split: train
path: mmlu_prehistory/train-*
- config_name: mmlu_professional_accounting
data_files:
- split: train
path: mmlu_professional_accounting/train-*
- config_name: mmlu_professional_law
data_files:
- split: train
path: mmlu_professional_law/train-*
- config_name: mmlu_professional_medicine
data_files:
- split: train
path: mmlu_professional_medicine/train-*
- config_name: mmlu_professional_psychology
data_files:
- split: train
path: mmlu_professional_psychology/train-*
- config_name: mmlu_public_relations
data_files:
- split: train
path: mmlu_public_relations/train-*
- config_name: mmlu_security_studies
data_files:
- split: train
path: mmlu_security_studies/train-*
- config_name: mmlu_sociology
data_files:
- split: train
path: mmlu_sociology/train-*
- config_name: mmlu_us_foreign_policy
data_files:
- split: train
path: mmlu_us_foreign_policy/train-*
- config_name: mmlu_virology
data_files:
- split: train
path: mmlu_virology/train-*
- config_name: mmlu_world_religions
data_files:
- split: train
path: mmlu_world_religions/train-*
- config_name: triviaqa
data_files:
- split: train
path: triviaqa/train-*
- config_name: winogrande
data_files:
- split: train
path: winogrande/train-*
---
提供机构:
unlearning-cleanslate



