unlearning-cleanslate/generations-llama-3_1-8b-simnpo-gentle-bm25-10b
收藏Hugging Face2026-04-29 更新2026-05-03 收录
下载链接:
https://hf-mirror.com/datasets/unlearning-cleanslate/generations-llama-3_1-8b-simnpo-gentle-bm25-10b
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: arc_challenge
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answerKey
dtype: string
- name: choices
struct:
- name: label
list: string
- name: text
list: string
- name: id
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_4
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1903362
num_examples: 1172
download_size: 1729120
dataset_size: 1903362
- config_name: bbh_cot_fewshot_boolean_expressions
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 702750
num_examples: 250
download_size: 698677
dataset_size: 702750
- config_name: bbh_cot_fewshot_causal_judgement
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1219289
num_examples: 187
download_size: 1211993
dataset_size: 1219289
- config_name: bbh_cot_fewshot_date_understanding
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 525194
num_examples: 250
download_size: 512976
dataset_size: 525194
- config_name: bbh_cot_fewshot_disambiguation_qa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1344715
num_examples: 250
download_size: 1345727
dataset_size: 1344715
- config_name: bbh_cot_fewshot_dyck_languages
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1030667
num_examples: 250
download_size: 1027939
dataset_size: 1030667
- config_name: bbh_cot_fewshot_formal_fallacies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1770084
num_examples: 250
download_size: 1754710
dataset_size: 1770084
- config_name: bbh_cot_fewshot_geometric_shapes
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1732198
num_examples: 250
download_size: 1718433
dataset_size: 1732198
- config_name: bbh_cot_fewshot_hyperbaton
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1177419
num_examples: 250
download_size: 1175040
dataset_size: 1177419
- config_name: bbh_cot_fewshot_logical_deduction_five_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1155735
num_examples: 250
download_size: 1152779
dataset_size: 1155735
- config_name: bbh_cot_fewshot_logical_deduction_seven_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1292641
num_examples: 250
download_size: 1292845
dataset_size: 1292641
- config_name: bbh_cot_fewshot_logical_deduction_three_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1020418
num_examples: 250
download_size: 1016241
dataset_size: 1020418
- config_name: bbh_cot_fewshot_movie_recommendation
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 819721
num_examples: 250
download_size: 811347
dataset_size: 819721
- config_name: bbh_cot_fewshot_multistep_arithmetic_two
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 888563
num_examples: 250
download_size: 894321
dataset_size: 888563
- config_name: bbh_cot_fewshot_navigate
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 835492
num_examples: 250
download_size: 828277
dataset_size: 835492
- config_name: bbh_cot_fewshot_object_counting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 565928
num_examples: 250
download_size: 554946
dataset_size: 565928
- config_name: bbh_cot_fewshot_penguins_in_a_table
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 587199
num_examples: 146
download_size: 595208
dataset_size: 587199
- config_name: bbh_cot_fewshot_reasoning_about_colored_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 927798
num_examples: 250
download_size: 921999
dataset_size: 927798
- config_name: bbh_cot_fewshot_ruin_names
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1228160
num_examples: 250
download_size: 1226825
dataset_size: 1228160
- config_name: bbh_cot_fewshot_salient_translation_error_detection
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2375650
num_examples: 250
download_size: 2363042
dataset_size: 2375650
- config_name: bbh_cot_fewshot_snarks
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 798973
num_examples: 178
download_size: 804025
dataset_size: 798973
- config_name: bbh_cot_fewshot_sports_understanding
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 363387
num_examples: 250
download_size: 348134
dataset_size: 363387
- config_name: bbh_cot_fewshot_temporal_sequences
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1203747
num_examples: 250
download_size: 1201908
dataset_size: 1203747
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_five_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1248552
num_examples: 250
download_size: 1247249
dataset_size: 1248552
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1469350
num_examples: 250
download_size: 1471312
dataset_size: 1469350
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_three_objects
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1081411
num_examples: 250
download_size: 1077768
dataset_size: 1081411
- config_name: bbh_cot_fewshot_web_of_lies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1099452
num_examples: 250
download_size: 1096497
dataset_size: 1099452
- config_name: bbh_cot_fewshot_word_sorting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: input
dtype: string
- name: target
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1085005
num_examples: 250
download_size: 1092612
dataset_size: 1085005
- config_name: cleanslate_qa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: content_id
dtype: string
- name: content_title
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 8558746
num_examples: 12088
download_size: 7668431
dataset_size: 8558746
- config_name: coqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: additional_answers
struct:
- name: '0'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: '1'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: '2'
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: answers
struct:
- name: input_text
list: string
- name: span_end
list: int64
- name: span_start
list: int64
- name: span_text
list: string
- name: turn_id
list: int64
- name: id
dtype: string
- name: questions
struct:
- name: input_text
list: string
- name: turn_id
list: int64
- name: source
dtype: string
- name: story
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: float64
- name: score
dtype: float64
splits:
- name: train
num_bytes: 5563780
num_examples: 500
download_size: 5567141
dataset_size: 5563780
- config_name: drop
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
struct:
- name: date
struct:
- name: day
dtype: string
- name: month
dtype: string
- name: year
dtype: string
- name: hit_id
dtype: string
- name: number
dtype: string
- name: spans
list: string
- name: worker_id
dtype: string
- name: answers
list:
list: string
- name: id
dtype: string
- name: passage
dtype: string
- name: query_id
dtype: string
- name: question
dtype: string
- name: section_id
dtype: string
- name: validated_answers
struct:
- name: date
list:
- name: day
dtype: string
- name: month
dtype: string
- name: year
dtype: string
- name: hit_id
list: string
- name: number
list: string
- name: spans
list:
list: string
- name: worker_id
list: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 29271434
num_examples: 9536
download_size: 27527509
dataset_size: 29271434
- config_name: gsm8k
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: question
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 11673634
num_examples: 2638
download_size: 10738637
dataset_size: 11673634
- config_name: hellaswag
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: activity_label
dtype: string
- name: choices
list: string
- name: ctx
dtype: string
- name: ctx_a
dtype: string
- name: ctx_b
dtype: string
- name: endings
list: string
- name: gold
dtype: int64
- name: ind
dtype: int64
- name: label
dtype: string
- name: query
dtype: string
- name: source_id
dtype: string
- name: split
dtype: string
- name: split_type
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 39601751
num_examples: 10042
download_size: 38112584
dataset_size: 39601751
- config_name: humaneval_plus
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: canonical_solution
dtype: string
- name: entry_point
dtype: string
- name: prompt
dtype: string
- name: task_id
dtype: string
- name: test
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: max_gen_toks
dtype: int64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: 'null'
- name: score
dtype: float64
splits:
- name: train
num_bytes: 22087500
num_examples: 164
download_size: 14085076
dataset_size: 22087500
- config_name: lambada_openai
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: text
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 5116335
num_examples: 5153
download_size: 4754417
dataset_size: 5116335
- config_name: mmlu_abstract_algebra
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 187468
num_examples: 100
download_size: 189187
dataset_size: 187468
- config_name: mmlu_anatomy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 282133
num_examples: 135
download_size: 280304
dataset_size: 282133
- config_name: mmlu_astronomy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 366619
num_examples: 152
download_size: 363747
dataset_size: 366619
- config_name: mmlu_business_ethics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 256482
num_examples: 100
download_size: 259850
dataset_size: 256482
- config_name: mmlu_clinical_knowledge
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 557113
num_examples: 265
download_size: 535380
dataset_size: 557113
- config_name: mmlu_college_biology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 374037
num_examples: 144
download_size: 370937
dataset_size: 374037
- config_name: mmlu_college_chemistry
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 214798
num_examples: 100
download_size: 219123
dataset_size: 214798
- config_name: mmlu_college_computer_science
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 307989
num_examples: 100
download_size: 316825
dataset_size: 307989
- config_name: mmlu_college_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 215827
num_examples: 100
download_size: 217188
dataset_size: 215827
- config_name: mmlu_college_medicine
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 569122
num_examples: 173
download_size: 564588
dataset_size: 569122
- config_name: mmlu_college_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 242863
num_examples: 102
download_size: 245889
dataset_size: 242863
- config_name: mmlu_computer_security
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 226860
num_examples: 100
download_size: 228412
dataset_size: 226860
- config_name: mmlu_conceptual_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 419334
num_examples: 235
download_size: 400338
dataset_size: 419334
- config_name: mmlu_econometrics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 333941
num_examples: 114
download_size: 335255
dataset_size: 333941
- config_name: mmlu_electrical_engineering
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 261787
num_examples: 145
download_size: 255837
dataset_size: 261787
- config_name: mmlu_elementary_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 705194
num_examples: 378
download_size: 661851
dataset_size: 705194
- config_name: mmlu_formal_logic
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 360455
num_examples: 126
download_size: 361604
dataset_size: 360455
- config_name: mmlu_global_facts
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 180845
num_examples: 100
download_size: 182057
dataset_size: 180845
- config_name: mmlu_high_school_biology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 834830
num_examples: 310
download_size: 806582
dataset_size: 834830
- config_name: mmlu_high_school_chemistry
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 481748
num_examples: 203
download_size: 468028
dataset_size: 481748
- config_name: mmlu_high_school_computer_science
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 319150
num_examples: 100
download_size: 325486
dataset_size: 319150
- config_name: mmlu_high_school_european_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1511235
num_examples: 165
download_size: 1524877
dataset_size: 1511235
- config_name: mmlu_high_school_geography
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 394924
num_examples: 198
download_size: 381534
dataset_size: 394924
- config_name: mmlu_high_school_government_and_politics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 523964
num_examples: 193
download_size: 513535
dataset_size: 523964
- config_name: mmlu_high_school_macroeconomics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 962251
num_examples: 390
download_size: 922054
dataset_size: 962251
- config_name: mmlu_high_school_mathematics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 528952
num_examples: 270
download_size: 507450
dataset_size: 528952
- config_name: mmlu_high_school_microeconomics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 606479
num_examples: 238
download_size: 588927
dataset_size: 606479
- config_name: mmlu_high_school_physics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 437084
num_examples: 151
download_size: 439697
dataset_size: 437084
- config_name: mmlu_high_school_psychology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1308374
num_examples: 545
download_size: 1241227
dataset_size: 1308374
- config_name: mmlu_high_school_statistics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 756212
num_examples: 216
download_size: 743549
dataset_size: 756212
- config_name: mmlu_high_school_us_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1674950
num_examples: 204
download_size: 1686099
dataset_size: 1674950
- config_name: mmlu_high_school_world_history
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2118959
num_examples: 237
download_size: 2123865
dataset_size: 2118959
- config_name: mmlu_human_aging
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 427400
num_examples: 223
download_size: 410299
dataset_size: 427400
- config_name: mmlu_human_sexuality
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 278812
num_examples: 131
download_size: 275974
dataset_size: 278812
- config_name: mmlu_international_law
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 378043
num_examples: 121
download_size: 383687
dataset_size: 378043
- config_name: mmlu_jurisprudence
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 266350
num_examples: 108
download_size: 266237
dataset_size: 266350
- config_name: mmlu_logical_fallacies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 399417
num_examples: 163
download_size: 394141
dataset_size: 399417
- config_name: mmlu_machine_learning
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 271120
num_examples: 112
download_size: 270962
dataset_size: 271120
- config_name: mmlu_management
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 190373
num_examples: 103
download_size: 191125
dataset_size: 190373
- config_name: mmlu_marketing
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 518913
num_examples: 234
download_size: 502259
dataset_size: 518913
- config_name: mmlu_medical_genetics
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 195128
num_examples: 100
download_size: 198654
dataset_size: 195128
- config_name: mmlu_miscellaneous
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1437668
num_examples: 783
download_size: 1340013
dataset_size: 1437668
- config_name: mmlu_moral_disputes
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 848482
num_examples: 346
download_size: 813587
dataset_size: 848482
- config_name: mmlu_moral_scenarios
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 2679030
num_examples: 895
download_size: 2563272
dataset_size: 2679030
- config_name: mmlu_nutrition
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 728120
num_examples: 306
download_size: 702878
dataset_size: 728120
- config_name: mmlu_philosophy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 673374
num_examples: 311
download_size: 645984
dataset_size: 673374
- config_name: mmlu_prehistory
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 732482
num_examples: 324
download_size: 702545
dataset_size: 732482
- config_name: mmlu_professional_accounting
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 888784
num_examples: 282
download_size: 865413
dataset_size: 888784
- config_name: mmlu_professional_law
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 10852640
num_examples: 1534
download_size: 10727957
dataset_size: 10852640
- config_name: mmlu_professional_medicine
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1341667
num_examples: 272
download_size: 1337088
dataset_size: 1341667
- config_name: mmlu_professional_psychology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1706835
num_examples: 612
download_size: 1633230
dataset_size: 1706835
- config_name: mmlu_public_relations
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 243678
num_examples: 110
download_size: 244681
dataset_size: 243678
- config_name: mmlu_security_studies
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 1246629
num_examples: 245
download_size: 1235379
dataset_size: 1246629
- config_name: mmlu_sociology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 506719
num_examples: 201
download_size: 494053
dataset_size: 506719
- config_name: mmlu_us_foreign_policy
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 233387
num_examples: 100
download_size: 235003
dataset_size: 233387
- config_name: mmlu_virology
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 337857
num_examples: 166
download_size: 331658
dataset_size: 337857
- config_name: mmlu_world_religions
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: int64
- name: choices
list: string
- name: question
dtype: string
- name: subject
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_2
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_3
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 280392
num_examples: 171
download_size: 270622
dataset_size: 280392
- config_name: triviaqa
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
struct:
- name: aliases
list: string
- name: matched_wiki_entity_name
dtype: string
- name: normalized_aliases
list: string
- name: normalized_matched_wiki_entity_name
dtype: string
- name: normalized_value
dtype: string
- name: type
dtype: string
- name: value
dtype: string
- name: entity_pages
struct:
- name: doc_source
list: 'null'
- name: filename
list: 'null'
- name: title
list: 'null'
- name: wiki_context
list: 'null'
- name: question
dtype: string
- name: question_id
dtype: string
- name: question_source
dtype: string
- name: search_results
struct:
- name: description
list: 'null'
- name: filename
list: 'null'
- name: rank
list: 'null'
- name: search_context
list: 'null'
- name: title
list: 'null'
- name: url
list: 'null'
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
struct:
- name: do_sample
dtype: bool
- name: temperature
dtype: float64
- name: until
list: string
- name: resps
list:
list: string
- name: filtered_resps
list: string
- name: filter
dtype: string
- name: metrics
list: string
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: bypass
dtype: float64
- name: score
dtype: float64
splits:
- name: train
num_bytes: 27643616
num_examples: 17944
download_size: 20921242
dataset_size: 27643616
- config_name: winogrande
features:
- name: doc_id
dtype: int64
- name: doc
struct:
- name: answer
dtype: string
- name: option1
dtype: string
- name: option2
dtype: string
- name: sentence
dtype: string
- name: target
dtype: string
- name: arguments
struct:
- name: gen_args_0
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: gen_args_1
struct:
- name: arg_0
dtype: string
- name: arg_1
dtype: string
- name: resps
list:
list:
list: string
- name: filtered_resps
list:
list: string
- name: filter
dtype: string
- name: metrics
list: 'null'
- name: doc_hash
dtype: string
- name: prompt_hash
dtype: string
- name: target_hash
dtype: string
- name: score
dtype: float64
splits:
- name: train
num_bytes: 981747
num_examples: 1267
download_size: 884659
dataset_size: 981747
configs:
- config_name: arc_challenge
data_files:
- split: train
path: arc_challenge/train-*
- config_name: bbh_cot_fewshot_boolean_expressions
data_files:
- split: train
path: bbh_cot_fewshot_boolean_expressions/train-*
- config_name: bbh_cot_fewshot_causal_judgement
data_files:
- split: train
path: bbh_cot_fewshot_causal_judgement/train-*
- config_name: bbh_cot_fewshot_date_understanding
data_files:
- split: train
path: bbh_cot_fewshot_date_understanding/train-*
- config_name: bbh_cot_fewshot_disambiguation_qa
data_files:
- split: train
path: bbh_cot_fewshot_disambiguation_qa/train-*
- config_name: bbh_cot_fewshot_dyck_languages
data_files:
- split: train
path: bbh_cot_fewshot_dyck_languages/train-*
- config_name: bbh_cot_fewshot_formal_fallacies
data_files:
- split: train
path: bbh_cot_fewshot_formal_fallacies/train-*
- config_name: bbh_cot_fewshot_geometric_shapes
data_files:
- split: train
path: bbh_cot_fewshot_geometric_shapes/train-*
- config_name: bbh_cot_fewshot_hyperbaton
data_files:
- split: train
path: bbh_cot_fewshot_hyperbaton/train-*
- config_name: bbh_cot_fewshot_logical_deduction_five_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_five_objects/train-*
- config_name: bbh_cot_fewshot_logical_deduction_seven_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_seven_objects/train-*
- config_name: bbh_cot_fewshot_logical_deduction_three_objects
data_files:
- split: train
path: bbh_cot_fewshot_logical_deduction_three_objects/train-*
- config_name: bbh_cot_fewshot_movie_recommendation
data_files:
- split: train
path: bbh_cot_fewshot_movie_recommendation/train-*
- config_name: bbh_cot_fewshot_multistep_arithmetic_two
data_files:
- split: train
path: bbh_cot_fewshot_multistep_arithmetic_two/train-*
- config_name: bbh_cot_fewshot_navigate
data_files:
- split: train
path: bbh_cot_fewshot_navigate/train-*
- config_name: bbh_cot_fewshot_object_counting
data_files:
- split: train
path: bbh_cot_fewshot_object_counting/train-*
- config_name: bbh_cot_fewshot_penguins_in_a_table
data_files:
- split: train
path: bbh_cot_fewshot_penguins_in_a_table/train-*
- config_name: bbh_cot_fewshot_reasoning_about_colored_objects
data_files:
- split: train
path: bbh_cot_fewshot_reasoning_about_colored_objects/train-*
- config_name: bbh_cot_fewshot_ruin_names
data_files:
- split: train
path: bbh_cot_fewshot_ruin_names/train-*
- config_name: bbh_cot_fewshot_salient_translation_error_detection
data_files:
- split: train
path: bbh_cot_fewshot_salient_translation_error_detection/train-*
- config_name: bbh_cot_fewshot_snarks
data_files:
- split: train
path: bbh_cot_fewshot_snarks/train-*
- config_name: bbh_cot_fewshot_sports_understanding
data_files:
- split: train
path: bbh_cot_fewshot_sports_understanding/train-*
- config_name: bbh_cot_fewshot_temporal_sequences
data_files:
- split: train
path: bbh_cot_fewshot_temporal_sequences/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_five_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_five_objects/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_seven_objects/train-*
- config_name: bbh_cot_fewshot_tracking_shuffled_objects_three_objects
data_files:
- split: train
path: bbh_cot_fewshot_tracking_shuffled_objects_three_objects/train-*
- config_name: bbh_cot_fewshot_web_of_lies
data_files:
- split: train
path: bbh_cot_fewshot_web_of_lies/train-*
- config_name: bbh_cot_fewshot_word_sorting
data_files:
- split: train
path: bbh_cot_fewshot_word_sorting/train-*
- config_name: cleanslate_qa
data_files:
- split: train
path: cleanslate_qa/train-*
- config_name: coqa
data_files:
- split: train
path: coqa/train-*
- config_name: drop
data_files:
- split: train
path: drop/train-*
- config_name: gsm8k
data_files:
- split: train
path: gsm8k/train-*
- config_name: hellaswag
data_files:
- split: train
path: hellaswag/train-*
- config_name: humaneval_plus
data_files:
- split: train
path: humaneval_plus/train-*
- config_name: lambada_openai
data_files:
- split: train
path: lambada_openai/train-*
- config_name: mmlu_abstract_algebra
data_files:
- split: train
path: mmlu_abstract_algebra/train-*
- config_name: mmlu_anatomy
data_files:
- split: train
path: mmlu_anatomy/train-*
- config_name: mmlu_astronomy
data_files:
- split: train
path: mmlu_astronomy/train-*
- config_name: mmlu_business_ethics
data_files:
- split: train
path: mmlu_business_ethics/train-*
- config_name: mmlu_clinical_knowledge
data_files:
- split: train
path: mmlu_clinical_knowledge/train-*
- config_name: mmlu_college_biology
data_files:
- split: train
path: mmlu_college_biology/train-*
- config_name: mmlu_college_chemistry
data_files:
- split: train
path: mmlu_college_chemistry/train-*
- config_name: mmlu_college_computer_science
data_files:
- split: train
path: mmlu_college_computer_science/train-*
- config_name: mmlu_college_mathematics
data_files:
- split: train
path: mmlu_college_mathematics/train-*
- config_name: mmlu_college_medicine
data_files:
- split: train
path: mmlu_college_medicine/train-*
- config_name: mmlu_college_physics
data_files:
- split: train
path: mmlu_college_physics/train-*
- config_name: mmlu_computer_security
data_files:
- split: train
path: mmlu_computer_security/train-*
- config_name: mmlu_conceptual_physics
data_files:
- split: train
path: mmlu_conceptual_physics/train-*
- config_name: mmlu_econometrics
data_files:
- split: train
path: mmlu_econometrics/train-*
- config_name: mmlu_electrical_engineering
data_files:
- split: train
path: mmlu_electrical_engineering/train-*
- config_name: mmlu_elementary_mathematics
data_files:
- split: train
path: mmlu_elementary_mathematics/train-*
- config_name: mmlu_formal_logic
data_files:
- split: train
path: mmlu_formal_logic/train-*
- config_name: mmlu_global_facts
data_files:
- split: train
path: mmlu_global_facts/train-*
- config_name: mmlu_high_school_biology
data_files:
- split: train
path: mmlu_high_school_biology/train-*
- config_name: mmlu_high_school_chemistry
data_files:
- split: train
path: mmlu_high_school_chemistry/train-*
- config_name: mmlu_high_school_computer_science
data_files:
- split: train
path: mmlu_high_school_computer_science/train-*
- config_name: mmlu_high_school_european_history
data_files:
- split: train
path: mmlu_high_school_european_history/train-*
- config_name: mmlu_high_school_geography
data_files:
- split: train
path: mmlu_high_school_geography/train-*
- config_name: mmlu_high_school_government_and_politics
data_files:
- split: train
path: mmlu_high_school_government_and_politics/train-*
- config_name: mmlu_high_school_macroeconomics
data_files:
- split: train
path: mmlu_high_school_macroeconomics/train-*
- config_name: mmlu_high_school_mathematics
data_files:
- split: train
path: mmlu_high_school_mathematics/train-*
- config_name: mmlu_high_school_microeconomics
data_files:
- split: train
path: mmlu_high_school_microeconomics/train-*
- config_name: mmlu_high_school_physics
data_files:
- split: train
path: mmlu_high_school_physics/train-*
- config_name: mmlu_high_school_psychology
data_files:
- split: train
path: mmlu_high_school_psychology/train-*
- config_name: mmlu_high_school_statistics
data_files:
- split: train
path: mmlu_high_school_statistics/train-*
- config_name: mmlu_high_school_us_history
data_files:
- split: train
path: mmlu_high_school_us_history/train-*
- config_name: mmlu_high_school_world_history
data_files:
- split: train
path: mmlu_high_school_world_history/train-*
- config_name: mmlu_human_aging
data_files:
- split: train
path: mmlu_human_aging/train-*
- config_name: mmlu_human_sexuality
data_files:
- split: train
path: mmlu_human_sexuality/train-*
- config_name: mmlu_international_law
data_files:
- split: train
path: mmlu_international_law/train-*
- config_name: mmlu_jurisprudence
data_files:
- split: train
path: mmlu_jurisprudence/train-*
- config_name: mmlu_logical_fallacies
data_files:
- split: train
path: mmlu_logical_fallacies/train-*
- config_name: mmlu_machine_learning
data_files:
- split: train
path: mmlu_machine_learning/train-*
- config_name: mmlu_management
data_files:
- split: train
path: mmlu_management/train-*
- config_name: mmlu_marketing
data_files:
- split: train
path: mmlu_marketing/train-*
- config_name: mmlu_medical_genetics
data_files:
- split: train
path: mmlu_medical_genetics/train-*
- config_name: mmlu_miscellaneous
data_files:
- split: train
path: mmlu_miscellaneous/train-*
- config_name: mmlu_moral_disputes
data_files:
- split: train
path: mmlu_moral_disputes/train-*
- config_name: mmlu_moral_scenarios
data_files:
- split: train
path: mmlu_moral_scenarios/train-*
- config_name: mmlu_nutrition
data_files:
- split: train
path: mmlu_nutrition/train-*
- config_name: mmlu_philosophy
data_files:
- split: train
path: mmlu_philosophy/train-*
- config_name: mmlu_prehistory
data_files:
- split: train
path: mmlu_prehistory/train-*
- config_name: mmlu_professional_accounting
data_files:
- split: train
path: mmlu_professional_accounting/train-*
- config_name: mmlu_professional_law
data_files:
- split: train
path: mmlu_professional_law/train-*
- config_name: mmlu_professional_medicine
data_files:
- split: train
path: mmlu_professional_medicine/train-*
- config_name: mmlu_professional_psychology
data_files:
- split: train
path: mmlu_professional_psychology/train-*
- config_name: mmlu_public_relations
data_files:
- split: train
path: mmlu_public_relations/train-*
- config_name: mmlu_security_studies
data_files:
- split: train
path: mmlu_security_studies/train-*
- config_name: mmlu_sociology
data_files:
- split: train
path: mmlu_sociology/train-*
- config_name: mmlu_us_foreign_policy
data_files:
- split: train
path: mmlu_us_foreign_policy/train-*
- config_name: mmlu_virology
data_files:
- split: train
path: mmlu_virology/train-*
- config_name: mmlu_world_religions
data_files:
- split: train
path: mmlu_world_religions/train-*
- config_name: triviaqa
data_files:
- split: train
path: triviaqa/train-*
- config_name: winogrande
data_files:
- split: train
path: winogrande/train-*
---
提供机构:
unlearning-cleanslate



