open-llm-leaderboard-old/details_0-hero__Matter-0.2-7B-DPO
数据集概述
数据集简介
该数据集是在对模型 0-hero/Matter-0.2-7B-DPO 进行评估运行期间自动创建的,用于 Open LLM Leaderboard。
数据集结构
- 配置数量:63个配置,每个配置对应一个评估任务。
- 数据来源:数据集由 1 次运行创建;每次运行在各个配置中对应一个独立的分割,分割名称为该次运行的时间戳。
- 最新结果:"train" 分割总是指向最新的结果。
- 结果汇总:一个额外的配置 "results" 存储所有运行的汇总结果,用于计算和显示 Open LLM Leaderboard 上的聚合指标。
数据加载示例
```python
from datasets import load_dataset

data = load_dataset("open-llm-leaderboard/details_0-hero__Matter-0.2-7B-DPO", "harness_winogrande_5", split="train")
```
最新结果
以下是最新的结果,来自 2024-04-15T21:23:53.765896 的运行:
python { "all": { "acc": 0.6304635219670391, "acc_stderr": 0.03247611955077603, "acc_norm": 0.6324699949079374, "acc_norm_stderr": 0.033134141472393366, "mc1": 0.3488372093023256, "mc1_stderr": 0.016684419859986893, "mc2": 0.5030082038523329, "mc2_stderr": 0.01531844462963793 }, "harness|arc:challenge|25": { "acc": 0.5989761092150171, "acc_stderr": 0.014322255790719867, "acc_norm": 0.6331058020477816, "acc_norm_stderr": 0.014084133118104303 }, "harness|hellaswag|10": { "acc": 0.6362278430591516, "acc_stderr": 0.004801009657690438, "acc_norm": 0.8316072495518821, "acc_norm_stderr": 0.003734498979207306 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.31, "acc_stderr": 0.04648231987117316, "acc_norm": 0.31, "acc_norm_stderr": 0.04648231987117316 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.6370370370370371, "acc_stderr": 0.041539484047423976, "acc_norm": 0.6370370370370371, "acc_norm_stderr": 0.041539484047423976 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.7039473684210527, "acc_stderr": 0.03715062154998904, "acc_norm": 0.7039473684210527, "acc_norm_stderr": 0.03715062154998904 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.6, "acc_stderr": 0.04923659639173309, "acc_norm": 0.6, "acc_norm_stderr": 0.04923659639173309 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.6679245283018868, "acc_stderr": 0.02898545565233439, "acc_norm": 0.6679245283018868, "acc_norm_stderr": 0.02898545565233439 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.7222222222222222, "acc_stderr": 0.03745554791462456, "acc_norm": 0.7222222222222222, "acc_norm_stderr": 0.03745554791462456 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.42, "acc_stderr": 0.049604496374885836, "acc_norm": 0.42, "acc_norm_stderr": 0.049604496374885836 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.48, "acc_stderr": 0.050211673156867795, "acc_norm": 0.48, "acc_norm_stderr": 0.050211673156867795 }, "harness|hendrycksTest-college_mathematics|5": 
{ "acc": 0.37, "acc_stderr": 0.04852365870939099, "acc_norm": 0.37, "acc_norm_stderr": 0.04852365870939099 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.6127167630057804, "acc_stderr": 0.037143259063020656, "acc_norm": 0.6127167630057804, "acc_norm_stderr": 0.037143259063020656 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.35294117647058826, "acc_stderr": 0.04755129616062946, "acc_norm": 0.35294117647058826, "acc_norm_stderr": 0.04755129616062946 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.74, "acc_stderr": 0.0440844002276808, "acc_norm": 0.74, "acc_norm_stderr": 0.0440844002276808 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.5617021276595745, "acc_stderr": 0.03243618636108101, "acc_norm": 0.5617021276595745, "acc_norm_stderr": 0.03243618636108101 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.5087719298245614, "acc_stderr": 0.047028804320496165, "acc_norm": 0.5087719298245614, "acc_norm_stderr": 0.047028804320496165 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.5586206896551724, "acc_stderr": 0.04137931034482758, "acc_norm": 0.5586206896551724, "acc_norm_stderr": 0.04137931034482758 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.41798941798941797, "acc_stderr": 0.025402555503260912, "acc_norm": 0.41798941798941797, "acc_norm_stderr": 0.025402555503260912 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.4126984126984127, "acc_stderr": 0.04403438954768177, "acc_norm": 0.4126984126984127, "acc_norm_stderr": 0.04403438954768177 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.37, "acc_stderr": 0.04852365870939099, "acc_norm": 0.37, "acc_norm_stderr": 0.04852365870939099 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.7419354838709677, "acc_stderr": 0.02489246917246283, "acc_norm": 0.7419354838709677, "acc_norm_stderr": 0.02489246917246283 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 0.47783251231527096, "acc_stderr": 
0.035145285621750094, "acc_norm": 0.47783251231527096, "acc_norm_stderr": 0.035145285621750094 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.75, "acc_stderr": 0.04351941398892446, "acc_norm": 0.75, "acc_norm_stderr": 0.04351941398892446 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.7393939393939394, "acc_stderr": 0.034277431758165236, "acc_norm": 0.7393939393939394, "acc_norm_stderr": 0.034277431758165236 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.8080808080808081, "acc_stderr": 0.02805779167298902, "acc_norm": 0.8080808080808081, "acc_norm_stderr": 0.02805779167298902 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.8601036269430051, "acc_stderr": 0.025033870583015178, "acc_norm": 0.8601036269430051, "acc_norm_stderr": 0.025033870583015178 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.617948717948718, "acc_stderr": 0.024635549163908234, "acc_norm": 0.617948717948718, "acc_norm_stderr": 0.024635549163908234 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.3037037037037037, "acc_stderr": 0.028037929969114993, "acc_norm": 0.3037037037037037, "acc_norm_stderr": 0.028037929969114993 }, "harness|hendrycksTest-high_school_microeconomics|5": { "acc": 0.6386554621848739, "acc_stderr": 0.03120469122515002, "acc_norm": 0.6386554621848739, "acc_norm_stderr": 0.03120469122515002 }, "harness|hendrycksTest-high_school_physics|5": { "acc": 0.31788079470198677, "acc_stderr": 0.03802039760107903, "acc_norm": 0.31788079470198677, "acc_norm_stderr": 0.03802039760107903 }, "harness|hendrycksTest-high_school_psychology|5": { "acc": 0.8201834862385321, "acc_stderr": 0.01646534546739152, "acc_norm": 0.8201834862385321, "acc_norm_stderr": 0.01646534546739152 }, "harness|hendrycksTest-high_school_statistics|5": { "acc": 0.4583333333333333, "acc_stderr": 0.03398110890294636, "acc_norm": 0.4583333333333333, "acc_norm_stderr": 0.03398110890294636 }, 
"harness|hendrycksTest-high_school_us_history|5": { "acc": 0.8088235294117647, "acc_stderr": 0.027599174300640766, "acc_norm": 0.8088235294117647, "acc_norm_stderr": 0.027599174300640766 }, "harness|hendrycksTest-high_school_world_history|5": { "acc": 0.8016877637130801, "acc_stderr": 0.02595502084162113, "acc_norm": 0.8016877637130801, "acc_norm_stderr": 0.02595502084162113 }, "harness|hendrycksTest-human_aging|5": { "acc": 0.6905829596412556, "acc_stderr": 0.031024411740572206, "acc_norm": 0.6905829596412556, "acc_norm_stderr": 0.031024411740572206 }, "harness|hendrycksTest-human_sexuality|5": { "acc": 0.7786259541984732, "acc_stderr": 0.03641297081313731, "acc_norm": 0.7786259541984732, "acc_norm_stderr": 0.03641297081313731 }, "harness|hendrycksTest-international_law|5": { "acc": 0.7355371900826446, "acc_stderr": 0.04026187527591207, "acc_norm": 0.7355371900826446, "acc_norm_stderr": 0.04026187527591207 }, "harness|hendrycksTest-jurisprudence|5": { "acc": 0.7777777777777778, "acc_stderr": 0.0401910747255735, "acc_norm": 0.7777777777777778, "acc_norm_stderr": 0.0401910747255735 }, "harness|hendrycksTest-logical_fallacies|5": { "acc": 0.7300613496932515, "acc_stderr": 0.03487825168497892, "acc_norm": 0.7300613496932515, "acc_norm_stderr": 0.03487825168497892 }, "harness|hendrycksTest-machine_learning|5": { "acc": 0.4732142857142857, "acc_stderr": 0.047389751192741546, "acc_norm": 0.4732142857142857, "acc_norm_stderr": 0.047389751192741546 }, "harness|hendrycksTest-management|5": { "acc": 0.8155339805825242, "acc_stderr": 0.03840423627288276, "acc_norm": 0.8155339805825242, "acc_norm_stderr": 0.03840423627288276 }, "harness|hendrycksTest-marketing|5": { "acc": 0.8632478632478633, "acc_stderr": 0.022509033937077795, "acc_norm": 0.8632478632478633, "acc_norm_stderr": 0.022509033937077795 }, "harness|hendrycksTest-medical_genetics|5": { "acc": 0.72, "acc_stderr": 0.045126085985421276, "acc_norm": 0.72, "acc_norm_stderr": 0.045126085985421276 }, 
"harness|hendrycksTest-miscellaneous|5": { "acc": 0.7956577266922095, "acc_stderr": 0.014419123980931894, "acc_norm": 0.7956577266922095, "acc_norm_stderr": 0.014419123980931894 }, "harness|hendrycksTest-moral_disputes|5": { "acc": 0.7196531791907514, "acc_stderr": 0.024182427496577612, "acc_norm": 0.7196531791907514, "acc_norm_stderr": 0.024182427496577612 }, "harness|hendrycksTest-moral_scenarios|5": { "acc": 0.41564245810055866, "acc_stderr": 0.016482782187500673, "acc_norm": 0.41564245810055866, "acc_norm_stderr": 0.016482782187500673 }, "harness|hendrycksTest-nutrition|5": { "acc": 0.7058823529411765, "acc_stderr": 0.026090162504279053, "acc_norm": 0.7058823529411765, "acc_norm_stderr": 0.026090162504279053 }, "harness|hendrycksTest-philosophy|5": { "acc": 0.7041800643086816, "acc_stderr": 0.025922371788818763, "acc_norm": 0.7041800643086816, "acc_norm_stderr": 0.025922371788818763 }, "harness|hendrycksTest-prehistory|5": { "acc": 0.7006172839506173, "acc_stderr": 0.025483115601195448, "acc_norm": 0.7006172839506173, "acc_norm_stderr": 0.025483115601195448 }, "harness|hendrycksTest-professional_accounting|5": { "acc": 0.48936170212765956, "acc_stderr": 0.029820747191422466, "acc_norm": 0.48936170212765956, "acc_norm_stderr": 0.029820747191422466 }, "harness|hendrycksTest-professional_law|5": { "acc": 0.4641460234680574, "acc_stderr": 0.012737361318730581, "acc_norm": 0.4641460234680574, "acc_norm_stderr": 0.012737361318730581 }, "harness|hendrycksTest-professional_medicine|5": { "acc": 0.5955882352941176, "acc_stderr": 0.029812630701569743, "acc_norm": 0.5955882352941176, "acc_norm_stderr": 0.029812630701569743 }, "harness|hendrycksTest-professional_psychology|5": { "acc": 0.6748366013071896, "acc_stderr": 0.01895088677080631, "acc_norm": 0.6748366013071896, "acc_norm_stderr": 0.01895088677080631 }, "harness|hendrycksTest-public_relations|5": { "acc": 0.6636363636363637, "acc_stderr": 0.04525393596302506, "acc_norm": 0.6636363636363637, "acc_norm_stderr": 
0.04525393596302506 }, "harness|hendrycksTest-security_studies|5": { "acc": 0.7020408163265306, "acc_stderr": 0.02927956741106568, "acc_norm": 0.7020408163265306, "acc_norm_stderr": 0.02927956741106568 }, "harness|hendrycksTest-sociology|5": { "acc": 0.8208955223880597, "acc_stderr": 0.027113286753111837, "acc_norm": 0.8208955223880597, "acc_norm_stderr": 0.027113286753111837 }, "harness|hendrycksTest-us_foreign_policy|5": { "acc": 0.88, "acc_stderr": 0.03265986323710906, "acc_norm": 0.88, "acc_norm_stderr": 0.03265986323710906 }, "harness|hendrycksTest-virology|5": { "acc": 0.536144578313253, "acc_stderr": 0.038823108508905954, "acc_norm": 0.536144578313253, "acc_norm_stderr": 0.038823108508905954 }, "harness|hendrycksTest-world_religions|5": { "acc": 0.8070175438596491, "acc_stderr": 0.030267457554898458, "acc_norm": 0.8070175438596491, "acc_norm_stderr": 0.030267457554898458 }, "harness|truthfulqa:mc|0": { "mc1": 0.3488372093023256, "mc1_stderr": 0.016684419859986893, "mc2": 0.5030082038523329, "mc2_stderr": 0.01531844462963793 }, "harness|winogrande|5": { "acc": 0.8026835043409629, "acc_stderr": 0.011185026389050374 }, "harness|gsm8k|5": { "acc": 0.5693707354056103, "acc_stderr": 0.013639285985979927 } }
配置详情
- harness_arc_challenge_25
  - 分割:2024_04_15T21_23_53.765896
    - 路径:`**/details_harness|arc:challenge|25_2024-04-15T21-23-53.765896.parquet`
  - 分割:latest
    - 路径:`**/details_harness|arc:challenge|25_2024-04-15T21-23-53.765896.parquet`
- harness_gsm8k_5
  - 分割:2024_04_15T21_23_53.765896
    - 路径:`**/details_harness|gsm8k|5_2024-04-15T21-23-53.765896.parquet`
  - 分割:latest
    - 路径:`**/details_harness|gsm8k|5_2024-04-15T21-23-53.765896.parquet`
- harness_hellaswag_10
  - 分割:2024_04_15T21_23_53.765896
    - 路径:`**/details_harness|hellaswag|10_2024-04-15T21-23-53.765896.parquet`
  - 分割:latest
    - 路径:`**/details_harness|hellaswag|10_2024-04-15T21-23-53.765896.parquet`
-
harness_hendrycksTest_5
- 分割:2024_04_15T21_23_53.765896
- 路径:
**/details_harness|hendrycksTest-abstract_algebra|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-anatomy|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-astronomy|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-business_ethics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-clinical_knowledge|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_biology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_chemistry|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_computer_science|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_mathematics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_medicine|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_physics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-computer_security|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-conceptual_physics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-econometrics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-electrical_engineering|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-elementary_mathematics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-formal_logic|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-global_facts|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_biology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_chemistry|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_computer_science|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_european_history|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_geography|5_2024-04-15T21-23-53.7
65896.parquet**/details_harness|hendrycksTest-high_school_government_and_politics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_macroeconomics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_mathematics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_microeconomics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_physics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_psychology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_statistics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_us_history|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_world_history|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-human_aging|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-human_sexuality|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-international_law|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-jurisprudence|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-logical_fallacies|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-machine_learning|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-management|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-marketing|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-medical_genetics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-miscellaneous|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-moral_disputes|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-moral_scenarios|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-nutrition|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-philosophy|5_2024-0
4-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-prehistory|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-professional_accounting|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-professional_law|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-professional_medicine|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-professional_psychology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-public_relations|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-security_studies|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-sociology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-us_foreign_policy|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-virology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-world_religions|5_2024-04-15T21-23-53.765896.parquet
- 路径:
- 分割:latest
- 路径:
**/details_harness|hendrycksTest-abstract_algebra|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-anatomy|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-astronomy|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-business_ethics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-clinical_knowledge|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_biology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_chemistry|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_computer_science|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_mathematics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_medicine|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-college_physics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-computer_security|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-conceptual_physics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-econometrics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-electrical_engineering|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-elementary_mathematics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-formal_logic|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-global_facts|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_biology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_chemistry|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_computer_science|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_european_history|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_geography|5_2024-04-15T21-23-53.7
65896.parquet**/details_harness|hendrycksTest-high_school_government_and_politics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_macroeconomics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_mathematics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_microeconomics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_physics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_psychology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_statistics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_us_history|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-high_school_world_history|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-human_aging|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-human_sexuality|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-international_law|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-jurisprudence|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-logical_fallacies|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-machine_learning|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-management|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-marketing|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-medical_genetics|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-miscellaneous|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-moral_disputes|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-moral_scenarios|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-nutrition|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-philosophy|5_2024-0
4-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-prehistory|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-professional_accounting|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-professional_law|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-professional_medicine|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-professional_psychology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-public_relations|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-security_studies|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-sociology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-us_foreign_policy|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-virology|5_2024-04-15T21-23-53.765896.parquet**/details_harness|hendrycksTest-world_religions|5_2024-04-15T21-23-53.765896.parquet
- 路径:
- 分割:2024_04_15T21_23_53.765896



