open-llm-leaderboard-old/details_Kabster__BioMistral-Zephyr-Beta-SLERP
数据集概述
数据集摘要
该数据集是在 Open LLM Leaderboard 上评估模型 Kabster/BioMistral-Zephyr-Beta-SLERP 时自动创建的。
数据集组成
- 数据集包含 63 个配置,每个配置对应一个评估任务。
- 数据集由 1 次运行创建。每次运行在每个配置中对应一个特定的分割,分割名称为该次运行的时间戳。
- "train" 分割始终指向最新结果。
- 额外的 "results" 配置存储所有运行结果的聚合,用于计算并在 Open LLM Leaderboard 上显示聚合指标。
数据加载示例
```python
from datasets import load_dataset

data = load_dataset(
    "open-llm-leaderboard/details_Kabster__BioMistral-Zephyr-Beta-SLERP",
    "harness_winogrande_5",
    split="train",
)
```
最新结果
以下是 2024-03-09T23:17:12.005512 运行的最新结果:
python { "all": { "acc": 0.5995043159633443, "acc_stderr": 0.033015404417283706, "acc_norm": 0.6105459399539238, "acc_norm_stderr": 0.03391082208761106, "mc1": 0.3806609547123623, "mc1_stderr": 0.01699762787190792, "mc2": 0.5460488636416867, "mc2_stderr": 0.015366957850368226 }, "harness|arc:challenge|25": { "acc": 0.5844709897610921, "acc_stderr": 0.014401366641216386, "acc_norm": 0.621160409556314, "acc_norm_stderr": 0.014175915490000326 }, "harness|hellaswag|10": { "acc": 0.6591316470822546, "acc_stderr": 0.004730324556624127, "acc_norm": 0.8412666799442342, "acc_norm_stderr": 0.003646803899770339 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.34, "acc_stderr": 0.04760952285695235, "acc_norm": 0.34, "acc_norm_stderr": 0.04760952285695235 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.5777777777777777, "acc_stderr": 0.04266763404099582, "acc_norm": 0.5777777777777777, "acc_norm_stderr": 0.04266763404099582 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.631578947368421, "acc_stderr": 0.03925523381052932, "acc_norm": 0.631578947368421, "acc_norm_stderr": 0.03925523381052932 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.59, "acc_stderr": 0.049431107042371025, "acc_norm": 0.59, "acc_norm_stderr": 0.049431107042371025 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.6981132075471698, "acc_stderr": 0.02825420034443865, "acc_norm": 0.6981132075471698, "acc_norm_stderr": 0.02825420034443865 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.6944444444444444, "acc_stderr": 0.03852084696008534, "acc_norm": 0.6944444444444444, "acc_norm_stderr": 0.03852084696008534 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.44, "acc_stderr": 0.04988876515698589, "acc_norm": 0.44, "acc_norm_stderr": 0.04988876515698589 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.5, "acc_stderr": 0.050251890762960605, "acc_norm": 0.5, "acc_norm_stderr": 0.050251890762960605 }, "harness|hendrycksTest-college_mathematics|5": { 
"acc": 0.36, "acc_stderr": 0.048241815132442176, "acc_norm": 0.36, "acc_norm_stderr": 0.048241815132442176 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.6069364161849711, "acc_stderr": 0.0372424959581773, "acc_norm": 0.6069364161849711, "acc_norm_stderr": 0.0372424959581773 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.4215686274509804, "acc_stderr": 0.04913595201274498, "acc_norm": 0.4215686274509804, "acc_norm_stderr": 0.04913595201274498 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.72, "acc_stderr": 0.04512608598542129, "acc_norm": 0.72, "acc_norm_stderr": 0.04512608598542129 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.5106382978723404, "acc_stderr": 0.03267862331014063, "acc_norm": 0.5106382978723404, "acc_norm_stderr": 0.03267862331014063 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.45614035087719296, "acc_stderr": 0.04685473041907789, "acc_norm": 0.45614035087719296, "acc_norm_stderr": 0.04685473041907789 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.5172413793103449, "acc_stderr": 0.04164188720169375, "acc_norm": 0.5172413793103449, "acc_norm_stderr": 0.04164188720169375 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.37037037037037035, "acc_stderr": 0.024870815251057093, "acc_norm": 0.37037037037037035, "acc_norm_stderr": 0.024870815251057093 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.40476190476190477, "acc_stderr": 0.04390259265377562, "acc_norm": 0.40476190476190477, "acc_norm_stderr": 0.04390259265377562 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.34, "acc_stderr": 0.04760952285695236, "acc_norm": 0.34, "acc_norm_stderr": 0.04760952285695236 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.7161290322580646, "acc_stderr": 0.02564938106302925, "acc_norm": 0.7161290322580646, "acc_norm_stderr": 0.02564938106302925 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 0.4630541871921182, "acc_stderr": 0.035083705204426656, 
"acc_norm": 0.4630541871921182, "acc_norm_stderr": 0.035083705204426656 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.63, "acc_stderr": 0.04852365870939099, "acc_norm": 0.63, "acc_norm_stderr": 0.04852365870939099 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.7090909090909091, "acc_stderr": 0.03546563019624336, "acc_norm": 0.7090909090909091, "acc_norm_stderr": 0.03546563019624336 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.7676767676767676, "acc_stderr": 0.03008862949021749, "acc_norm": 0.7676767676767676, "acc_norm_stderr": 0.03008862949021749 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.8238341968911918, "acc_stderr": 0.027493504244548057, "acc_norm": 0.8238341968911918, "acc_norm_stderr": 0.027493504244548057 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.6333333333333333, "acc_stderr": 0.02443301646605247, "acc_norm": 0.6333333333333333, "acc_norm_stderr": 0.02443301646605247 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.32592592592592595, "acc_stderr": 0.02857834836547308, "acc_norm": 0.32592592592592595, "acc_norm_stderr": 0.02857834836547308 }, "harness|hendrycksTest-high_school_microeconomics|5": { "acc": 0.6890756302521008, "acc_stderr": 0.030066761582977927, "acc_norm": 0.6890756302521008, "acc_norm_stderr": 0.030066761582977927 }, "harness|hendrycksTest-high_school_physics|5": { "acc": 0.3576158940397351, "acc_stderr": 0.03913453431177258, "acc_norm": 0.3576158940397351, "acc_norm_stderr": 0.03913453431177258 }, "harness|hendrycksTest-high_school_psychology|5": { "acc": 0.7743119266055046, "acc_stderr": 0.017923087667803064, "acc_norm": 0.7743119266055046, "acc_norm_stderr": 0.017923087667803064 }, "harness|hendrycksTest-high_school_statistics|5": { "acc": 0.5092592592592593, "acc_stderr": 0.034093869469927006, "acc_norm": 0.5092592592592593, "acc_norm_stderr": 0.034093869469927006 }, 
"harness|hendrycksTest-high_school_us_history|5": { "acc": 0.7598039215686274, "acc_stderr": 0.02998373305591361, "acc_norm": 0.7598039215686274, "acc_norm_stderr": 0.02998373305591361 }, "harness|hendrycksTest-high_school_world_history|5": { "acc": 0.7552742616033755, "acc_stderr": 0.02798569938703643, "acc_norm": 0.7552742616033755, "acc_norm_stderr": 0.02798569938703643 }, "harness|hendrycksTest-human_aging|5": { "acc": 0.6412556053811659, "acc_stderr": 0.032190792004199956, "acc_norm": 0.6412556053811659, "acc_norm_stderr": 0.032190792004199956 }, "harness|hendrycksTest-human_sexuality|5": { "acc": 0.7480916030534351, "acc_stderr": 0.03807387116306086, "acc_norm": 0.7480916030534351, "acc_norm_stderr": 0.03807387116306086 }, "harness|hendrycksTest-international_law|5": { "acc": 0.71900826446281, "acc_stderr": 0.04103203830514511, "acc_norm": 0.71900826446281, "acc_norm_stderr": 0.04103203830514511 }, "harness|hendrycksTest-jurisprudence|5": { "acc": 0.75, "acc_stderr": 0.04186091791394607, "acc_norm": 0.75, "acc_norm_stderr": 0.04186091791394607 }, "harness|hendrycksTest-logical_fallacies|5": { "acc": 0.7177914110429447, "acc_stderr": 0.03536117886664743, "acc_norm": 0.7177914110429447, "acc_norm_stderr": 0.03536117886664743 }, "harness|hendrycksTest-machine_learning|5": { "acc": 0.4107142857142857, "acc_stderr": 0.046695106638751906, "acc_norm": 0.4107142857142857, "acc_norm_stderr": 0.046695106638751906 }, "harness|hendrycksTest-management|5": { "acc": 0.7378640776699029, "acc_stderr": 0.043546310772605956, "acc_norm": 0.7378640776699029, "acc_norm_stderr": 0.043546310772605956 }, "harness|hendrycksTest-marketing|5": { "acc": 0.8333333333333334, "acc_stderr": 0.024414947304543674, "acc_norm": 0.8333333333333334, "acc_norm_stderr": 0.024414947304543674 }, "harness|hendrycksTest-medical_genetics|5": { "acc": 0.7, "acc_stderr": 0.046056618647183814, "acc_norm": 0.7, "acc_norm_stderr": 0.046056618647183814 }, "harness|hendrycksTest-miscellaneous|5": { "acc": 
0.7726692209450831, "acc_stderr": 0.014987270640946012, "acc_norm": 0.7726692209450831, "acc_norm_stderr": 0.014987270640946012 }, "harness|hendrycksTest-moral_disputes|5": { "acc": 0.6907514450867052, "acc_stderr": 0.02488314057007176, "acc_norm": 0.6907514450867052, "acc_norm_stderr": 0.02488314057007176 }, "harness|hendrycksTest-moral_scenarios|5": { "acc": 0.28268156424581004, "acc_stderr": 0.0150603817300181, "acc_norm": 0.28268156424581004, "acc_norm_stderr": 0.0150603817300181 }, "harness|hendrycksTest-nutrition|5": { "acc": 0.6993464052287581, "acc_stderr": 0.02625605383571896, "acc_norm": 0.6993464052287581, "acc_norm_stderr": 0.02625605383571896 }, "harness|hendrycksTest-philosophy|5": { "acc": 0.6752411575562701, "acc_stderr": 0.026596782287697043, "acc_norm": 0.6752411575562701, "acc_norm_stderr": 0.026596782287697043 }, "harness|hendrycksTest-prehistory|5": { "acc": 0.6820987654320988, "acc_stderr": 0.02591006352824088, "acc_norm": 0.6820987654320988, "acc_norm_stderr": 0.02591006352824088 }, "harness|hendrycksTest-professional_accounting|5": { "acc": 0.42907801418439717, "acc_stderr": 0.029525914302558562, "acc_norm": 0.42907801418439717, "acc_norm_stderr": 0.029525914302558562 }, "harness|hendrycksTest-professional_law|5": { "acc": 0.438722294654498, "acc_stderr": 0.012673969883493272, "acc_norm": 0.438722294654498, "acc_norm_stderr": 0.012673969883493272 }, "harness|hendrycksTest-professional_medicine|5": { "acc": 0.6323529411764706, "acc_stderr": 0.029289413409403192, "acc_norm": 0.6323529411764706, "acc_norm_stderr": 0.029289413409403192 }, "harness|hendrycksTest-professional_psychology|5": { "acc": 0.6160130718954249, "acc_stderr": 0.019675808135281504, "acc_norm": 0.6160130718954249, "acc_norm_stderr": 0.019675808135281504 }, "harness|hendrycksTest-public_relations|5": { "acc": 0.6818181818181818, "acc_stderr": 0.04461272175910509, "acc_norm": 0.6818181818181818, "acc_norm_stderr": 0.04461272175910509 }, 
"harness|hendrycksTest-security_studies|5": { "acc": 0.7183673469387755, "acc_stderr": 0.028795185574291293, "acc_norm": 0.7183673469387755, "acc_norm_stderr": 0.028795185574291293 }, "harness|hendrycksTest-sociology|5": { "acc": 0.7661691542288557, "acc_stderr": 0.029929415408348384, "acc_norm": 0.7661691542288557, "acc_norm_stderr": 0.029929415408348384 }, "harness|hendrycksTest-us_foreign_policy|5": { "acc": 0.85, "acc_stderr": 0.035887028128263734, "acc_norm": 0.85, "acc_norm_stderr": 0.035887028128263734 }, "harness|hendrycksTest-virology|5": { "acc": 0.5180722891566265, "acc_stderr": 0.03889951252827216, "acc_norm": 0.5180722891566265, "acc_norm_stderr": 0.03889951252827216 }, "harness|hendrycksTest-world_religions|5": { "acc": 0.7543859649122807, "acc_stderr": 0.0330140594698725, "acc_norm": 0.7543859649122807, "acc_norm_stderr": 0.0330140594698725 }, "harness|truthfulqa:mc|0": { "mc1": 0.3806609547123623, "mc1_stderr": 0.01699762787190792, "mc2": 0.5460488636416867, "mc2_stderr": 0.015366957850368226 }, "harness|winogrande|5": { "acc": 0.7663772691397001, "acc_stderr": 0.011892194477183524 }, "harness|gsm8k|5": { "acc": 0.0, "acc_stderr": 0.0 } }
配置详情
- config_name: harness_arc_challenge_25
  - 分割: 2024_03_09T23_17_12.005512
    - 路径: **/details_harness|arc:challenge|25_2024-03-09T23-17-12.005512.parquet
  - 分割: latest
    - 路径: **/details_harness|arc:challenge|25_2024-03-09T23-17-12.005512.parquet
- config_name: harness_gsm8k_5
  - 分割: 2024_03_09T23_17_12.005512
    - 路径: **/details_harness|gsm8k|5_2024-03-09T23-17-12.005512.parquet
  - 分割: latest
    - 路径: **/details_harness|gsm8k|5_2024-03-09T23-17-12.005512.parquet
- config_name: harness_hellaswag_10
  - 分割: 2024_03_09T23_17_12.005512
    - 路径: **/details_harness|hellaswag|10_2024-03-09T23-17-12.005512.parquet
  - 分割: latest
    - 路径: **/details_harness|hellaswag|10_2024-03-09T23-17-12.005512.parquet
-
config_name: harness_hendrycksTest_5
- 分割: 2024_03_09T23_17_12.005512
- 路径:
**/details_harness|hendrycksTest-abstract_algebra|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-anatomy|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-astronomy|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-business_ethics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-clinical_knowledge|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_biology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_chemistry|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_computer_science|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_mathematics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_medicine|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_physics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-computer_security|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-conceptual_physics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-econometrics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-electrical_engineering|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-elementary_mathematics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-formal_logic|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-global_facts|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_biology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_chemistry|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_computer_science|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_european_history|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_geography|5_2024-03-09T23-17-12.0
05512.parquet**/details_harness|hendrycksTest-high_school_government_and_politics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_macroeconomics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_mathematics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_microeconomics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_physics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_psychology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_statistics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_us_history|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_world_history|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-human_aging|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-human_sexuality|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-international_law|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-jurisprudence|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-logical_fallacies|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-machine_learning|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-management|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-marketing|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-medical_genetics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-miscellaneous|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-moral_disputes|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-moral_scenarios|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-nutrition|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-philosophy|5_2024-0
3-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-prehistory|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-professional_accounting|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-professional_law|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-professional_medicine|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-professional_psychology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-public_relations|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-security_studies|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-sociology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-us_foreign_policy|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-virology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-world_religions|5_2024-03-09T23-17-12.005512.parquet
- 路径:
- 分割: latest
- 路径:
**/details_harness|hendrycksTest-abstract_algebra|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-anatomy|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-astronomy|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-business_ethics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-clinical_knowledge|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_biology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_chemistry|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_computer_science|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_mathematics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_medicine|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-college_physics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-computer_security|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-conceptual_physics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-econometrics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-electrical_engineering|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-elementary_mathematics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-formal_logic|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-global_facts|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_biology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_chemistry|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_computer_science|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_european_history|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_geography|5_2024-03-09T23-17-12.0
05512.parquet**/details_harness|hendrycksTest-high_school_government_and_politics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_macroeconomics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_mathematics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_microeconomics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_physics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_psychology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_statistics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_us_history|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-high_school_world_history|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-human_aging|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-human_sexuality|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-international_law|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-jurisprudence|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-logical_fallacies|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-machine_learning|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-management|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-marketing|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-medical_genetics|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-miscellaneous|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-moral_disputes|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-moral_scenarios|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-nutrition|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-philosophy|5_2024-0
3-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-prehistory|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-professional_accounting|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-professional_law|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-professional_medicine|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-professional_psychology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-public_relations|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-security_studies|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-sociology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-us_foreign_policy|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-virology|5_2024-03-09T23-17-12.005512.parquet**/details_harness|hendrycksTest-world_religions|5_2024-03-09T23-17-12.005512.parquet
- 路径:
- 分割: 2024_03_09T23_17_12.005512



