open-llm-leaderboard-old/details_Lexic0n__Mistral_7B-Open_Hermes-NSFWV1
收藏数据集概述
数据集简介
该数据集是在对模型 Lexic0n/Mistral_7B-Open_Hermes-NSFWV1 进行评估运行期间自动创建的,用于 Open LLM Leaderboard。
数据集组成
数据集包含 63 个配置,每个配置对应一个评估任务。数据集从 2 次运行中创建,每次运行在每个配置中作为一个特定的分割存在,分割名称使用运行的时间戳。"train" 分割始终指向最新的结果。
额外配置
一个额外的配置 "results" 存储了所有运行的聚合结果,用于计算和显示 Open LLM Leaderboard 上的聚合指标。
数据加载示例
python from datasets import load_dataset data = load_dataset("open-llm-leaderboard/details_Lexic0n__Mistral_7B-Open_Hermes-NSFWV1", "harness_winogrande_5", split="train")
最新结果
以下是 2024-04-07T23:21:52.091033 运行的最新结果:
python { "all": { "acc": 0.6280637867830843, "acc_stderr": 0.032575149309255966, "acc_norm": 0.6308289809481329, "acc_norm_stderr": 0.03322430229877023, "mc1": 0.2962056303549572, "mc1_stderr": 0.015983595101811392, "mc2": 0.4414488242528058, "mc2_stderr": 0.014991506330800888 }, "harness|arc:challenge|25": { "acc": 0.5895904436860068, "acc_stderr": 0.014374922192642662, "acc_norm": 0.6348122866894198, "acc_norm_stderr": 0.014070265519268804 }, "harness|hellaswag|10": { "acc": 0.662617008564031, "acc_stderr": 0.0047185047710837655, "acc_norm": 0.852320254929297, "acc_norm_stderr": 0.0035405716545956313 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.33, "acc_stderr": 0.04725815626252606, "acc_norm": 0.33, "acc_norm_stderr": 0.04725815626252606 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.5925925925925926, "acc_stderr": 0.04244633238353228, "acc_norm": 0.5925925925925926, "acc_norm_stderr": 0.04244633238353228 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.6447368421052632, "acc_stderr": 0.03894734487013317, "acc_norm": 0.6447368421052632, "acc_norm_stderr": 0.03894734487013317 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.56, "acc_stderr": 0.04988876515698589, "acc_norm": 0.56, "acc_norm_stderr": 0.04988876515698589 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.6867924528301886, "acc_stderr": 0.028544793319055326, "acc_norm": 0.6867924528301886, "acc_norm_stderr": 0.028544793319055326 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.7430555555555556, "acc_stderr": 0.03653946969442099, "acc_norm": 0.7430555555555556, "acc_norm_stderr": 0.03653946969442099 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.47, "acc_stderr": 0.05016135580465919, "acc_norm": 0.47, "acc_norm_stderr": 0.05016135580465919 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.5, "acc_stderr": 0.050251890762960605, "acc_norm": 0.5, "acc_norm_stderr": 0.050251890762960605 }, "harness|hendrycksTest-college_mathematics|5": { "acc": 0.4, "acc_stderr": 0.049236596391733084, "acc_norm": 0.4, "acc_norm_stderr": 0.049236596391733084 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.630057803468208, "acc_stderr": 0.036812296333943194, "acc_norm": 0.630057803468208, "acc_norm_stderr": 0.036812296333943194 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.43137254901960786, "acc_stderr": 0.04928099597287534, "acc_norm": 0.43137254901960786, "acc_norm_stderr": 0.04928099597287534 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.8, "acc_stderr": 0.04020151261036845, "acc_norm": 0.8, "acc_norm_stderr": 0.04020151261036845 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.5829787234042553, "acc_stderr": 0.03223276266711712, "acc_norm": 0.5829787234042553, "acc_norm_stderr": 0.03223276266711712 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.4298245614035088, "acc_stderr": 0.046570472605949625, "acc_norm": 0.4298245614035088, "acc_norm_stderr": 0.046570472605949625 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.5241379310344828, "acc_stderr": 0.0416180850350153, "acc_norm": 0.5241379310344828, "acc_norm_stderr": 0.0416180850350153 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.42592592592592593, "acc_stderr": 0.025467149045469553, "acc_norm": 0.42592592592592593, "acc_norm_stderr": 0.025467149045469553 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.46825396825396826, "acc_stderr": 0.04463112720677172, "acc_norm": 0.46825396825396826, "acc_norm_stderr": 0.04463112720677172 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.35, "acc_stderr": 0.0479372485441102, "acc_norm": 0.35, "acc_norm_stderr": 0.0479372485441102 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.7483870967741936, "acc_stderr": 0.02468597928623996, "acc_norm": 0.7483870967741936, "acc_norm_stderr": 0.02468597928623996 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 0.47783251231527096, "acc_stderr": 0.03514528562175007, "acc_norm": 0.47783251231527096, "acc_norm_stderr": 0.03514528562175007 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.63, "acc_stderr": 0.04852365870939099, "acc_norm": 0.63, "acc_norm_stderr": 0.04852365870939099 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.7636363636363637, "acc_stderr": 0.033175059300091805, "acc_norm": 0.7636363636363637, "acc_norm_stderr": 0.033175059300091805 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.8131313131313131, "acc_stderr": 0.027772533334218957, "acc_norm": 0.8131313131313131, "acc_norm_stderr": 0.027772533334218957 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.844559585492228, "acc_stderr": 0.026148483469153324, "acc_norm": 0.844559585492228, "acc_norm_stderr": 0.026148483469153324 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.6461538461538462, "acc_stderr": 0.024243783994062153, "acc_norm": 0.6461538461538462, "acc_norm_stderr": 0.024243783994062153 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.2962962962962963, "acc_stderr": 0.027840811495871944, "acc_norm": 0.29



