open-llm-leaderboard-old/details_xxyyy123__20k_v1_lora_qkvo_rank14_v2
数据集概述
数据集名称
Evaluation run of xxyyy123/20k_v1_lora_qkvo_rank14_v2
数据集来源
该数据集是在 Open LLM Leaderboard 上对模型 xxyyy123/20k_v1_lora_qkvo_rank14_v2 进行评估运行期间自动创建的。
数据集组成
数据集由 61 个配置组成,每个配置对应一个评估任务。数据集由 1 次运行创建;每次运行在各个配置中对应一个特定的分割,分割以该次运行的时间戳命名。"train" 分割始终指向最新的结果。
数据集加载示例
```python
from datasets import load_dataset

data = load_dataset("open-llm-leaderboard/details_xxyyy123__20k_v1_lora_qkvo_rank14_v2", "harness_truthfulqa_mc_0", split="train")
```
最新结果
以下是 2023-09-03T13:20:05.284068 运行的最新结果:
python { "all": { "acc": 0.5085078094647929, "acc_stderr": 0.03515476481930117, "acc_norm": 0.5121639524782741, "acc_norm_stderr": 0.035139678425659286, "mc1": 0.3574051407588739, "mc1_stderr": 0.0167765996767294, "mc2": 0.5157743333677478, "mc2_stderr": 0.01586124547215222 }, "harness|arc:challenge|25": { "acc": 0.5264505119453925, "acc_stderr": 0.01459093135812017, "acc_norm": 0.5537542662116041, "acc_norm_stderr": 0.014526705548539982 }, "harness|hellaswag|10": { "acc": 0.6025692093208525, "acc_stderr": 0.004883663587184775, "acc_norm": 0.7909778928500298, "acc_norm_stderr": 0.004057792171893577 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.28, "acc_stderr": 0.04512608598542128, "acc_norm": 0.28, "acc_norm_stderr": 0.04512608598542128 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.48148148148148145, "acc_stderr": 0.043163785995113245, "acc_norm": 0.48148148148148145, "acc_norm_stderr": 0.043163785995113245 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.4605263157894737, "acc_stderr": 0.04056242252249033, "acc_norm": 0.4605263157894737, "acc_norm_stderr": 0.04056242252249033 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.45, "acc_stderr": 0.049999999999999996, "acc_norm": 0.45, "acc_norm_stderr": 0.049999999999999996 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.5735849056603773, "acc_stderr": 0.030437794342983056, "acc_norm": 0.5735849056603773, "acc_norm_stderr": 0.030437794342983056 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.5555555555555556, "acc_stderr": 0.041553199555931467, "acc_norm": 0.5555555555555556, "acc_norm_stderr": 0.041553199555931467 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.39, "acc_stderr": 0.04902071300001974, "acc_norm": 0.39, "acc_norm_stderr": 0.04902071300001974 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.39, "acc_stderr": 0.04902071300001974, "acc_norm": 0.39, "acc_norm_stderr": 0.04902071300001974 }, 
"harness|hendrycksTest-college_mathematics|5": { "acc": 0.33, "acc_stderr": 0.047258156262526045, "acc_norm": 0.33, "acc_norm_stderr": 0.047258156262526045 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.44508670520231214, "acc_stderr": 0.03789401760283647, "acc_norm": 0.44508670520231214, "acc_norm_stderr": 0.03789401760283647 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.2647058823529412, "acc_stderr": 0.04389869956808778, "acc_norm": 0.2647058823529412, "acc_norm_stderr": 0.04389869956808778 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.64, "acc_stderr": 0.048241815132442176, "acc_norm": 0.64, "acc_norm_stderr": 0.048241815132442176 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.46382978723404256, "acc_stderr": 0.032600385118357715, "acc_norm": 0.46382978723404256, "acc_norm_stderr": 0.032600385118357715 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.3508771929824561, "acc_stderr": 0.044895393502707, "acc_norm": 0.3508771929824561, "acc_norm_stderr": 0.044895393502707 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.46206896551724136, "acc_stderr": 0.041546596717075474, "acc_norm": 0.46206896551724136, "acc_norm_stderr": 0.041546596717075474 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.28835978835978837, "acc_stderr": 0.023330654054535886, "acc_norm": 0.28835978835978837, "acc_norm_stderr": 0.023330654054535886 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.30952380952380953, "acc_stderr": 0.04134913018303316, "acc_norm": 0.30952380952380953, "acc_norm_stderr": 0.04134913018303316 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.4, "acc_stderr": 0.04923659639173309, "acc_norm": 0.4, "acc_norm_stderr": 0.04923659639173309 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.5161290322580645, "acc_stderr": 0.028429203176724555, "acc_norm": 0.5161290322580645, "acc_norm_stderr": 0.028429203176724555 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 
0.37438423645320196, "acc_stderr": 0.03405155380561952, "acc_norm": 0.37438423645320196, "acc_norm_stderr": 0.03405155380561952 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.39, "acc_stderr": 0.04902071300001975, "acc_norm": 0.39, "acc_norm_stderr": 0.04902071300001975 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.7212121212121212, "acc_stderr": 0.035014387062967806, "acc_norm": 0.7212121212121212, "acc_norm_stderr": 0.035014387062967806 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.6313131313131313, "acc_stderr": 0.034373055019806184, "acc_norm": 0.6313131313131313, "acc_norm_stderr": 0.034373055019806184 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.7202072538860104, "acc_stderr": 0.032396370467357036, "acc_norm": 0.7202072538860104, "acc_norm_stderr": 0.032396370467357036 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.4641025641025641, "acc_stderr": 0.025285585990017845, "acc_norm": 0.4641025641025641, "acc_norm_stderr": 0.025285585990017845 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.25555555555555554, "acc_stderr": 0.026



