open-llm-leaderboard-old/details_adonlee__LLaMA_2_70B_LoRA
数据集概述
数据集摘要
该数据集是在对模型 adonlee/LLaMA_2_70B_LoRA 进行评估运行期间自动创建的,用于 Open LLM Leaderboard。
数据集结构
- 配置数量:61个配置,每个配置对应一个评估任务。
- 数据来源:数据集由 1 次运行创建;每次运行在每个配置中对应一个特定的分割,分割名称为该次运行的时间戳。
- 最新结果:"train" 分割始终指向最新的结果。
- 汇总结果:一个额外的配置 "results" 存储所有运行的汇总结果,用于计算和显示 Open LLM Leaderboard 上的聚合指标。
数据加载示例
```python
from datasets import load_dataset

data = load_dataset(
    "open-llm-leaderboard/details_adonlee__LLaMA_2_70B_LoRA",
    "harness_truthfulqa_mc_0",
    split="train",
)
```
最新结果
以下是 2023-09-22T21:35:51.410251 运行的最新结果:
python { "all": { "acc": 0.7077096775676626, "acc_stderr": 0.030867670314758275, "acc_norm": 0.7114995822621553, "acc_norm_stderr": 0.030836833292351554, "mc1": 0.4663402692778458, "mc1_stderr": 0.017463793867168106, "mc2": 0.6451679386365279, "mc2_stderr": 0.014753028795637621 }, "harness|arc:challenge|25": { "acc": 0.6902730375426621, "acc_stderr": 0.013512058415238361, "acc_norm": 0.726962457337884, "acc_norm_stderr": 0.013019332762635743 }, "harness|hellaswag|10": { "acc": 0.6886078470424218, "acc_stderr": 0.004621163476949205, "acc_norm": 0.8755228042222665, "acc_norm_stderr": 0.003294504807555228 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.35, "acc_stderr": 0.0479372485441102, "acc_norm": 0.35, "acc_norm_stderr": 0.0479372485441102 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.6370370370370371, "acc_stderr": 0.041539484047424, "acc_norm": 0.6370370370370371, "acc_norm_stderr": 0.041539484047424 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.8223684210526315, "acc_stderr": 0.03110318238312338, "acc_norm": 0.8223684210526315, "acc_norm_stderr": 0.03110318238312338 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.76, "acc_stderr": 0.04292346959909283, "acc_norm": 0.76, "acc_norm_stderr": 0.04292346959909283 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.7358490566037735, "acc_stderr": 0.02713429162874171, "acc_norm": 0.7358490566037735, "acc_norm_stderr": 0.02713429162874171 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.8263888888888888, "acc_stderr": 0.03167473383795718, "acc_norm": 0.8263888888888888, "acc_norm_stderr": 0.03167473383795718 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.49, "acc_stderr": 0.05024183937956912, "acc_norm": 0.49, "acc_norm_stderr": 0.05024183937956912 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.56, "acc_stderr": 0.04988876515698589, "acc_norm": 0.56, "acc_norm_stderr": 0.04988876515698589 }, "harness|hendrycksTest-college_mathematics|5": { "acc": 
0.41, "acc_stderr": 0.04943110704237102, "acc_norm": 0.41, "acc_norm_stderr": 0.04943110704237102 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.6936416184971098, "acc_stderr": 0.03514942551267439, "acc_norm": 0.6936416184971098, "acc_norm_stderr": 0.03514942551267439 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.37254901960784315, "acc_stderr": 0.048108401480826346, "acc_norm": 0.37254901960784315, "acc_norm_stderr": 0.048108401480826346 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.78, "acc_stderr": 0.04163331998932263, "acc_norm": 0.78, "acc_norm_stderr": 0.04163331998932263 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.7106382978723405, "acc_stderr": 0.02964400657700962, "acc_norm": 0.7106382978723405, "acc_norm_stderr": 0.02964400657700962 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.45614035087719296, "acc_stderr": 0.04685473041907789, "acc_norm": 0.45614035087719296, "acc_norm_stderr": 0.04685473041907789 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.6206896551724138, "acc_stderr": 0.04043461861916746, "acc_norm": 0.6206896551724138, "acc_norm_stderr": 0.04043461861916746 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.47619047619047616, "acc_stderr": 0.02572209706438853, "acc_norm": 0.47619047619047616, "acc_norm_stderr": 0.02572209706438853 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.5079365079365079, "acc_stderr": 0.044715725362943486, "acc_norm": 0.5079365079365079, "acc_norm_stderr": 0.044715725362943486 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.47, "acc_stderr": 0.050161355804659205, "acc_norm": 0.47, "acc_norm_stderr": 0.050161355804659205 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.8096774193548387, "acc_stderr": 0.022331707611823078, "acc_norm": 0.8096774193548387, "acc_norm_stderr": 0.022331707611823078 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 0.5714285714285714, "acc_stderr": 0.034819048444388045, 
"acc_norm": 0.5714285714285714, "acc_norm_stderr": 0.034819048444388045 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.78, "acc_stderr": 0.04163331998932262, "acc_norm": 0.78, "acc_norm_stderr": 0.04163331998932262 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.8545454545454545, "acc_stderr": 0.027530196355066584, "acc_norm": 0.8545454545454545, "acc_norm_stderr": 0.027530196355066584 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.898989898989899, "acc_stderr": 0.021469735576055343, "acc_norm": 0.898989898989899, "acc_norm_stderr": 0.021469735576055343 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.9326424870466321, "acc_stderr": 0.0180883938390789, "acc_norm": 0.9326424870466321, "acc_norm_stderr": 0.0180883938390789 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.7102564102564103, "acc_stderr": 0.023000628243687968, "acc_norm": 0.7102564102564103, "acc_norm_stderr": 0.023000628243687968 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.337037037037037, "acc_stderr": 0.028820884666253252, "acc_norm": 0.33703703703703



