open-llm-leaderboard-old/details_liuchanghf__phi2-mmlu-lora
数据集概述
数据集简介
该数据集是在对模型 liuchanghf/phi2-mmlu-lora 进行评估运行期间自动创建的。数据集包含 63 个配置，每个配置对应一个评估任务。数据集由 2 次运行的结果生成；每次运行在各个配置中对应一个特定的分割，分割以该次运行的时间戳命名。"train" 分割始终指向最新的结果。
数据集结构
数据集包含多个配置,每个配置对应不同的评估任务。每个配置下有多个分割,包括特定时间戳的分割和最新的 "train" 分割。
数据加载示例
以下是加载数据集的示例代码：

```python
from datasets import load_dataset

data = load_dataset("open-llm-leaderboard/details_liuchanghf__phi2-mmlu-lora", "harness_winogrande_5", split="train")
```
最新结果
以下是 2024-04-10T11:11:31.663865 运行的最新结果: python { "all": { "acc": 0.5786822458633217, "acc_stderr": 0.033530708089314624, "acc_norm": 0.588875565225012, "acc_norm_stderr": 0.03440320614990488, "mc1": 0.31701346389228885, "mc1_stderr": 0.016289203374403382, "mc2": 0.4418888720112363, "mc2_stderr": 0.01551223464874866 }, "harness|arc:challenge|25": { "acc": 0.5938566552901023, "acc_stderr": 0.01435165669009786, "acc_norm": 0.621160409556314, "acc_norm_stderr": 0.014175915490000326 }, "harness|hellaswag|10": { "acc": 0.5601473809998009, "acc_stderr": 0.004953546708512329, "acc_norm": 0.7404899422425811, "acc_norm_stderr": 0.0043746991892848605 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.29, "acc_stderr": 0.045604802157206845, "acc_norm": 0.29, "acc_norm_stderr": 0.045604802157206845 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.45925925925925926, "acc_stderr": 0.04304979692464242, "acc_norm": 0.45925925925925926, "acc_norm_stderr": 0.04304979692464242 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.6381578947368421, "acc_stderr": 0.03910525752849724, "acc_norm": 0.6381578947368421, "acc_norm_stderr": 0.03910525752849724 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.59, "acc_stderr": 0.04943110704237102, "acc_norm": 0.59, "acc_norm_stderr": 0.04943110704237102 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.6075471698113207, "acc_stderr": 0.03005258057955785, "acc_norm": 0.6075471698113207, "acc_norm_stderr": 0.03005258057955785 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.6666666666666666, "acc_stderr": 0.039420826399272135, "acc_norm": 0.6666666666666666, "acc_norm_stderr": 0.039420826399272135 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.42, "acc_stderr": 0.049604496374885836, "acc_norm": 0.42, "acc_norm_stderr": 0.049604496374885836 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.49, "acc_stderr": 0.05024183937956913, "acc_norm": 0.49, "acc_norm_stderr": 0.05024183937956913 }, 
"harness|hendrycksTest-college_mathematics|5": { "acc": 0.41, "acc_stderr": 0.04943110704237101, "acc_norm": 0.41, "acc_norm_stderr": 0.04943110704237101 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.5953757225433526, "acc_stderr": 0.03742461193887248, "acc_norm": 0.5953757225433526, "acc_norm_stderr": 0.03742461193887248 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.4019607843137255, "acc_stderr": 0.04878608714466996, "acc_norm": 0.4019607843137255, "acc_norm_stderr": 0.04878608714466996 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.7, "acc_stderr": 0.046056618647183814, "acc_norm": 0.7, "acc_norm_stderr": 0.046056618647183814 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.5446808510638298, "acc_stderr": 0.03255525359340356, "acc_norm": 0.5446808510638298, "acc_norm_stderr": 0.03255525359340356 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.34210526315789475, "acc_stderr": 0.04462917535336937, "acc_norm": 0.34210526315789475, "acc_norm_stderr": 0.04462917535336937 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.496551724137931, "acc_stderr": 0.041665675771015785, "acc_norm": 0.496551724137931, "acc_norm_stderr": 0.041665675771015785 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.455026455026455, "acc_stderr": 0.025646928361049398, "acc_norm": 0.455026455026455, "acc_norm_stderr": 0.025646928361049398 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.4603174603174603, "acc_stderr": 0.04458029125470973, "acc_norm": 0.4603174603174603, "acc_norm_stderr": 0.04458029125470973 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.36, "acc_stderr": 0.04824181513244218, "acc_norm": 0.36, "acc_norm_stderr": 0.04824181513244218 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.7354838709677419, "acc_stderr": 0.025091892378859275, "acc_norm": 0.7354838709677419, "acc_norm_stderr": 0.025091892378859275 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 
0.46798029556650245, "acc_stderr": 0.03510766597959215, "acc_norm": 0.46798029556650245, "acc_norm_stderr": 0.03510766597959215 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.64, "acc_stderr": 0.048241815132442176, "acc_norm": 0.64, "acc_norm_stderr": 0.048241815132442176 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.7333333333333333, "acc_stderr": 0.03453131801885417, "acc_norm": 0.7333333333333333, "acc_norm_stderr": 0.03453131801885417 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.7424242424242424, "acc_stderr": 0.03115626951964684, "acc_norm": 0.7424242424242424, "acc_norm_stderr": 0.03115626951964684 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.8290155440414507, "acc_stderr": 0.027171213683164525, "acc_norm": 0.8290155440414507, "acc_norm_stderr": 0.027171213683164525 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.5666666666666667, "acc_stderr": 0.025124653525885113, "acc_norm": 0.5666666666666667, "acc_norm_stderr": 0.025124653525885113 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.37037037037037035, "acc_stderr": 0.02944316932303154, "acc_norm": 0.37037037037037035, "acc_norm_stderr": 0.02944316932303154 }, "harness|hendrycksTest-high_school_microeconomics|5": { "acc": 0.6428571428571429, "acc_stderr": 0.031124619309328177, "acc_norm": 0.6428571428571429, "acc_norm_stderr": 0.03



