open-llm-leaderboard/details_TheBloke__Platypus2-70B-Instruct-GPTQ
收藏数据集概述
数据集简介
该数据集是在对模型 TheBloke/Platypus2-70B-Instruct-GPTQ 进行评估运行时自动创建的,用于 Open LLM Leaderboard。
数据集结构
- 数据集包含 61 个配置,每个配置对应一个评估任务。
- 数据集由 1 次运行创建,每次运行可以在每个配置中找到特定的分割,分割名称使用运行的时间戳。
- "train" 分割始终指向最新的结果。
- 额外的 "results" 配置存储所有运行的聚合结果,用于计算和显示 Open LLM Leaderboard 上的聚合指标。
数据加载示例
python from datasets import load_dataset data = load_dataset("open-llm-leaderboard/details_TheBloke__Platypus2-70B-Instruct-GPTQ", "harness_truthfulqa_mc_0", split="train")
最新结果
以下是 2023-09-01T08:39:03.285201 运行的最新结果: python { "all": { "acc": 0.6985296232204664, "acc_stderr": 0.03125037426870383, "acc_norm": 0.7020835749710057, "acc_norm_stderr": 0.031223245232596956, "mc1": 0.4455324357405141, "mc1_stderr": 0.017399335280140354, "mc2": 0.6253657801165746, "mc2_stderr": 0.01474854589221215 }, "harness|arc:challenge|25": { "acc": 0.6919795221843004, "acc_stderr": 0.013491429517292038, "acc_norm": 0.712457337883959, "acc_norm_stderr": 0.013226719056266129 }, "harness|hellaswag|10": { "acc": 0.6863174666401115, "acc_stderr": 0.004630407476835178, "acc_norm": 0.8755228042222665, "acc_norm_stderr": 0.003294504807555233 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.35, "acc_stderr": 0.0479372485441102, "acc_norm": 0.35, "acc_norm_stderr": 0.0479372485441102 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.5925925925925926, "acc_stderr": 0.042446332383532286, "acc_norm": 0.5925925925925926, "acc_norm_stderr": 0.042446332383532286 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.7894736842105263, "acc_stderr": 0.03317672787533157, "acc_norm": 0.7894736842105263, "acc_norm_stderr": 0.03317672787533157 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.71, "acc_stderr": 0.04560480215720683, "acc_norm": 0.71, "acc_norm_stderr": 0.04560480215720683 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.7471698113207547, "acc_stderr": 0.026749899771241214, "acc_norm": 0.7471698113207547, "acc_norm_stderr": 0.026749899771241214 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.8333333333333334, "acc_stderr": 0.031164899666948614, "acc_norm": 0.8333333333333334, "acc_norm_stderr": 0.031164899666948614 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.47, "acc_stderr": 0.05016135580465919, "acc_norm": 0.47, "acc_norm_stderr": 0.05016135580465919 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.55, "acc_stderr": 0.049999999999999996, "acc_norm": 0.55, "acc_norm_stderr": 0.049999999999999996 }, "harness|hendrycksTest-college_mathematics|5": { "acc": 0.44, "acc_stderr": 0.04988876515698589, "acc_norm": 0.44, "acc_norm_stderr": 0.04988876515698589 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.6705202312138728, "acc_stderr": 0.03583901754736411, "acc_norm": 0.6705202312138728, "acc_norm_stderr": 0.03583901754736411 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.35294117647058826, "acc_stderr": 0.04755129616062947, "acc_norm": 0.35294117647058826, "acc_norm_stderr": 0.04755129616062947 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.76, "acc_stderr": 0.04292346959909281, "acc_norm": 0.76, "acc_norm_stderr": 0.04292346959909281 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.6680851063829787, "acc_stderr": 0.03078373675774565, "acc_norm": 0.6680851063829787, "acc_norm_stderr": 0.03078373675774565 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.43859649122807015, "acc_stderr": 0.04668000738510455, "acc_norm": 0.43859649122807015, "acc_norm_stderr": 0.04668000738510455 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.6137931034482759, "acc_stderr": 0.04057324734419036, "acc_norm": 0.6137931034482759, "acc_norm_stderr": 0.04057324734419036 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.4656084656084656, "acc_stderr": 0.02569032176249384, "acc_norm": 0.4656084656084656, "acc_norm_stderr": 0.02569032176249384 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.5396825396825397, "acc_stderr": 0.04458029125470973, "acc_norm": 0.5396825396825397, "acc_norm_stderr": 0.04458029125470973 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.48, "acc_stderr": 0.050211673156867795, "acc_norm": 0.48, "acc_norm_stderr": 0.050211673156867795 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.8064516129032258, "acc_stderr": 0.022475258525536057, "acc_norm": 0.8064516129032258, "acc_norm_stderr": 0.022475258525536057 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 0.5467980295566502, "acc_stderr": 0.03502544650845872, "acc_norm": 0.5467980295566502, "acc_norm_stderr": 0.03502544650845872 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.74, "acc_stderr": 0.04408440022768079, "acc_norm": 0.74, "acc_norm_stderr": 0.04408440022768079 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.8787878787878788, "acc_stderr": 0.025485498373343237, "acc_norm": 0.8787878787878788, "acc_norm_stderr": 0.025485498373343237 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.8585858585858586, "acc_stderr": 0.02482590979334334, "acc_norm": 0.8585858585858586, "acc_norm_stderr": 0.02482590979334334 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.9481865284974094, "acc_stderr": 0.01599622932024412, "acc_norm": 0.9481865284974094, "acc_norm_stderr": 0.01599622932024412 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.7025641025641025, "acc_stderr": 0.023177408131465942, "acc_norm": 0.7025641025641025, "acc_norm_stderr": 0.023177408131465942 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.3037037037037037, "acc_stderr": 0.028037929969114982, "acc_norm": 0.3



