open-llm-leaderboard-old/details_Mihaiii__Pallas-0.5-LASER-0.4
数据集概述
数据集简介
该数据集是在对模型 Mihaiii/Pallas-0.5-LASER-0.4 进行评估运行时自动创建的,用于 Open LLM Leaderboard。
数据集结构
- 配置数量:63个配置,每个配置对应一个评估任务。
- 创建来源:数据集来自1次运行,每次运行在每个配置中都有一个特定的分割,分割名称使用运行的时间戳。
- 分割命名:每个配置中的 "train" 分割始终指向最新的结果。
- 结果汇总:一个额外的配置 "results" 存储所有运行的汇总结果,用于计算和显示 Open LLM Leaderboard 上的聚合指标。
数据加载示例
```python
from datasets import load_dataset

data = load_dataset("open-llm-leaderboard/details_Mihaiii__Pallas-0.5-LASER-0.4", "harness_winogrande_5", split="train")
```
最新结果
以下是 2024-01-05T05:08:48.384269 运行的最新结果:
python { "all": { "acc": 0.7368783926681477, "acc_stderr": 0.02910571648489109, "acc_norm": 0.7428060625728449, "acc_norm_stderr": 0.029650747509347968, "mc1": 0.408812729498164, "mc1_stderr": 0.01720995215164173, "mc2": 0.5524839787667136, "mc2_stderr": 0.015871450386944985 }, "harness|arc:challenge|25": { "acc": 0.6117747440273038, "acc_stderr": 0.014241614207414044, "acc_norm": 0.6331058020477816, "acc_norm_stderr": 0.014084133118104296 }, "harness|hellaswag|10": { "acc": 0.6324437363075085, "acc_stderr": 0.004811543077792712, "acc_norm": 0.8274248157737503, "acc_norm_stderr": 0.0037710731802147236 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.41, "acc_stderr": 0.049431107042371025, "acc_norm": 0.41, "acc_norm_stderr": 0.049431107042371025 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.7037037037037037, "acc_stderr": 0.03944624162501116, "acc_norm": 0.7037037037037037, "acc_norm_stderr": 0.03944624162501116 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.8618421052631579, "acc_stderr": 0.028081042939576552, "acc_norm": 0.8618421052631579, "acc_norm_stderr": 0.028081042939576552 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.75, "acc_stderr": 0.04351941398892446, "acc_norm": 0.75, "acc_norm_stderr": 0.04351941398892446 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.7811320754716982, "acc_stderr": 0.0254478638251086, "acc_norm": 0.7811320754716982, "acc_norm_stderr": 0.0254478638251086 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.8680555555555556, "acc_stderr": 0.02830096838204443, "acc_norm": 0.8680555555555556, "acc_norm_stderr": 0.02830096838204443 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.52, "acc_stderr": 0.050211673156867795, "acc_norm": 0.52, "acc_norm_stderr": 0.050211673156867795 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.58, "acc_stderr": 0.04960449637488584, "acc_norm": 0.58, "acc_norm_stderr": 0.04960449637488584 }, "harness|hendrycksTest-college_mathematics|5": 
{ "acc": 0.43, "acc_stderr": 0.049756985195624284, "acc_norm": 0.43, "acc_norm_stderr": 0.049756985195624284 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.7167630057803468, "acc_stderr": 0.034355680560478746, "acc_norm": 0.7167630057803468, "acc_norm_stderr": 0.034355680560478746 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.5294117647058824, "acc_stderr": 0.049665709039785295, "acc_norm": 0.5294117647058824, "acc_norm_stderr": 0.049665709039785295 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.8, "acc_stderr": 0.04020151261036845, "acc_norm": 0.8, "acc_norm_stderr": 0.04020151261036845 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.7574468085106383, "acc_stderr": 0.028020226271200217, "acc_norm": 0.7574468085106383, "acc_norm_stderr": 0.028020226271200217 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.6052631578947368, "acc_stderr": 0.045981880578165414, "acc_norm": 0.6052631578947368, "acc_norm_stderr": 0.045981880578165414 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.7310344827586207, "acc_stderr": 0.036951833116502325, "acc_norm": 0.7310344827586207, "acc_norm_stderr": 0.036951833116502325 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.6084656084656085, "acc_stderr": 0.025138091388851095, "acc_norm": 0.6084656084656085, "acc_norm_stderr": 0.025138091388851095 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.5555555555555556, "acc_stderr": 0.04444444444444449, "acc_norm": 0.5555555555555556, "acc_norm_stderr": 0.04444444444444449 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.49, "acc_stderr": 0.05024183937956912, "acc_norm": 0.49, "acc_norm_stderr": 0.05024183937956912 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.9, "acc_stderr": 0.017066403719657276, "acc_norm": 0.9, "acc_norm_stderr": 0.017066403719657276 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 0.6551724137931034, "acc_stderr": 0.03344283744280458, "acc_norm": 
0.6551724137931034, "acc_norm_stderr": 0.03344283744280458 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.8, "acc_stderr": 0.04020151261036845, "acc_norm": 0.8, "acc_norm_stderr": 0.04020151261036845 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.8363636363636363, "acc_stderr": 0.028887872395487946, "acc_norm": 0.8363636363636363, "acc_norm_stderr": 0.028887872395487946 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.9141414141414141, "acc_stderr": 0.01996022556317289, "acc_norm": 0.9141414141414141, "acc_norm_stderr": 0.01996022556317289 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.9740932642487047, "acc_stderr": 0.01146452335695318, "acc_norm": 0.9740932642487047, "acc_norm_stderr": 0.01146452335695318 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.7974358974358975, "acc_stderr": 0.020377660970371393, "acc_norm": 0.7974358974358975, "acc_norm_stderr": 0.020377660970371393 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.4074074074074074, "acc_stderr": 0.029958249250082107, "acc_norm": 0.4074074074074074, "acc_norm_



