open-llm-leaderboard-old/details_Kquant03__Ryu-4x7B-MoE-bf16
收藏数据集概述
数据集来源
该数据集是在评估模型 Kquant03/Ryu-4x7B-MoE-bf16 在 Open LLM Leaderboard 上的自动创建的。
数据集组成
数据集由 63 个配置组成,每个配置对应一个评估任务。数据集从 1 次运行中创建,每个运行可以在每个配置中找到特定的分割,分割名称使用运行的时间戳。"train" 分割始终指向最新的结果。
结果配置
一个额外的配置 "results" 存储所有运行的聚合结果,用于计算和显示 Open LLM Leaderboard 上的聚合指标。
最新结果
以下是 2024-01-13T23:51:35.789085 运行的最新结果:
python { "all": { "acc": 0.6396004158808674, "acc_stderr": 0.032332778374865194, "acc_norm": 0.6426234407115231, "acc_norm_stderr": 0.03298221583193354, "mc1": 0.49938800489596086, "mc1_stderr": 0.01750348793889251, "mc2": 0.649568492897451, "mc2_stderr": 0.015609242157624164 }, "harness|arc:challenge|25": { "acc": 0.643344709897611, "acc_stderr": 0.013998056902620196, "acc_norm": 0.6646757679180887, "acc_norm_stderr": 0.01379618294778556 }, "harness|hellaswag|10": { "acc": 0.6634136626170085, "acc_stderr": 0.004715762925037027, "acc_norm": 0.831009759012149, "acc_norm_stderr": 0.0037397742854185247 }, "harness|hendrycksTest-abstract_algebra|5": { "acc": 0.33, "acc_stderr": 0.04725815626252605, "acc_norm": 0.33, "acc_norm_stderr": 0.04725815626252605 }, "harness|hendrycksTest-anatomy|5": { "acc": 0.6222222222222222, "acc_stderr": 0.04188307537595853, "acc_norm": 0.6222222222222222, "acc_norm_stderr": 0.04188307537595853 }, "harness|hendrycksTest-astronomy|5": { "acc": 0.7302631578947368, "acc_stderr": 0.03611780560284898, "acc_norm": 0.7302631578947368, "acc_norm_stderr": 0.03611780560284898 }, "harness|hendrycksTest-business_ethics|5": { "acc": 0.62, "acc_stderr": 0.048783173121456316, "acc_norm": 0.62, "acc_norm_stderr": 0.048783173121456316 }, "harness|hendrycksTest-clinical_knowledge|5": { "acc": 0.7018867924528301, "acc_stderr": 0.028152837942493857, "acc_norm": 0.7018867924528301, "acc_norm_stderr": 0.028152837942493857 }, "harness|hendrycksTest-college_biology|5": { "acc": 0.7291666666666666, "acc_stderr": 0.03716177437566017, "acc_norm": 0.7291666666666666, "acc_norm_stderr": 0.03716177437566017 }, "harness|hendrycksTest-college_chemistry|5": { "acc": 0.5, "acc_stderr": 0.050251890762960605, "acc_norm": 0.5, "acc_norm_stderr": 0.050251890762960605 }, "harness|hendrycksTest-college_computer_science|5": { "acc": 0.54, "acc_stderr": 0.05009082659620333, "acc_norm": 0.54, "acc_norm_stderr": 0.05009082659620333 }, "harness|hendrycksTest-college_mathematics|5": { "acc": 0.32, "acc_stderr": 0.046882617226215034, "acc_norm": 0.32, "acc_norm_stderr": 0.046882617226215034 }, "harness|hendrycksTest-college_medicine|5": { "acc": 0.653179190751445, "acc_stderr": 0.036291466701596636, "acc_norm": 0.653179190751445, "acc_norm_stderr": 0.036291466701596636 }, "harness|hendrycksTest-college_physics|5": { "acc": 0.4019607843137255, "acc_stderr": 0.04878608714466996, "acc_norm": 0.4019607843137255, "acc_norm_stderr": 0.04878608714466996 }, "harness|hendrycksTest-computer_security|5": { "acc": 0.75, "acc_stderr": 0.04351941398892446, "acc_norm": 0.75, "acc_norm_stderr": 0.04351941398892446 }, "harness|hendrycksTest-conceptual_physics|5": { "acc": 0.5574468085106383, "acc_stderr": 0.03246956919789958, "acc_norm": 0.5574468085106383, "acc_norm_stderr": 0.03246956919789958 }, "harness|hendrycksTest-econometrics|5": { "acc": 0.4649122807017544, "acc_stderr": 0.046920083813689104, "acc_norm": 0.4649122807017544, "acc_norm_stderr": 0.046920083813689104 }, "harness|hendrycksTest-electrical_engineering|5": { "acc": 0.5655172413793104, "acc_stderr": 0.04130740879555498, "acc_norm": 0.5655172413793104, "acc_norm_stderr": 0.04130740879555498 }, "harness|hendrycksTest-elementary_mathematics|5": { "acc": 0.3994708994708995, "acc_stderr": 0.02522545028406788, "acc_norm": 0.3994708994708995, "acc_norm_stderr": 0.02522545028406788 }, "harness|hendrycksTest-formal_logic|5": { "acc": 0.4523809523809524, "acc_stderr": 0.044518079590553275, "acc_norm": 0.4523809523809524, "acc_norm_stderr": 0.044518079590553275 }, "harness|hendrycksTest-global_facts|5": { "acc": 0.34, "acc_stderr": 0.04760952285695235, "acc_norm": 0.34, "acc_norm_stderr": 0.04760952285695235 }, "harness|hendrycksTest-high_school_biology|5": { "acc": 0.7838709677419354, "acc_stderr": 0.023415293433568525, "acc_norm": 0.7838709677419354, "acc_norm_stderr": 0.023415293433568525 }, "harness|hendrycksTest-high_school_chemistry|5": { "acc": 0.5073891625615764, "acc_stderr": 0.035176035403610105, "acc_norm": 0.5073891625615764, "acc_norm_stderr": 0.035176035403610105 }, "harness|hendrycksTest-high_school_computer_science|5": { "acc": 0.69, "acc_stderr": 0.04648231987117316, "acc_norm": 0.69, "acc_norm_stderr": 0.04648231987117316 }, "harness|hendrycksTest-high_school_european_history|5": { "acc": 0.7393939393939394, "acc_stderr": 0.034277431758165236, "acc_norm": 0.7393939393939394, "acc_norm_stderr": 0.034277431758165236 }, "harness|hendrycksTest-high_school_geography|5": { "acc": 0.7525252525252525, "acc_stderr": 0.030746300742124484, "acc_norm": 0.7525252525252525, "acc_norm_stderr": 0.030746300742124484 }, "harness|hendrycksTest-high_school_government_and_politics|5": { "acc": 0.917098445595855, "acc_stderr": 0.01989934131572178, "acc_norm": 0.917098445595855, "acc_norm_stderr": 0.01989934131572178 }, "harness|hendrycksTest-high_school_macroeconomics|5": { "acc": 0.6538461538461539, "acc_stderr": 0.02412112541694119, "acc_norm": 0.6538461538461539, "acc_norm_stderr": 0.02412112541694119 }, "harness|hendrycksTest-high_school_mathematics|5": { "acc": 0.3333333333333333, "acc_stderr": 0.028742040903948482, "acc_norm": 0.3333333333333333, "acc_norm_stderr": 0.028742040903948482 }, "harness|hendrycksTest-high_school_microeconomics|5": { "acc": 0.6806722



