open-llm-leaderboard/NousResearch__Yarn-Llama-2-13b-128k-details
收藏Hugging Face2024-06-26 更新2024-06-29 收录
下载链接:
https://hf-mirror.com/datasets/open-llm-leaderboard/NousResearch__Yarn-Llama-2-13b-128k-details
下载链接
链接失效反馈官方服务:
资源简介:
该数据集是在评估模型 NousResearch/Yarn-Llama-2-13b-128k 的过程中自动创建的。数据集由44个配置组成,每个配置对应一个评估任务。数据集由1次运行生成;每次运行的结果保存在各配置的一个特定分割中,分割以该次运行的时间戳命名。"train"分割始终指向最新的结果。此外,还有一个名为"results"的配置,用于存储所有运行的聚合结果。
该数据集是在评估模型 NousResearch/Yarn-Llama-2-13b-128k 的过程中自动创建的。数据集由44个配置组成,每个配置对应一个评估任务。数据集由1次运行生成;每次运行的结果保存在各配置的一个特定分割中,分割以该次运行的时间戳命名。"train"分割始终指向最新的结果。此外,还有一个名为"results"的配置,用于存储所有运行的聚合结果。
提供机构:
open-llm-leaderboard
原始信息汇总
数据集概述
数据集来源
- 数据集自动创建于模型评估运行过程中,具体模型为 NousResearch/Yarn-Llama-2-13b-128k。
数据集结构
- 数据集包含44个配置,每个配置对应一个评估任务。
- 数据集由1次运行生成;每次运行的结果存储在各配置的一个特定分割中,分割以该次运行的时间戳命名。
- "train"分割始终指向最新的结果(注意:下方的数据加载示例使用的是 split="latest")。
- 额外配置"results"存储所有运行的聚合结果。
数据加载示例
```python
from datasets import load_dataset

data = load_dataset(
    "HuggingFaceEvalInternal/NousResearch__Yarn-Llama-2-13b-128k-details-private",
    name="NousResearch__Yarn-Llama-2-13b-128k__leaderboard_arc_challenge",
    split="latest"
)
```
最新结果
- 最新结果来自2024-06-26T13-35-58.419685的运行,具体结果可在此处查看。 python { "all": { "leaderboard": { "acc,none": 0.25787640109057863, "acc_stderr,none": 0.0037386138880835585, "prompt_level_loose_acc,none": 0.13493530499075784, "prompt_level_loose_acc_stderr,none": 0.014702466942017815, "inst_level_strict_acc,none": 0.19784172661870503, "inst_level_strict_acc_stderr,none": "N/A", "exact_match,none": 0.011329305135951661, "exact_match_stderr,none": 0.002911667757315497, "acc_norm,none": 0.38644296813421913, "acc_norm_stderr,none": 0.004885868427966243, "prompt_level_strict_acc,none": 0.133086876155268, "prompt_level_strict_acc_stderr,none": 0.014617009342904514, "inst_level_loose_acc,none": 0.19784172661870503, "inst_level_loose_acc_stderr,none": "N/A", "alias": "leaderboard" }, "leaderboard_arc_challenge": { "acc,none": 0.523037542662116, "acc_stderr,none": 0.014595873205358269, "acc_norm,none": 0.5691126279863481, "acc_norm_stderr,none": 0.014471133392642473, "alias": " - leaderboard_arc_challenge" }, "leaderboard_bbh": { "acc_norm,none": 0.38118382225308106, "acc_norm_stderr,none": 0.00601214680618392, "alias": " - leaderboard_bbh" }, "leaderboard_bbh_boolean_expressions": { "acc_norm,none": 0.62, "acc_norm_stderr,none": 0.030760116042626042, "alias": " - leaderboard_bbh_boolean_expressions" }, "leaderboard_bbh_causal_judgement": { "acc_norm,none": 0.5668449197860963, "acc_norm_stderr,none": 0.03633267411102587, "alias": " - leaderboard_bbh_causal_judgement" }, "leaderboard_bbh_date_understanding": { "acc_norm,none": 0.44, "acc_norm_stderr,none": 0.03145724452223572, "alias": " - leaderboard_bbh_date_understanding" }, "leaderboard_bbh_disambiguation_qa": { "acc_norm,none": 0.304, "acc_norm_stderr,none": 0.029150213374159673, "alias": " - leaderboard_bbh_disambiguation_qa" }, "leaderboard_bbh_formal_fallacies": { "acc_norm,none": 0.54, "acc_norm_stderr,none": 0.03158465389149899, "alias": " - leaderboard_bbh_formal_fallacies" }, "leaderboard_bbh_geometric_shapes": { 
"acc_norm,none": 0.276, "acc_norm_stderr,none": 0.028328537274211335, "alias": " - leaderboard_bbh_geometric_shapes" }, "leaderboard_bbh_hyperbaton": { "acc_norm,none": 0.64, "acc_norm_stderr,none": 0.03041876402517498, "alias": " - leaderboard_bbh_hyperbaton" }, "leaderboard_bbh_logical_deduction_five_objects": { "acc_norm,none": 0.26, "acc_norm_stderr,none": 0.027797315752644304, "alias": " - leaderboard_bbh_logical_deduction_five_objects" }, "leaderboard_bbh_logical_deduction_seven_objects": { "acc_norm,none": 0.18, "acc_norm_stderr,none": 0.024346890650293523, "alias": " - leaderboard_bbh_logical_deduction_seven_objects" }, "leaderboard_bbh_logical_deduction_three_objects": { "acc_norm,none": 0.392, "acc_norm_stderr,none": 0.030938207620401195, "alias": " - leaderboard_bbh_logical_deduction_three_objects" }, "leaderboard_bbh_movie_recommendation": { "acc_norm,none": 0.692, "acc_norm_stderr,none": 0.029256928606501864, "alias": " - leaderboard_bbh_movie_recommendation" }, "leaderboard_bbh_navigate": { "acc_norm,none": 0.452, "acc_norm_stderr,none": 0.03153986449255662, "alias": " - leaderboard_bbh_navigate" }, "leaderboard_bbh_object_counting": { "acc_norm,none": 0.284, "acc_norm_stderr,none": 0.028576958730437398, "alias": " - leaderboard_bbh_object_counting" }, "leaderboard_bbh_penguins_in_a_table": { "acc_norm,none": 0.2534246575342466, "acc_norm_stderr,none": 0.036122454616245706, "alias": " - leaderboard_bbh_penguins_in_a_table" }, "leaderboard_bbh_reasoning_about_colored_objects": { "acc_norm,none": 0.26, "acc_norm_stderr,none": 0.027797315752644308, "alias": " - leaderboard_bbh_reasoning_about_colored_objects" }, "leaderboard_bbh_ruin_names": { "acc_norm,none": 0.336, "acc_norm_stderr,none": 0.02993325909419152, "alias": " - leaderboard_bbh_ruin_names" }, "leaderboard_bbh_salient_translation_error_detection": { "acc_norm,none": 0.232, "acc_norm_stderr,none": 0.026750070374865157, "alias": " - leaderboard_bbh_salient_translation_error_detection" }, 
"leaderboard_bbh_snarks": { "acc_norm,none": 0.5280898876404494, "acc_norm_stderr,none": 0.03752294651708462, "alias": " - leaderboard_bbh_snarks" }, "leaderboard_bbh_sports_understanding": { "acc_norm,none": 0.632, "acc_norm_stderr,none": 0.030562070620993163, "alias": " - leaderboard_bbh_sports_understanding" }, "leaderboard_bbh_temporal_sequences": { "acc_norm,none": 0.164, "acc_norm_stderr,none": 0.02346526100207676, "alias": " - leaderboard_bbh_temporal_sequences" }, "leaderboard_bbh_tracking_shuffled_objects_five_objects": { "acc_norm,none": 0.164, "acc_norm_stderr,none": 0.02346526100207676, "alias": " - leaderboard_bbh_tracking_shuffled_objects_five_objects" }, "leaderboard_bbh_tracking_shuffled_objects_seven_objects": { "acc_norm,none": 0.136, "acc_norm_stderr,none": 0.021723342617052065, "alias": " - leaderboard_bbh_tracking_shuffled_objects_seven_objects" }, "leaderboard_bbh_tracking_shuffled_objects_three_objects": { "acc_norm,none": 0.34, "acc_norm_stderr,none": 0.030020073605457904, "alias": " - leaderboard_bbh_tracking_shuffled_objects_three_objects" }, "leaderboard_bbh_web_of_lies": { "acc_norm,none": 0.492, "acc_norm_stderr,none": 0.031682156431413803, "alias": " - leaderboard_bbh_web_of_lies" }, "leaderboard_gpqa": { "acc_norm,none": 0.25838926174496646, "acc_norm_stderr,none": 0.012693206865728043, "alias": " - leaderboard_gpqa" }, "leaderboard_gpqa_diamond": { "acc_norm,none": 0.24242424242424243, "acc_norm_stderr,none": 0.030532892233932026, "alias": " - leaderboard_gpqa_diamond" }, "leaderboard_gpqa_extended": { "acc_norm,none": 0.26373626373626374, "acc_norm_stderr,none": 0.01887571358037249, "alias": " - leaderboard_gpqa_extended" }, "leaderboard_gpqa_main": { "acc_norm,none": 0.25892857142857145, "acc_norm_stderr,none": 0.020718879324472125, "alias": " - leaderboard_gpqa_main" }, "leaderboard_ifeval": { "prompt_level_strict_acc,none": 0.133086876155268, "prompt_level_strict_acc_stderr,none": 0.014617009342904514, 
"inst_level_strict_acc,none": 0.19784172661870503, "inst_level_strict_acc_stderr,none": "N/A", "prompt_level_loose_acc,none": 0.13493530499075784, "prompt_level_loose_acc_stderr,none": 0.014702466942017815, "inst_level_loose_acc,none": 0.19784172661870503, "inst_level_loose_acc_stderr,none": "N/A", "alias": " - leaderboard_ifeval" }, "leaderboard_math_hard": { "exact_match,none": 0.011329305135951661, "exact_match_stderr,none": 0.002911667757315497, "alias": " - leaderboard_math_hard" }, "leaderboard_math_algebra_hard": { "exact_match,none": 0.013029315960912053, "exact_match_stderr,none": 0.006482644725390225, "alias": " - leaderboard_math_algebra_



