ENSEONG/preprocess-full-math-private-Qwen3-4B-Instruct-2507-bon
收藏Hugging Face2026-03-01 更新2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/ENSEONG/preprocess-full-math-private-Qwen3-4B-Instruct-2507-bon
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 733716758
num_examples: 5000
download_size: 155386851
dataset_size: 733716758
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 733430738
num_examples: 5000
download_size: 155029066
dataset_size: 733430738
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 734042973
num_examples: 5000
download_size: 155234185
dataset_size: 734042973
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 734468983
num_examples: 5000
download_size: 171296518
dataset_size: 734468983
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 734218890
num_examples: 5000
download_size: 171301869
dataset_size: 734218890
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 734249418
num_examples: 5000
download_size: 171541319
dataset_size: 734249418
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 735321959
num_examples: 5000
download_size: 182104832
dataset_size: 735321959
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 734929545
num_examples: 5000
download_size: 181790368
dataset_size: 734929545
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 735211727
num_examples: 5000
download_size: 181874798
dataset_size: 735211727
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 736061085
num_examples: 5000
download_size: 190042511
dataset_size: 736061085
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 735671523
num_examples: 5000
download_size: 189777538
dataset_size: 735671523
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 736290529
num_examples: 5000
download_size: 189961052
dataset_size: 736290529
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 737214728
num_examples: 5000
download_size: 197040648
dataset_size: 737214728
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 737744542
num_examples: 5000
download_size: 197179507
dataset_size: 737744542
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 737253371
num_examples: 5000
download_size: 196898617
dataset_size: 737253371
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 739270387
num_examples: 5000
download_size: 203707761
dataset_size: 739270387
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 739028681
num_examples: 5000
download_size: 203515118
dataset_size: 739028681
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 739627832
num_examples: 5000
download_size: 203792118
dataset_size: 739627832
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 741670416
num_examples: 5000
download_size: 210022282
dataset_size: 741670416
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 741461080
num_examples: 5000
download_size: 209992996
dataset_size: 741461080
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 741102988
num_examples: 5000
download_size: 209841204
dataset_size: 741102988
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 744225121
num_examples: 5000
download_size: 215901420
dataset_size: 744225121
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 743189954
num_examples: 5000
download_size: 215673408
dataset_size: 743189954
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 743520198
num_examples: 5000
download_size: 215789530
dataset_size: 743520198
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 747303248
num_examples: 5000
download_size: 221935130
dataset_size: 747303248
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 747339601
num_examples: 5000
download_size: 221954985
dataset_size: 747339601
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 747190969
num_examples: 5000
download_size: 221869334
dataset_size: 747190969
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 750766370
num_examples: 5000
download_size: 228122101
dataset_size: 750766370
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 750592491
num_examples: 5000
download_size: 228066650
dataset_size: 750592491
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 750744424
num_examples: 5000
download_size: 228117150
dataset_size: 750744424
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 755547331
num_examples: 5000
download_size: 234866566
dataset_size: 755547331
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 755642862
num_examples: 5000
download_size: 234979232
dataset_size: 755642862
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 754676119
num_examples: 5000
download_size: 234549916
dataset_size: 754676119
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 760869708
num_examples: 5000
download_size: 241861739
dataset_size: 760869708
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 760618937
num_examples: 5000
download_size: 241843740
dataset_size: 760618937
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 760913067
num_examples: 5000
download_size: 241875509
dataset_size: 760913067
configs:
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
---
提供机构:
ENSEONG



