ENSEONG/preprocessed-full-math-private-Llama-3.2-3B-Instruct-bon
收藏Hugging Face2026-03-17 更新2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/ENSEONG/preprocessed-full-math-private-Llama-3.2-3B-Instruct-bon
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 489458729
num_examples: 5000
download_size: 94833260
dataset_size: 489458729
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 488322117
num_examples: 5000
download_size: 94528800
dataset_size: 488322117
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 488586358
num_examples: 5000
download_size: 94608748
dataset_size: 488586358
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 486133741
num_examples: 5000
download_size: 109469344
dataset_size: 486133741
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 486244402
num_examples: 5000
download_size: 109255960
dataset_size: 486244402
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 487600360
num_examples: 5000
download_size: 109530651
dataset_size: 487600360
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 483291324
num_examples: 5000
download_size: 117985076
dataset_size: 483291324
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 483092046
num_examples: 5000
download_size: 117946355
dataset_size: 483092046
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 483612201
num_examples: 5000
download_size: 118093042
dataset_size: 483612201
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 480519649
num_examples: 5000
download_size: 124833593
dataset_size: 480519649
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 481219925
num_examples: 5000
download_size: 125125294
dataset_size: 481219925
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 481359200
num_examples: 5000
download_size: 125139047
dataset_size: 481359200
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 477889151
num_examples: 5000
download_size: 131112943
dataset_size: 477889151
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 478945659
num_examples: 5000
download_size: 131394829
dataset_size: 478945659
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 478119605
num_examples: 5000
download_size: 131136163
dataset_size: 478119605
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 475031209
num_examples: 5000
download_size: 137183907
dataset_size: 475031209
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 476846944
num_examples: 5000
download_size: 137732333
dataset_size: 476846944
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 476765609
num_examples: 5000
download_size: 137676146
dataset_size: 476765609
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 476804167
num_examples: 5000
download_size: 144683987
dataset_size: 476804167
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 476841856
num_examples: 5000
download_size: 144577323
dataset_size: 476841856
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 476527894
num_examples: 5000
download_size: 144617642
dataset_size: 476527894
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 475826784
num_examples: 5000
download_size: 152203690
dataset_size: 475826784
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 476532509
num_examples: 5000
download_size: 152404530
dataset_size: 476532509
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 476447408
num_examples: 5000
download_size: 152379054
dataset_size: 476447408
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 475892828
num_examples: 5000
download_size: 162279986
dataset_size: 475892828
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 476087493
num_examples: 5000
download_size: 162200755
dataset_size: 476087493
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 475345418
num_examples: 5000
download_size: 161776282
dataset_size: 475345418
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 561244312
num_examples: 5000
download_size: 251487357
dataset_size: 561244312
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 560917838
num_examples: 5000
download_size: 251700371
dataset_size: 560917838
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 561089716
num_examples: 5000
download_size: 251516707
dataset_size: 561089716
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 1161582898
num_examples: 5000
download_size: 764611216
dataset_size: 1161582898
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 1159067149
num_examples: 5000
download_size: 762746867
dataset_size: 1159067149
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 1159879651
num_examples: 5000
download_size: 763629840
dataset_size: 1159879651
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-0--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 2168249410
num_examples: 5000
download_size: 1638822997
dataset_size: 2168249410
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-42--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 2171101556
num_examples: 5000
download_size: 1641100075
dataset_size: 2171101556
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-64--agg_strategy-last
features:
- name: problem
dtype: string
- name: level
dtype: string
- name: type
dtype: string
- name: solution
dtype: string
- name: answer
dtype: string
- name: completions
list: string
- name: scores
list:
list: float32
- name: pred
dtype: string
- name: completion_tokens
list: int64
- name: agg_scores
list: float64
- name: pred_weighted@1
dtype: string
- name: pred_maj@1
dtype: string
- name: pred_naive@1
dtype: string
- name: pred_weighted@2
dtype: string
- name: pred_maj@2
dtype: string
- name: pred_naive@2
dtype: string
- name: pred_weighted@4
dtype: string
- name: pred_maj@4
dtype: string
- name: pred_naive@4
dtype: string
- name: pred_weighted@8
dtype: string
- name: pred_maj@8
dtype: string
- name: pred_naive@8
dtype: string
- name: pred_weighted@16
dtype: string
- name: pred_maj@16
dtype: string
- name: pred_naive@16
dtype: string
- name: pred_weighted@32
dtype: string
- name: pred_maj@32
dtype: string
- name: pred_naive@32
dtype: string
- name: pred_weighted@64
dtype: string
- name: pred_maj@64
dtype: string
- name: pred_naive@64
dtype: string
- name: preds
list: string
- name: pass@1
dtype: float64
- name: pass@2
dtype: float64
- name: pass@4
dtype: float64
- name: pass@8
dtype: float64
- name: pass@16
dtype: float64
- name: pass@32
dtype: float64
- name: pass@64
dtype: float64
- name: is_correct_weighted@1
dtype: bool
- name: is_correct_maj@1
dtype: bool
- name: is_correct_naive@1
dtype: bool
- name: is_correct_weighted@2
dtype: bool
- name: is_correct_maj@2
dtype: bool
- name: is_correct_naive@2
dtype: bool
- name: is_correct_weighted@4
dtype: bool
- name: is_correct_maj@4
dtype: bool
- name: is_correct_naive@4
dtype: bool
- name: is_correct_weighted@8
dtype: bool
- name: is_correct_maj@8
dtype: bool
- name: is_correct_naive@8
dtype: bool
- name: is_correct_weighted@16
dtype: bool
- name: is_correct_maj@16
dtype: bool
- name: is_correct_naive@16
dtype: bool
- name: is_correct_weighted@32
dtype: bool
- name: is_correct_maj@32
dtype: bool
- name: is_correct_naive@32
dtype: bool
- name: is_correct_weighted@64
dtype: bool
- name: is_correct_maj@64
dtype: bool
- name: is_correct_naive@64
dtype: bool
- name: is_correct_preds
list: bool
- name: _preprocessing_metadata
struct:
- name: num_pred_fields
dtype: int64
- name: preprocessed_at
dtype: string
- name: preprocessing_version
dtype: string
- name: total_evaluations
dtype: int64
splits:
- name: train
num_bytes: 2172394095
num_examples: 5000
download_size: 1641952223
dataset_size: 2172394095
configs:
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.1--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.2--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.3--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.4--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.5--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.6--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.7--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.8--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-0.9--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.0--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.1--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-0--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-0--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-42--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-42--agg_strategy-last/train-*
- config_name: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-64--agg_strategy-last
data_files:
- split: train
path: ENSEONG_math-private--T-1.2--top_p-1.0--n-64--seed-64--agg_strategy-last/train-*
---
提供机构:
ENSEONG



