12kimih/r1qa-guided-rollouts
收藏 | Hugging Face | 2025-11-19 更新 | 2025-12-20 收录
下载链接:
https://hf-mirror.com/datasets/12kimih/r1qa-guided-rollouts
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
dataset_info:
- config_name: Qwen3-0.6B
features:
- name: sample_id
dtype: int64
- name: metadata
struct:
- name: answerable
dtype: bool
- name: id
dtype: string
- name: paragraphs
list:
- name: idx
dtype: int64
- name: is_supporting
dtype: bool
- name: paragraph_text
dtype: string
- name: title
dtype: string
- name: question_decomposition
list:
- name: answer
dtype: string
- name: id
dtype: int64
- name: paragraph_support_idx
dtype: int64
- name: question
dtype: string
- name: question
dtype: string
- name: answer
dtype: string
- name: answer_aliases
list: string
- name: rollout_id
dtype: int64
- name: prompt
list:
- name: content
dtype: string
- name: role
dtype: string
- name: response
dtype: string
- name: predicted_answer
dtype: string
- name: reasoning_trace
dtype: string
- name: clip_and_guide
dtype: string
- name: cheat
dtype: float64
splits:
- name: v0
num_bytes: 581104899
num_examples: 19938
- name: v1
num_bytes: 545051924
num_examples: 19938
- name: v2.offline
num_bytes: 541743542
num_examples: 19938
- name: v2.online
num_bytes: 540922248
num_examples: 19938
- name: v3.offline
num_bytes: 540030599
num_examples: 19938
- name: v3.online
num_bytes: 537541039
num_examples: 19938
- name: v4.offline
num_bytes: 539368524
num_examples: 19938
- name: v4.online
num_bytes: 537149071
num_examples: 19938
- name: gptoss.v0
num_bytes: 660029791
num_examples: 19938
- name: gptoss.v1.offline
num_bytes: 570421694
num_examples: 19938
- name: gptoss.v1.online
num_bytes: 572687317
num_examples: 19938
- name: gptoss.v2.offline
num_bytes: 566499008
num_examples: 19938
- name: gptoss.v2.online
num_bytes: 565761023
num_examples: 19938
- name: gptoss.v3.offline
num_bytes: 562972701
num_examples: 19938
- name: gptoss.v3.online
num_bytes: 561141491
num_examples: 19938
- name: gptoss.v4.offline
num_bytes: 562007752
num_examples: 19938
- name: gptoss.v4.online
num_bytes: 559315364
num_examples: 19938
- name: gptoss.v5.offline
num_bytes: 561136851
num_examples: 19938
- name: gptoss.v5.online
num_bytes: 558519113
num_examples: 19938
- name: gptoss.v6.offline
num_bytes: 560758462
num_examples: 19938
- name: gptoss.v6.online
num_bytes: 557937031
num_examples: 19938
- name: gptoss.v7.offline
num_bytes: 560063728
num_examples: 19938
- name: gptoss.v7.online
num_bytes: 556890579
num_examples: 19938
- name: gptoss.v8.offline
num_bytes: 559704208
num_examples: 19938
- name: gptoss.v8.online
num_bytes: 556515656
num_examples: 19938
- name: grpo.v0
num_bytes: 615677981
num_examples: 19938
- name: grpo.v1.offline
num_bytes: 576523039
num_examples: 19938
- name: grpo.v1.online
num_bytes: 576407063
num_examples: 19938
- name: grpo.v2.offline
num_bytes: 569585459
num_examples: 19938
- name: grpo.v3.offline
num_bytes: 566298577
num_examples: 19938
download_size: 6773296809
dataset_size: 16919765734
- config_name: Qwen3-1.7B
features:
- name: sample_id
dtype: int64
- name: metadata
struct:
- name: answerable
dtype: bool
- name: id
dtype: string
- name: paragraphs
list:
- name: idx
dtype: int64
- name: is_supporting
dtype: bool
- name: paragraph_text
dtype: string
- name: title
dtype: string
- name: question_decomposition
list:
- name: answer
dtype: string
- name: id
dtype: int64
- name: paragraph_support_idx
dtype: int64
- name: question
dtype: string
- name: question
dtype: string
- name: answer
dtype: string
- name: answer_aliases
list: string
- name: rollout_id
dtype: int64
- name: prompt
list:
- name: content
dtype: string
- name: role
dtype: string
- name: response
dtype: string
- name: predicted_answer
dtype: string
- name: reasoning_trace
dtype: string
- name: clip_and_guide
dtype: string
- name: cheat
dtype: float64
splits:
- name: v0
num_bytes: 646827914
num_examples: 19938
- name: v1
num_bytes: 559239497
num_examples: 19938
- name: v2.offline
num_bytes: 555427335
num_examples: 19938
- name: v2.online
num_bytes: 552968336
num_examples: 19938
- name: v3.offline
num_bytes: 552408297
num_examples: 19938
- name: v3.online
num_bytes: 550445182
num_examples: 19938
- name: v4.offline
num_bytes: 551612432
num_examples: 19938
- name: v4.online
num_bytes: 549212299
num_examples: 19938
- name: v5.offline
num_bytes: 551512321
num_examples: 19938
- name: v5.online
num_bytes: 549045566
num_examples: 19938
- name: v6.offline
num_bytes: 551340998
num_examples: 19938
- name: v6.online
num_bytes: 550418723
num_examples: 19938
download_size: 2702077404
dataset_size: 6720458900
- config_name: Qwen3-4B
features:
- name: sample_id
dtype: int64
- name: metadata
struct:
- name: answerable
dtype: bool
- name: id
dtype: string
- name: paragraphs
list:
- name: idx
dtype: int64
- name: is_supporting
dtype: bool
- name: paragraph_text
dtype: string
- name: title
dtype: string
- name: question_decomposition
list:
- name: answer
dtype: string
- name: id
dtype: int64
- name: paragraph_support_idx
dtype: int64
- name: question
dtype: string
- name: question
dtype: string
- name: answer
dtype: string
- name: answer_aliases
list: string
- name: rollout_id
dtype: int64
- name: prompt
list:
- name: content
dtype: string
- name: role
dtype: string
- name: response
dtype: string
- name: predicted_answer
dtype: string
- name: reasoning_trace
dtype: string
- name: clip_and_guide
dtype: string
- name: cheat
dtype: float64
splits:
- name: v0
num_bytes: 646644050
num_examples: 19938
- name: v1
num_bytes: 565830487
num_examples: 19938
- name: v2.offline
num_bytes: 558363936
num_examples: 19938
- name: v2.online
num_bytes: 558017091
num_examples: 19938
- name: v3.offline
num_bytes: 557197383
num_examples: 19938
- name: v3.online
num_bytes: 555035309
num_examples: 19938
- name: v4.offline
num_bytes: 553968495
num_examples: 19938
- name: v4.online
num_bytes: 554127526
num_examples: 19938
- name: v5.offline
num_bytes: 553766665
num_examples: 19938
- name: v5.online
num_bytes: 551173464
num_examples: 19938
- name: v6.offline
num_bytes: 554593491
num_examples: 19938
- name: v6.online
num_bytes: 550680592
num_examples: 19938
download_size: 2702672347
dataset_size: 6759398489
configs:
- config_name: Qwen3-0.6B
data_files:
- split: v0
path: Qwen3-0.6B/v0-*
- split: v1
path: Qwen3-0.6B/v1-*
- split: v2.offline
path: Qwen3-0.6B/v2.offline-*
- split: v2.online
path: Qwen3-0.6B/v2.online-*
- split: v3.offline
path: Qwen3-0.6B/v3.offline-*
- split: v3.online
path: Qwen3-0.6B/v3.online-*
- split: v4.offline
path: Qwen3-0.6B/v4.offline-*
- split: v4.online
path: Qwen3-0.6B/v4.online-*
- split: gptoss.v0
path: Qwen3-0.6B/gptoss.v0-*
- split: gptoss.v1.offline
path: Qwen3-0.6B/gptoss.v1.offline-*
- split: gptoss.v1.online
path: Qwen3-0.6B/gptoss.v1.online-*
- split: gptoss.v2.offline
path: Qwen3-0.6B/gptoss.v2.offline-*
- split: gptoss.v2.online
path: Qwen3-0.6B/gptoss.v2.online-*
- split: gptoss.v3.offline
path: Qwen3-0.6B/gptoss.v3.offline-*
- split: gptoss.v3.online
path: Qwen3-0.6B/gptoss.v3.online-*
- split: gptoss.v4.offline
path: Qwen3-0.6B/gptoss.v4.offline-*
- split: gptoss.v4.online
path: Qwen3-0.6B/gptoss.v4.online-*
- split: gptoss.v5.offline
path: Qwen3-0.6B/gptoss.v5.offline-*
- split: gptoss.v5.online
path: Qwen3-0.6B/gptoss.v5.online-*
- split: gptoss.v6.offline
path: Qwen3-0.6B/gptoss.v6.offline-*
- split: gptoss.v6.online
path: Qwen3-0.6B/gptoss.v6.online-*
- split: gptoss.v7.offline
path: Qwen3-0.6B/gptoss.v7.offline-*
- split: gptoss.v7.online
path: Qwen3-0.6B/gptoss.v7.online-*
- split: gptoss.v8.offline
path: Qwen3-0.6B/gptoss.v8.offline-*
- split: gptoss.v8.online
path: Qwen3-0.6B/gptoss.v8.online-*
- split: grpo.v0
path: Qwen3-0.6B/grpo.v0-*
- split: grpo.v1.offline
path: Qwen3-0.6B/grpo.v1.offline-*
- split: grpo.v1.online
path: Qwen3-0.6B/grpo.v1.online-*
- split: grpo.v2.offline
path: Qwen3-0.6B/grpo.v2.offline-*
- split: grpo.v3.offline
path: Qwen3-0.6B/grpo.v3.offline-*
- config_name: Qwen3-1.7B
data_files:
- split: v0
path: Qwen3-1.7B/v0-*
- split: v1
path: Qwen3-1.7B/v1-*
- split: v2.offline
path: Qwen3-1.7B/v2.offline-*
- split: v2.online
path: Qwen3-1.7B/v2.online-*
- split: v3.offline
path: Qwen3-1.7B/v3.offline-*
- split: v3.online
path: Qwen3-1.7B/v3.online-*
- split: v4.offline
path: Qwen3-1.7B/v4.offline-*
- split: v4.online
path: Qwen3-1.7B/v4.online-*
- split: v5.offline
path: Qwen3-1.7B/v5.offline-*
- split: v5.online
path: Qwen3-1.7B/v5.online-*
- split: v6.offline
path: Qwen3-1.7B/v6.offline-*
- split: v6.online
path: Qwen3-1.7B/v6.online-*
- config_name: Qwen3-4B
data_files:
- split: v0
path: Qwen3-4B/v0-*
- split: v1
path: Qwen3-4B/v1-*
- split: v2.offline
path: Qwen3-4B/v2.offline-*
- split: v2.online
path: Qwen3-4B/v2.online-*
- split: v3.offline
path: Qwen3-4B/v3.offline-*
- split: v3.online
path: Qwen3-4B/v3.online-*
- split: v4.offline
path: Qwen3-4B/v4.offline-*
- split: v4.online
path: Qwen3-4B/v4.online-*
- split: v5.offline
path: Qwen3-4B/v5.offline-*
- split: v5.online
path: Qwen3-4B/v5.online-*
- split: v6.offline
path: Qwen3-4B/v6.offline-*
- split: v6.online
path: Qwen3-4B/v6.online-*
---
提供机构:
12kimih



