ACSci/evaluation_0408
收藏 | Hugging Face | 2026-04-08 更新 | 2026-04-12 收录
下载链接:
https://hf-mirror.com/datasets/ACSci/evaluation_0408
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
dataset_info:
# Config "Qwen3-4B-Instruct-SFT-v00.01-checkpoint-60": 16-column schema (every
# column is a string except the boolean `evaluated`; the *_score columns are
# strings too) and size statistics for its only split, ICLR_2026_oral.
- config_name: Qwen3-4B-Instruct-SFT-v00.01-checkpoint-60
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 3903110
    num_examples: 222
  download_size: 723475
  dataset_size: 3903110
# Config "aicsi-rl-v00.00-step-000050": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.00-step-000050
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 2501547
    num_examples: 222
  download_size: 1240818
  dataset_size: 2501547
# Config "aicsi-rl-v00.00-step-000100": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.00-step-000100
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 2685011
    num_examples: 222
  download_size: 1326934
  dataset_size: 2685011
# Config "aicsi-rl-v00.00-step-000150": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.00-step-000150
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 2844731
    num_examples: 222
  download_size: 1407230
  dataset_size: 2844731
# Config "aicsi-rl-v00.00-step-000200": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.00-step-000200
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 3024841
    num_examples: 222
  download_size: 1485617
  dataset_size: 3024841
# Config "aicsi-rl-v00.00-step-000300": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.00-step-000300
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 3362524
    num_examples: 222
  download_size: 1625392
  dataset_size: 3362524
# Config "aicsi-rl-v00.01-step-000050": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.01-step-000050
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 2349548
    num_examples: 222
  download_size: 1166102
  dataset_size: 2349548
# Config "aicsi-rl-v00.01-step-000100": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.01-step-000100
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 1856014
    num_examples: 222
  download_size: 946419
  dataset_size: 1856014
# Config "aicsi-rl-v00.01-step-000150": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.01-step-000150
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 1499143
    num_examples: 222
  download_size: 742690
  dataset_size: 1499143
# Config "aicsi-rl-v00.01-step-000200": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.01-step-000200
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 1383780
    num_examples: 222
  download_size: 673117
  dataset_size: 1383780
# Config "aicsi-rl-v00.01-step-000300": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.01-step-000300
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 1444773
    num_examples: 222
  download_size: 704099
  dataset_size: 1444773
# Config "aicsi-rl-v00.01-step-000400": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: aicsi-rl-v00.01-step-000400
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 1464890
    num_examples: 222
  download_size: 701398
  dataset_size: 1464890
# Config "azure-openai-gpt-5.4": 16-column schema (every column is a string
# except the boolean `evaluated`; the *_score columns are strings too) and
# size statistics for its only split, ICLR_2026_oral.
- config_name: azure-openai-gpt-5.4
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 4415397
    num_examples: 222
  download_size: 2251784
  dataset_size: 4415397
# Config "gcp-google-gemini-3-flash-preview": 16-column schema (every column
# is a string except the boolean `evaluated`; the *_score columns are strings
# too) and size statistics for its only split, ICLR_2026_oral.
- config_name: gcp-google-gemini-3-flash-preview
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 1822760
    num_examples: 222
  download_size: 982708
  dataset_size: 1822760
# Config "nvidia-openai-gpt-oss-120b": 16-column schema (every column is a
# string except the boolean `evaluated`; the *_score columns are strings too)
# and size statistics for its only split, ICLR_2026_oral.
- config_name: nvidia-openai-gpt-oss-120b
  features:
  - name: paper_id
    dtype: string
  - name: title
    dtype: string
  - name: condition
    dtype: string
  - name: ground_truth
    dtype: string
  - name: generated_output
    dtype: string
  - name: equivalence_label
    dtype: string
  - name: similarity_score
    dtype: string
  - name: novelty_score
    dtype: string
  - name: feasibility_score
    dtype: string
  - name: specificity_score
    dtype: string
  - name: significance_score
    dtype: string
  - name: justification
    dtype: string
  - name: strengths
    dtype: string
  - name: weaknesses
    dtype: string
  - name: raw_evaluation
    dtype: string
  - name: evaluated
    dtype: bool
  splits:
  - name: ICLR_2026_oral
    num_bytes: 3235267
    num_examples: 222
  download_size: 1734022
  dataset_size: 3235267
# Maps each config name to the files backing its single ICLR_2026_oral split.
# Every path is a glob inside a directory named after the config.
configs:
- config_name: Qwen3-4B-Instruct-SFT-v00.01-checkpoint-60
  data_files:
  - split: ICLR_2026_oral
    path: Qwen3-4B-Instruct-SFT-v00.01-checkpoint-60/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.00-step-000050
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.00-step-000050/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.00-step-000100
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.00-step-000100/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.00-step-000150
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.00-step-000150/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.00-step-000200
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.00-step-000200/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.00-step-000300
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.00-step-000300/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.01-step-000050
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.01-step-000050/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.01-step-000100
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.01-step-000100/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.01-step-000150
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.01-step-000150/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.01-step-000200
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.01-step-000200/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.01-step-000300
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.01-step-000300/ICLR_2026_oral-*
- config_name: aicsi-rl-v00.01-step-000400
  data_files:
  - split: ICLR_2026_oral
    path: aicsi-rl-v00.01-step-000400/ICLR_2026_oral-*
- config_name: azure-openai-gpt-5.4
  data_files:
  - split: ICLR_2026_oral
    path: azure-openai-gpt-5.4/ICLR_2026_oral-*
- config_name: gcp-google-gemini-3-flash-preview
  data_files:
  - split: ICLR_2026_oral
    path: gcp-google-gemini-3-flash-preview/ICLR_2026_oral-*
- config_name: nvidia-openai-gpt-oss-120b
  data_files:
  - split: ICLR_2026_oral
    path: nvidia-openai-gpt-oss-120b/ICLR_2026_oral-*
---
提供机构:
ACSci



