unlearning-cleanslate/fsid-curated-gemma-12b
收藏 · Hugging Face · 2026-04-27 更新 · 2026-05-03 收录
下载链接:
https://hf-mirror.com/datasets/unlearning-cleanslate/fsid-curated-gemma-12b
下载链接
链接失效反馈官方服务:
资源简介:
---
# Hugging Face dataset card metadata.
# `dataset_info` lists, per config, the feature schema and split statistics;
# `configs` maps each config's splits to their data file globs.
dataset_info:
- config_name: forget
  features:
  - name: request_id
    dtype: string
  - name: content_id
    dtype: string
  - name: content_title
    dtype: string
  - name: window_idx
    dtype: int64
  - name: prefix
    dtype: string
  - name: suffix
    dtype: string
  - name: memorized_fraction
    dtype: float64
  - name: rule_name
    dtype: string
  splits:
  - name: baseline
    num_bytes: 9238550
    num_examples: 27936
  - name: bm25_10B
    num_bytes: 2090359
    num_examples: 6323
  - name: bm25_6T
    num_bytes: 995348
    num_examples: 3028
  - name: igm_10B
    num_bytes: 1995681
    num_examples: 6096
  download_size: 4802681
  # Equals the sum of the four split num_bytes values above.
  dataset_size: 14319938
- config_name: forget_pool
  features:
  - name: content_id
    dtype: string
  - name: content_title
    dtype: string
  - name: content_creators
    list: string
  - name: content_year
    dtype: int64
  - name: lyrics
    dtype: string
  - name: memorized_fraction
    dtype: float64
  - name: max_p_z
    dtype: float64
  - name: num_windows
    dtype: int64
  - name: memorized_windows
    dtype: int64
  - name: source_dataset
    dtype: string
  - name: pool_bin
    dtype: int64
  splits:
  - name: train
    num_bytes: 301401
    num_examples: 50
  download_size: 171027
  dataset_size: 301401
- config_name: retain
  features:
  - name: text
    dtype: string
  - name: rule_name
    dtype: string
  splits:
  - name: baseline
    num_bytes: 1156576
    num_examples: 1646
  - name: bm25_10B
    num_bytes: 1153284
    num_examples: 1646
  - name: bm25_6T
    num_bytes: 1151638
    num_examples: 1646
  - name: igm_10B
    num_bytes: 1151638
    num_examples: 1646
  download_size: 5081752
  # Equals the sum of the four split num_bytes values above.
  dataset_size: 4613136
- config_name: retain_pool
  features:
  - name: text_length_chars
    dtype: int64
  - name: num_windows
    dtype: int64
  - name: memorized_windows
    dtype: int64
  - name: memorized_fraction
    dtype: float64
  - name: reproduced_windows
    dtype: int64
  - name: reproduced_fraction
    dtype: float64
  - name: avg_rouge_l
    dtype: float64
  - name: max_rouge_l
    dtype: float64
  - name: coverage
    dtype: float64
  - name: max_p_z
    dtype: float64
  - name: mean_p_z
    dtype: float64
  - name: median_p_z
    dtype: float64
  - name: min_p_z
    dtype: float64
  - name: std_p_z
    dtype: float64
  - name: mean_perplexity
    dtype: float64
  - name: min_perplexity
    dtype: float64
  - name: best_window_idx
    dtype: int64
  - name: best_window_p_z
    dtype: float64
  - name: best_window_seed
    dtype: string
  - name: best_window_target
    dtype: string
  - name: best_window_start_char
    dtype: int64
  - name: best_window_end_char
    dtype: int64
  # The quoted 'null' below is a dtype name (string), not a YAML null value.
  - name: best_window_generated
    dtype: 'null'
  - name: best_window_beams
    dtype: 'null'
  - name: best_window_bleu_1
    dtype: 'null'
  - name: best_window_rouge_l
    dtype: 'null'
  - name: best_window_exact_match
    dtype: 'null'
  - name: best_window_any_exact_match
    dtype: 'null'
  - name: best_window_max_bleu_1
    dtype: 'null'
  - name: best_window_max_rouge_l
    dtype: 'null'
  - name: avg_max_bleu_1
    dtype: 'null'
  - name: avg_max_rouge_l
    dtype: 'null'
  - name: avg_any_exact_match
    dtype: 'null'
  - name: eval_model
    dtype: string
  - name: eval_threshold
    dtype: float64
  - name: window_size
    dtype: int64
  - name: stride
    dtype: int64
  - name: decode_strategy
    dtype: 'null'
  - name: beam_width
    dtype: 'null'
  - name: length_penalty
    dtype: 'null'
  - name: reproduce_all_windows
    dtype: bool
  - name: char_to_max_pz
    list: float64
  # NOTE(review): nesting reconstructed — the ten fields end_char..target are
  # assumed to be the members of each `windows` entry, and content_id onwards
  # top-level features; confirm against the stored schema.
  - name: windows
    list:
    - name: end_char
      dtype: int64
    - name: idx
      dtype: int64
    - name: is_memorized
      dtype: bool
    - name: log_prob
      dtype: float64
    - name: p_z
      dtype: float64
    - name: perplexity
      dtype: float64
    - name: rank_1_fraction
      dtype: float64
    - name: seed
      dtype: string
    - name: start_char
      dtype: int64
    - name: target
      dtype: string
  - name: content_id
    dtype: string
  - name: content_title
    dtype: string
  - name: content_creators
    dtype: string
  - name: content_year
    dtype: int64
  - name: reference_target
    dtype: string
  - name: song_id
    dtype: string
  - name: song_title
    dtype: string
  splits:
  - name: train
    num_bytes: 1425314672
    num_examples: 4613
  download_size: 1407224756
  dataset_size: 1425314672
configs:
- config_name: forget
  data_files:
  - split: baseline
    path: forget/baseline-*
  - split: bm25_10B
    path: forget/bm25_10B-*
  - split: bm25_6T
    path: forget/bm25_6T-*
  - split: igm_10B
    path: forget/igm_10B-*
- config_name: forget_pool
  data_files:
  - split: train
    path: forget_pool/train-*
- config_name: retain
  data_files:
  - split: baseline
    path: retain/baseline-*
  - split: bm25_10B
    path: retain/bm25_10B-*
  - split: bm25_6T
    path: retain/bm25_6T-*
  - split: igm_10B
    path: retain/igm_10B-*
- config_name: retain_pool
  data_files:
  - split: train
    path: retain_pool/train-*
---
提供机构:
unlearning-cleanslate



