faezeb/wildchat_76k-k4-all-style-pca
收藏 | Hugging Face | 2026-02-26 更新 | 2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/faezeb/wildchat_76k-k4-all-style-pca
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
# Dataset card front matter for faezeb/wildchat_76k-k4-all-style-pca.
# Nesting restored: the flattened listing had lost all indentation, which
# left `features`, `splits`, and the size fields detached from `dataset_info`.
dataset_info:
  # Column schema: one entry per column, giving its name and dtype.
  features:
    - name: model
      dtype: string
    - name: prompt
      dtype: string
    - name: response
      dtype: string
    - name: key
      dtype: string
    - name: source
      dtype: string
    - name: cluster
      dtype: int64
    - name: n_chars
      dtype: int64
    - name: n_tokens
      dtype: int64
    - name: n_words
      dtype: int64
    - name: n_lines
      dtype: int64
    - name: n_sents
      dtype: int64
    - name: avg_word_len
      dtype: float64
    - name: avg_sent_len_tokens
      dtype: float64
    - name: avg_line_len_tokens
      dtype: float64
    - name: type_token_ratio
      dtype: float64
    - name: entropy_words
      dtype: float64
    - name: unique_word_ratio
      dtype: float64
    - name: repetition_ratio
      dtype: float64
    - name: punct_ratio
      dtype: float64
    - name: n_exclaim
      dtype: int64
    - name: n_question
      dtype: int64
    - name: n_colon
      dtype: int64
    - name: n_semicolon
      dtype: int64
    - name: n_parens
      dtype: int64
    - name: n_commas
      dtype: int64
    - name: n_dashes
      dtype: int64
    - name: numbered_steps
      dtype: int64
    - name: bullet_lines
      dtype: int64
    - name: heading_lines
      dtype: int64
    - name: blank_lines
      dtype: int64
    # NOTE(review): the has_* columns are declared float64, not bool —
    # kept exactly as listed; confirm against the data if a flag was intended.
    - name: has_plan
      dtype: float64
    - name: has_final
      dtype: float64
    - name: has_sections
      dtype: float64
    - name: hedge_count
      dtype: int64
    - name: certainty_count
      dtype: int64
    - name: discourse_count
      dtype: int64
    - name: check_count
      dtype: int64
    - name: backtrack_count
      dtype: int64
    - name: first_person
      dtype: int64
    - name: second_person
      dtype: int64
    - name: apologies
      dtype: int64
    - name: filler_count
      dtype: int64
    - name: math_symbols
      dtype: int64
    - name: latex_inline
      dtype: int64
    - name: latex_display
      dtype: int64
    - name: theorem_words
      dtype: int64
    - name: fenced_code_blocks
      dtype: int64
    - name: inline_code_spans
      dtype: int64
    - name: code_keywords
      dtype: int64
    - name: indented_lines
      dtype: int64
    - name: brackets
      dtype: int64
    - name: backticks
      dtype: int64
    - name: urls
      dtype: int64
    - name: refs
      dtype: int64
    - name: steps_per_100_tokens
      dtype: float64
    - name: hedges_per_100_tokens
      dtype: float64
    - name: certainty_per_100_tokens
      dtype: float64
    - name: discourse_per_100_tokens
      dtype: float64
    - name: codekw_per_100_tokens
      dtype: float64
    - name: mathsym_per_100_tokens
      dtype: float64
    - name: newline_ratio
      dtype: float64
    - name: digit_ratio
      dtype: float64
    - name: upper_ratio
      dtype: float64
    - name: formal_proxy
      dtype: int64
    - name: style_cluster_id
      dtype: int64
    - name: style_cluster_conf
      dtype: float64
    - name: style_outlier
      dtype: bool
    - name: style_PC1
      dtype: float64
    - name: style_PC2
      dtype: float64
    - name: style_PC3
      dtype: float64
    - name: style_PC4
      dtype: float64
    - name: style_PC5
      dtype: float64
    - name: style_PC6
      dtype: float64
    - name: style_PC7
      dtype: float64
    - name: style_PC8
      dtype: float64
    - name: style_PC9
      dtype: float64
    - name: style_PC10
      dtype: float64
  # A single split is declared.
  splits:
    - name: train
      num_bytes: 6956290808
      num_examples: 514638
  download_size: 2991791181
  dataset_size: 6956290808
# One config ("default") whose train split is read from the data/train-* glob.
configs:
  - config_name: default
    data_files:
      - split: train
        path: data/train-*
---
提供机构:
faezeb



