MLP-KTLim/Kor-CC-Dumps
收藏 | Hugging Face | 2026-02-13 更新 | 2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/MLP-KTLim/Kor-CC-Dumps
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
# Per-config schema and size statistics. Every config below declares the same
# feature list and a single `train` split; only the byte/example counts differ.
# NOTE(review): the nesting was reconstructed from a copy that had lost all
# indentation. `dump` and `url` .. `__label__ko` are placed inside the
# `metadata` struct by inference; confirm where the struct ends (in particular
# for `file_path` and `__label__ko`) against the actual data schema.
dataset_info:
- config_name: CC-MAIN-2024-22
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 13935373646
    num_examples: 1839847
  download_size: 6556645229
  dataset_size: 13935373646
- config_name: CC-MAIN-2024-26
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 161121580078
    num_examples: 21306242
  download_size: 75576117863
  dataset_size: 161121580078
- config_name: CC-MAIN-2024-30
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 127508888141
    num_examples: 16929268
  download_size: 60550916929
  dataset_size: 127508888141
- config_name: CC-MAIN-2024-33
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 120613836327
    num_examples: 15877789
  download_size: 57338594594
  dataset_size: 120613836327
- config_name: CC-MAIN-2024-38
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 135946063667
    num_examples: 17803771
  download_size: 64691806364
  dataset_size: 135946063667
- config_name: CC-MAIN-2024-42
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 130127466237
    num_examples: 17435297
  download_size: 62042599518
  dataset_size: 130127466237
- config_name: CC-MAIN-2024-46
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 137351629529
    num_examples: 18425664
  download_size: 65473187519
  dataset_size: 137351629529
- config_name: CC-MAIN-2024-51
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 143199427392
    num_examples: 18902638
  download_size: 67647918055
  dataset_size: 143199427392
- config_name: CC-MAIN-2025-05
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 157528032151
    num_examples: 20677486
  download_size: 73884987407
  dataset_size: 157528032151
- config_name: CC-MAIN-2025-08
  features:
  - name: text
    dtype: string
  - name: id
    dtype: string
  - name: metadata
    struct:
    - name: dump
      dtype: string
    - name: url
      dtype: string
    - name: date
      dtype: timestamp[s]
    - name: file_path
      dtype: string
    - name: __label__ko
      dtype: float64
  splits:
  - name: train
    num_bytes: 158446089313
    num_examples: 20527961
  download_size: 74554440414
  dataset_size: 158446089313
# Data-file mapping: each config exposes a single `train` split whose files
# are matched by the glob `<config_name>/train-*`.
configs:
- config_name: CC-MAIN-2024-22
  data_files:
  - split: train
    path: CC-MAIN-2024-22/train-*
- config_name: CC-MAIN-2024-26
  data_files:
  - split: train
    path: CC-MAIN-2024-26/train-*
- config_name: CC-MAIN-2024-30
  data_files:
  - split: train
    path: CC-MAIN-2024-30/train-*
- config_name: CC-MAIN-2024-33
  data_files:
  - split: train
    path: CC-MAIN-2024-33/train-*
- config_name: CC-MAIN-2024-38
  data_files:
  - split: train
    path: CC-MAIN-2024-38/train-*
- config_name: CC-MAIN-2024-42
  data_files:
  - split: train
    path: CC-MAIN-2024-42/train-*
- config_name: CC-MAIN-2024-46
  data_files:
  - split: train
    path: CC-MAIN-2024-46/train-*
- config_name: CC-MAIN-2024-51
  data_files:
  - split: train
    path: CC-MAIN-2024-51/train-*
- config_name: CC-MAIN-2025-05
  data_files:
  - split: train
    path: CC-MAIN-2025-05/train-*
- config_name: CC-MAIN-2025-08
  data_files:
  - split: train
    path: CC-MAIN-2025-08/train-*
---
提供机构:
MLP-KTLim



