Shuu12121/owl_code_search_hard_negative_datasets_V2_kd
收藏Hugging Face2026-04-15 更新2026-05-03 收录
下载链接:
https://hf-mirror.com/datasets/Shuu12121/owl_code_search_hard_negative_datasets_V2_kd
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: documents_go
features:
- name: document_id
dtype: string
- name: document
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 788961766
num_examples: 1361475
download_size: 234362060
dataset_size: 788961766
- config_name: documents_java
features:
- name: document_id
dtype: string
- name: document
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 595730631
num_examples: 1281018
download_size: 157313428
dataset_size: 595730631
- config_name: documents_javascript
features:
- name: document_id
dtype: string
- name: document
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 103581935
num_examples: 129007
download_size: 36388466
dataset_size: 103581935
- config_name: documents_php
features:
- name: document_id
dtype: string
- name: document
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 223918975
num_examples: 424463
download_size: 63024688
dataset_size: 223918975
- config_name: documents_python
features:
- name: document_id
dtype: string
- name: document
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 1076005138
num_examples: 776900
download_size: 335083397
dataset_size: 1076005138
- config_name: documents_ruby
features:
- name: document_id
dtype: string
- name: document
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 63838225
num_examples: 104899
download_size: 15331355
dataset_size: 63838225
- config_name: documents_rust
features:
- name: document_id
dtype: string
- name: document
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 239717261
num_examples: 381521
download_size: 76487156
dataset_size: 239717261
- config_name: documents_typescript
features:
- name: document_id
dtype: string
- name: document
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 265789788
num_examples: 328457
download_size: 77031262
dataset_size: 265789788
- config_name: queries_go
features:
- name: query_id
dtype: string
- name: query
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 244374320
num_examples: 1361475
download_size: 79885874
dataset_size: 244374320
- config_name: queries_java
features:
- name: query_id
dtype: string
- name: query
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 392026807
num_examples: 1281018
download_size: 102004628
dataset_size: 392026807
- config_name: queries_javascript
features:
- name: query_id
dtype: string
- name: query
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 40951897
num_examples: 129007
download_size: 14431296
dataset_size: 40951897
- config_name: queries_php
features:
- name: query_id
dtype: string
- name: query
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 101783501
num_examples: 424463
download_size: 31142491
dataset_size: 101783501
- config_name: queries_python
features:
- name: query_id
dtype: string
- name: query
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 333640311
num_examples: 776900
download_size: 102366988
dataset_size: 333640311
- config_name: queries_ruby
features:
- name: query_id
dtype: string
- name: query
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 67089741
num_examples: 104899
download_size: 16729531
dataset_size: 67089741
- config_name: queries_rust
features:
- name: query_id
dtype: string
- name: query
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 78338197
num_examples: 381521
download_size: 30360518
dataset_size: 78338197
- config_name: queries_typescript
features:
- name: query_id
dtype: string
- name: query
dtype: string
- name: split
dtype: string
splits:
- name: train
num_bytes: 96580847
num_examples: 328457
download_size: 30396829
dataset_size: 96580847
- config_name: scores_go
features:
- name: query_id
dtype: string
- name: document_ids
sequence: string
- name: scores
sequence: float64
- name: split
dtype: string
splits:
- name: train
num_bytes: 6368108469
num_examples: 1361475
download_size: 3268145122
dataset_size: 6368108469
- config_name: scores_java
features:
- name: query_id
dtype: string
- name: document_ids
sequence: string
- name: scores
sequence: float64
- name: split
dtype: string
splits:
- name: train
num_bytes: 6478731786
num_examples: 1281018
download_size: 3134730175
dataset_size: 6478731786
- config_name: scores_javascript
features:
- name: query_id
dtype: string
- name: document_ids
sequence: string
- name: scores
sequence: float64
- name: split
dtype: string
splits:
- name: train
num_bytes: 784178497
num_examples: 129007
download_size: 301374369
dataset_size: 784178497
- config_name: scores_php
features:
- name: query_id
dtype: string
- name: document_ids
sequence: string
- name: scores
sequence: float64
- name: split
dtype: string
splits:
- name: train
num_bytes: 2029525899
num_examples: 424463
download_size: 999627270
dataset_size: 2029525899
- config_name: scores_python
features:
- name: query_id
dtype: string
- name: document_ids
sequence: string
- name: scores
sequence: float64
- name: split
dtype: string
splits:
- name: train
num_bytes: 4202284424
num_examples: 776900
download_size: 1889424131
dataset_size: 4202284424
- config_name: scores_ruby
features:
- name: query_id
dtype: string
- name: document_ids
sequence: string
- name: scores
sequence: float64
- name: split
dtype: string
splits:
- name: train
num_bytes: 505741456
num_examples: 104899
download_size: 220359577
dataset_size: 505741456
- config_name: scores_rust
features:
- name: query_id
dtype: string
- name: document_ids
sequence: string
- name: scores
sequence: float64
- name: split
dtype: string
splits:
- name: train
num_bytes: 1898140853
num_examples: 381521
download_size: 933043800
dataset_size: 1898140853
- config_name: scores_typescript
features:
- name: query_id
dtype: string
- name: document_ids
sequence: string
- name: scores
sequence: float64
- name: split
dtype: string
splits:
- name: train
num_bytes: 2030421789
num_examples: 328457
download_size: 796324061
dataset_size: 2030421789
configs:
- config_name: documents_go
data_files:
- split: train
path: documents_go/train-*
- config_name: documents_java
data_files:
- split: train
path: documents_java/train-*
- config_name: documents_javascript
data_files:
- split: train
path: documents_javascript/train-*
- config_name: documents_php
data_files:
- split: train
path: documents_php/train-*
- config_name: documents_python
data_files:
- split: train
path: documents_python/train-*
- config_name: documents_ruby
data_files:
- split: train
path: documents_ruby/train-*
- config_name: documents_rust
data_files:
- split: train
path: documents_rust/train-*
- config_name: documents_typescript
data_files:
- split: train
path: documents_typescript/train-*
- config_name: queries_go
data_files:
- split: train
path: queries_go/train-*
- config_name: queries_java
data_files:
- split: train
path: queries_java/train-*
- config_name: queries_javascript
data_files:
- split: train
path: queries_javascript/train-*
- config_name: queries_php
data_files:
- split: train
path: queries_php/train-*
- config_name: queries_python
data_files:
- split: train
path: queries_python/train-*
- config_name: queries_ruby
data_files:
- split: train
path: queries_ruby/train-*
- config_name: queries_rust
data_files:
- split: train
path: queries_rust/train-*
- config_name: queries_typescript
data_files:
- split: train
path: queries_typescript/train-*
- config_name: scores_go
data_files:
- split: train
path: scores_go/train-*
- config_name: scores_java
data_files:
- split: train
path: scores_java/train-*
- config_name: scores_javascript
data_files:
- split: train
path: scores_javascript/train-*
- config_name: scores_php
data_files:
- split: train
path: scores_php/train-*
- config_name: scores_python
data_files:
- split: train
path: scores_python/train-*
- config_name: scores_ruby
data_files:
- split: train
path: scores_ruby/train-*
- config_name: scores_rust
data_files:
- split: train
path: scores_rust/train-*
- config_name: scores_typescript
data_files:
- split: train
path: scores_typescript/train-*
---
提供机构:
Shuu12121



