five

Shuu12121/owl_code_search_hard_negative_datasets_V2_kd

收藏
Hugging Face · 2026-04-15 更新 · 2026-05-03 收录
下载链接:
https://hf-mirror.com/datasets/Shuu12121/owl_code_search_hard_negative_datasets_V2_kd
下载链接
链接失效反馈
官方服务:
资源简介:
---
dataset_info:
- config_name: documents_go
  features:
  - name: document_id
    dtype: string
  - name: document
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 788961766
    num_examples: 1361475
  download_size: 234362060
  dataset_size: 788961766
- config_name: documents_java
  features:
  - name: document_id
    dtype: string
  - name: document
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 595730631
    num_examples: 1281018
  download_size: 157313428
  dataset_size: 595730631
- config_name: documents_javascript
  features:
  - name: document_id
    dtype: string
  - name: document
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 103581935
    num_examples: 129007
  download_size: 36388466
  dataset_size: 103581935
- config_name: documents_php
  features:
  - name: document_id
    dtype: string
  - name: document
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 223918975
    num_examples: 424463
  download_size: 63024688
  dataset_size: 223918975
- config_name: documents_python
  features:
  - name: document_id
    dtype: string
  - name: document
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 1076005138
    num_examples: 776900
  download_size: 335083397
  dataset_size: 1076005138
- config_name: documents_ruby
  features:
  - name: document_id
    dtype: string
  - name: document
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 63838225
    num_examples: 104899
  download_size: 15331355
  dataset_size: 63838225
- config_name: documents_rust
  features:
  - name: document_id
    dtype: string
  - name: document
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 239717261
    num_examples: 381521
  download_size: 76487156
  dataset_size: 239717261
- config_name: documents_typescript
  features:
  - name: document_id
    dtype: string
  - name: document
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 265789788
    num_examples: 328457
  download_size: 77031262
  dataset_size: 265789788
- config_name: queries_go
  features:
  - name: query_id
    dtype: string
  - name: query
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 244374320
    num_examples: 1361475
  download_size: 79885874
  dataset_size: 244374320
- config_name: queries_java
  features:
  - name: query_id
    dtype: string
  - name: query
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 392026807
    num_examples: 1281018
  download_size: 102004628
  dataset_size: 392026807
- config_name: queries_javascript
  features:
  - name: query_id
    dtype: string
  - name: query
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 40951897
    num_examples: 129007
  download_size: 14431296
  dataset_size: 40951897
- config_name: queries_php
  features:
  - name: query_id
    dtype: string
  - name: query
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 101783501
    num_examples: 424463
  download_size: 31142491
  dataset_size: 101783501
- config_name: queries_python
  features:
  - name: query_id
    dtype: string
  - name: query
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 333640311
    num_examples: 776900
  download_size: 102366988
  dataset_size: 333640311
- config_name: queries_ruby
  features:
  - name: query_id
    dtype: string
  - name: query
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 67089741
    num_examples: 104899
  download_size: 16729531
  dataset_size: 67089741
- config_name: queries_rust
  features:
  - name: query_id
    dtype: string
  - name: query
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 78338197
    num_examples: 381521
  download_size: 30360518
  dataset_size: 78338197
- config_name: queries_typescript
  features:
  - name: query_id
    dtype: string
  - name: query
    dtype: string
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 96580847
    num_examples: 328457
  download_size: 30396829
  dataset_size: 96580847
- config_name: scores_go
  features:
  - name: query_id
    dtype: string
  - name: document_ids
    sequence: string
  - name: scores
    sequence: float64
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 6368108469
    num_examples: 1361475
  download_size: 3268145122
  dataset_size: 6368108469
- config_name: scores_java
  features:
  - name: query_id
    dtype: string
  - name: document_ids
    sequence: string
  - name: scores
    sequence: float64
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 6478731786
    num_examples: 1281018
  download_size: 3134730175
  dataset_size: 6478731786
- config_name: scores_javascript
  features:
  - name: query_id
    dtype: string
  - name: document_ids
    sequence: string
  - name: scores
    sequence: float64
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 784178497
    num_examples: 129007
  download_size: 301374369
  dataset_size: 784178497
- config_name: scores_php
  features:
  - name: query_id
    dtype: string
  - name: document_ids
    sequence: string
  - name: scores
    sequence: float64
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 2029525899
    num_examples: 424463
  download_size: 999627270
  dataset_size: 2029525899
- config_name: scores_python
  features:
  - name: query_id
    dtype: string
  - name: document_ids
    sequence: string
  - name: scores
    sequence: float64
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 4202284424
    num_examples: 776900
  download_size: 1889424131
  dataset_size: 4202284424
- config_name: scores_ruby
  features:
  - name: query_id
    dtype: string
  - name: document_ids
    sequence: string
  - name: scores
    sequence: float64
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 505741456
    num_examples: 104899
  download_size: 220359577
  dataset_size: 505741456
- config_name: scores_rust
  features:
  - name: query_id
    dtype: string
  - name: document_ids
    sequence: string
  - name: scores
    sequence: float64
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 1898140853
    num_examples: 381521
  download_size: 933043800
  dataset_size: 1898140853
- config_name: scores_typescript
  features:
  - name: query_id
    dtype: string
  - name: document_ids
    sequence: string
  - name: scores
    sequence: float64
  - name: split
    dtype: string
  splits:
  - name: train
    num_bytes: 2030421789
    num_examples: 328457
  download_size: 796324061
  dataset_size: 2030421789
configs:
- config_name: documents_go
  data_files:
  - split: train
    path: documents_go/train-*
- config_name: documents_java
  data_files:
  - split: train
    path: documents_java/train-*
- config_name: documents_javascript
  data_files:
  - split: train
    path: documents_javascript/train-*
- config_name: documents_php
  data_files:
  - split: train
    path: documents_php/train-*
- config_name: documents_python
  data_files:
  - split: train
    path: documents_python/train-*
- config_name: documents_ruby
  data_files:
  - split: train
    path: documents_ruby/train-*
- config_name: documents_rust
  data_files:
  - split: train
    path: documents_rust/train-*
- config_name: documents_typescript
  data_files:
  - split: train
    path: documents_typescript/train-*
- config_name: queries_go
  data_files:
  - split: train
    path: queries_go/train-*
- config_name: queries_java
  data_files:
  - split: train
    path: queries_java/train-*
- config_name: queries_javascript
  data_files:
  - split: train
    path: queries_javascript/train-*
- config_name: queries_php
  data_files:
  - split: train
    path: queries_php/train-*
- config_name: queries_python
  data_files:
  - split: train
    path: queries_python/train-*
- config_name: queries_ruby
  data_files:
  - split: train
    path: queries_ruby/train-*
- config_name: queries_rust
  data_files:
  - split: train
    path: queries_rust/train-*
- config_name: queries_typescript
  data_files:
  - split: train
    path: queries_typescript/train-*
- config_name: scores_go
  data_files:
  - split: train
    path: scores_go/train-*
- config_name: scores_java
  data_files:
  - split: train
    path: scores_java/train-*
- config_name: scores_javascript
  data_files:
  - split: train
    path: scores_javascript/train-*
- config_name: scores_php
  data_files:
  - split: train
    path: scores_php/train-*
- config_name: scores_python
  data_files:
  - split: train
    path: scores_python/train-*
- config_name: scores_ruby
  data_files:
  - split: train
    path: scores_ruby/train-*
- config_name: scores_rust
  data_files:
  - split: train
    path: scores_rust/train-*
- config_name: scores_typescript
  data_files:
  - split: train
    path: scores_typescript/train-*
---
提供机构:
Shuu12121
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作