five

Shuu12121/coir_hard_negative_datasets_kd

收藏
Hugging Face2026-03-29 更新2026-04-12 收录
下载链接:
https://hf-mirror.com/datasets/Shuu12121/coir_hard_negative_datasets_kd
下载链接
链接失效反馈
官方服务:
资源简介:
--- dataset_info: - config_name: documents_apps-python features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 5252033 num_examples: 8755 download_size: 2441099 dataset_size: 5252033 - config_name: documents_codefeedback-mt-mixed features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 98959739 num_examples: 66371 download_size: 50614519 dataset_size: 98959739 - config_name: documents_codefeedback-st-mixed features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 241780674 num_examples: 156493 download_size: 119840558 dataset_size: 241780674 - config_name: documents_codesearchnet-ccr-go features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 35151378 num_examples: 182608 download_size: 16033868 dataset_size: 35151378 - config_name: documents_codesearchnet-ccr-java features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 49663715 num_examples: 180814 download_size: 19717956 dataset_size: 49663715 - config_name: documents_codesearchnet-ccr-javascript features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 19241952 num_examples: 65124 download_size: 8448048 dataset_size: 19241952 - config_name: documents_codesearchnet-ccr-php features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 70931762 num_examples: 268139 download_size: 28062510 dataset_size: 70931762 - config_name: documents_codesearchnet-ccr-python features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 108300841 num_examples: 276732 download_size: 44279546 dataset_size: 108300841 - config_name: documents_codesearchnet-ccr-ruby features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 5597619 num_examples: 27580 download_size: 2558547 dataset_size: 5597619 - config_name: documents_codesearchnet-go features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 21448139 num_examples: 182394 download_size: 9695160 dataset_size: 21448139 - config_name: documents_codesearchnet-java features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 35998195 num_examples: 180834 download_size: 14939015 dataset_size: 35998195 - config_name: documents_codesearchnet-javascript features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 13142849 num_examples: 64839 download_size: 5907776 dataset_size: 13142849 - config_name: documents_codesearchnet-php features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 49202162 num_examples: 267701 download_size: 20971743 dataset_size: 49202162 - config_name: documents_codesearchnet-python features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 79455238 num_examples: 280140 download_size: 32521978 dataset_size: 79455238 - config_name: documents_codesearchnet-ruby features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 6921123 num_examples: 27569 download_size: 3089665 dataset_size: 6921123 - config_name: documents_codetrans-contest-mixed features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 1541254 num_examples: 1008 download_size: 657617 dataset_size: 1541254 - config_name: documents_codetrans-dl-mixed features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 443046 num_examples: 266 download_size: 128629 dataset_size: 443046 - config_name: documents_cosqa-python features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 2072945 num_examples: 6267 download_size: 1106059 dataset_size: 2072945 - config_name: documents_stackoverflow-qa-mixed features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 24429289 num_examples: 19930 download_size: 13175543 dataset_size: 24429289 - config_name: documents_synthetic-text2sql-sql features: - name: document_id dtype: string - name: document dtype: string - name: split dtype: string splits: - name: train num_bytes: 15668333 num_examples: 105851 download_size: 7085169 dataset_size: 15668333 - config_name: queries_apps-python features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 6410447 num_examples: 5000 download_size: 3263749 dataset_size: 6410447 - config_name: queries_codefeedback-mt-mixed features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 235947161 num_examples: 53106 download_size: 99226011 dataset_size: 235947161 - config_name: queries_codefeedback-st-mixed features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 93421835 num_examples: 125220 download_size: 46801238 dataset_size: 93421835 - config_name: queries_codesearchnet-ccr-go features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 43207673 num_examples: 167288 download_size: 18678358 dataset_size: 43207673 - config_name: queries_codesearchnet-ccr-java features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 62513115 num_examples: 164923 download_size: 23948866 dataset_size: 62513115 - config_name: queries_codesearchnet-ccr-javascript features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 23515337 num_examples: 58025 download_size: 10128890 dataset_size: 23515337 - config_name: queries_codesearchnet-ccr-php features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 87492528 num_examples: 241241 download_size: 33347946 dataset_size: 87492528 - config_name: queries_codesearchnet-ccr-python features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 137102794 num_examples: 251820 download_size: 57069617 dataset_size: 137102794 - config_name: queries_codesearchnet-ccr-ruby features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 6757179 num_examples: 24927 download_size: 3053272 dataset_size: 6757179 - config_name: queries_codesearchnet-go features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 72165539 num_examples: 167288 download_size: 29465690 dataset_size: 72165539 - config_name: queries_codesearchnet-java features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 104328618 num_examples: 164923 download_size: 37258933 dataset_size: 104328618 - config_name: queries_codesearchnet-javascript features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 38975411 num_examples: 58025 download_size: 15762412 dataset_size: 38975411 - config_name: queries_codesearchnet-php features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 146123377 num_examples: 241241 download_size: 51891853 dataset_size: 146123377 - config_name: queries_codesearchnet-python features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 229222143 num_examples: 251820 download_size: 89277813 dataset_size: 229222143 - config_name: queries_codesearchnet-ruby features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 11273759 num_examples: 24927 download_size: 4742042 dataset_size: 11273759 - config_name: queries_codetrans-contest-mixed features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 401024 num_examples: 561 download_size: 196326 dataset_size: 401024 - config_name: queries_codetrans-dl-mixed features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 789134 num_examples: 564 download_size: 81011 dataset_size: 789134 - config_name: queries_cosqa-python features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 547571 num_examples: 9020 download_size: 253852 dataset_size: 547571 - config_name: queries_stackoverflow-qa-mixed features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 19787799 num_examples: 13951 download_size: 9951440 dataset_size: 19787799 - config_name: queries_synthetic-text2sql-sql features: - name: query_id dtype: string - name: query dtype: string - name: split dtype: string splits: - name: train num_bytes: 10403642 num_examples: 100000 download_size: 5039559 dataset_size: 10403642 - config_name: scores_apps-python features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 8645114 num_examples: 5000 download_size: 5002371 dataset_size: 8645114 - config_name: scores_codefeedback-mt-mixed features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 97074328 num_examples: 53106 download_size: 65747726 dataset_size: 97074328 - config_name: scores_codefeedback-st-mixed features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 234412832 num_examples: 125220 download_size: 169380932 dataset_size: 234412832 - config_name: scores_codesearchnet-ccr-go features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 314698366 num_examples: 167288 download_size: 202321699 dataset_size: 314698366 - config_name: scores_codesearchnet-ccr-java features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 310862243 num_examples: 164923 download_size: 200693853 dataset_size: 310862243 - config_name: scores_codesearchnet-ccr-javascript features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 106362070 num_examples: 58025 download_size: 66087872 dataset_size: 106362070 - config_name: scores_codesearchnet-ccr-php features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 458913278 num_examples: 241241 download_size: 317444974 dataset_size: 458913278 - config_name: scores_codesearchnet-ccr-python features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 480385067 num_examples: 251820 download_size: 303193068 dataset_size: 480385067 - config_name: scores_codesearchnet-ccr-ruby features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 44971695 num_examples: 24927 download_size: 26739998 dataset_size: 44971695 - config_name: scores_codesearchnet-go features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 314746146 num_examples: 167288 download_size: 208168428 dataset_size: 314746146 - config_name: scores_codesearchnet-java features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 310749085 num_examples: 164923 download_size: 205058131 dataset_size: 310749085 - config_name: scores_codesearchnet-javascript features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 106335220 num_examples: 58025 download_size: 67841144 dataset_size: 106335220 - config_name: scores_codesearchnet-php features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 458901659 num_examples: 241241 download_size: 320873447 dataset_size: 458901659 - config_name: scores_codesearchnet-python features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 480603219 num_examples: 251820 download_size: 317376169 dataset_size: 480603219 - config_name: scores_codesearchnet-ruby features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 44912420 num_examples: 24927 download_size: 26981910 dataset_size: 44912420 - config_name: scores_codetrans-contest-mixed features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 917468 num_examples: 561 download_size: 522522 dataset_size: 917468 - config_name: scores_codetrans-dl-mixed features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 898326 num_examples: 564 download_size: 263988 dataset_size: 898326 - config_name: scores_cosqa-python features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 15839259 num_examples: 9020 download_size: 8894704 dataset_size: 15839259 - config_name: scores_stackoverflow-qa-mixed features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 24935270 num_examples: 13951 download_size: 15047920 dataset_size: 24935270 - config_name: scores_synthetic-text2sql-sql features: - name: query_id dtype: string - name: document_ids sequence: string - name: scores sequence: float64 - name: split dtype: string splits: - name: train num_bytes: 183789209 num_examples: 100000 download_size: 128552991 dataset_size: 183789209 configs: - config_name: documents_apps-python data_files: - split: train path: documents_apps-python/train-* - config_name: documents_codefeedback-mt-mixed data_files: - split: train path: documents_codefeedback-mt-mixed/train-* - config_name: documents_codefeedback-st-mixed data_files: - split: train path: documents_codefeedback-st-mixed/train-* - config_name: documents_codesearchnet-ccr-go data_files: - split: train path: documents_codesearchnet-ccr-go/train-* - config_name: documents_codesearchnet-ccr-java data_files: - split: train path: documents_codesearchnet-ccr-java/train-* - config_name: documents_codesearchnet-ccr-javascript data_files: - split: train path: documents_codesearchnet-ccr-javascript/train-* - config_name: documents_codesearchnet-ccr-php data_files: - split: train path: documents_codesearchnet-ccr-php/train-* - config_name: documents_codesearchnet-ccr-python data_files: - split: train path: documents_codesearchnet-ccr-python/train-* - config_name: documents_codesearchnet-ccr-ruby data_files: - split: train path: documents_codesearchnet-ccr-ruby/train-* - config_name: documents_codesearchnet-go data_files: - split: train path: documents_codesearchnet-go/train-* - config_name: documents_codesearchnet-java data_files: - split: train path: documents_codesearchnet-java/train-* - config_name: documents_codesearchnet-javascript data_files: - split: train path: documents_codesearchnet-javascript/train-* - config_name: documents_codesearchnet-php data_files: - split: train path: documents_codesearchnet-php/train-* - config_name: documents_codesearchnet-python data_files: - split: train path: documents_codesearchnet-python/train-* - config_name: documents_codesearchnet-ruby data_files: - split: train path: documents_codesearchnet-ruby/train-* - config_name: documents_codetrans-contest-mixed data_files: - split: train path: documents_codetrans-contest-mixed/train-* - config_name: documents_codetrans-dl-mixed data_files: - split: train path: documents_codetrans-dl-mixed/train-* - config_name: documents_cosqa-python data_files: - split: train path: documents_cosqa-python/train-* - config_name: documents_stackoverflow-qa-mixed data_files: - split: train path: documents_stackoverflow-qa-mixed/train-* - config_name: documents_synthetic-text2sql-sql data_files: - split: train path: documents_synthetic-text2sql-sql/train-* - config_name: queries_apps-python data_files: - split: train path: queries_apps-python/train-* - config_name: queries_codefeedback-mt-mixed data_files: - split: train path: queries_codefeedback-mt-mixed/train-* - config_name: queries_codefeedback-st-mixed data_files: - split: train path: queries_codefeedback-st-mixed/train-* - config_name: queries_codesearchnet-ccr-go data_files: - split: train path: queries_codesearchnet-ccr-go/train-* - config_name: queries_codesearchnet-ccr-java data_files: - split: train path: queries_codesearchnet-ccr-java/train-* - config_name: queries_codesearchnet-ccr-javascript data_files: - split: train path: queries_codesearchnet-ccr-javascript/train-* - config_name: queries_codesearchnet-ccr-php data_files: - split: train path: queries_codesearchnet-ccr-php/train-* - config_name: queries_codesearchnet-ccr-python data_files: - split: train path: queries_codesearchnet-ccr-python/train-* - config_name: queries_codesearchnet-ccr-ruby data_files: - split: train path: queries_codesearchnet-ccr-ruby/train-* - config_name: queries_codesearchnet-go data_files: - split: train path: queries_codesearchnet-go/train-* - config_name: queries_codesearchnet-java data_files: - split: train path: queries_codesearchnet-java/train-* - config_name: queries_codesearchnet-javascript data_files: - split: train path: queries_codesearchnet-javascript/train-* - config_name: queries_codesearchnet-php data_files: - split: train path: queries_codesearchnet-php/train-* - config_name: queries_codesearchnet-python data_files: - split: train path: queries_codesearchnet-python/train-* - config_name: queries_codesearchnet-ruby data_files: - split: train path: queries_codesearchnet-ruby/train-* - config_name: queries_codetrans-contest-mixed data_files: - split: train path: queries_codetrans-contest-mixed/train-* - config_name: queries_codetrans-dl-mixed data_files: - split: train path: queries_codetrans-dl-mixed/train-* - config_name: queries_cosqa-python data_files: - split: train path: queries_cosqa-python/train-* - config_name: queries_stackoverflow-qa-mixed data_files: - split: train path: queries_stackoverflow-qa-mixed/train-* - config_name: queries_synthetic-text2sql-sql data_files: - split: train path: queries_synthetic-text2sql-sql/train-* - config_name: scores_apps-python data_files: - split: train path: scores_apps-python/train-* - config_name: scores_codefeedback-mt-mixed data_files: - split: train path: scores_codefeedback-mt-mixed/train-* - config_name: scores_codefeedback-st-mixed data_files: - split: train path: scores_codefeedback-st-mixed/train-* - config_name: scores_codesearchnet-ccr-go data_files: - split: train path: scores_codesearchnet-ccr-go/train-* - config_name: scores_codesearchnet-ccr-java data_files: - split: train path: scores_codesearchnet-ccr-java/train-* - config_name: scores_codesearchnet-ccr-javascript data_files: - split: train path: scores_codesearchnet-ccr-javascript/train-* - config_name: scores_codesearchnet-ccr-php data_files: - split: train path: scores_codesearchnet-ccr-php/train-* - config_name: scores_codesearchnet-ccr-python data_files: - split: train path: scores_codesearchnet-ccr-python/train-* - config_name: scores_codesearchnet-ccr-ruby data_files: - split: train path: scores_codesearchnet-ccr-ruby/train-* - config_name: scores_codesearchnet-go data_files: - split: train path: scores_codesearchnet-go/train-* - config_name: scores_codesearchnet-java data_files: - split: train path: scores_codesearchnet-java/train-* - config_name: scores_codesearchnet-javascript data_files: - split: train path: scores_codesearchnet-javascript/train-* - config_name: scores_codesearchnet-php data_files: - split: train path: scores_codesearchnet-php/train-* - config_name: scores_codesearchnet-python data_files: - split: train path: scores_codesearchnet-python/train-* - config_name: scores_codesearchnet-ruby data_files: - split: train path: scores_codesearchnet-ruby/train-* - config_name: scores_codetrans-contest-mixed data_files: - split: train path: scores_codetrans-contest-mixed/train-* - config_name: scores_codetrans-dl-mixed data_files: - split: train path: scores_codetrans-dl-mixed/train-* - config_name: scores_cosqa-python data_files: - split: train path: scores_cosqa-python/train-* - config_name: scores_stackoverflow-qa-mixed data_files: - split: train path: scores_stackoverflow-qa-mixed/train-* - config_name: scores_synthetic-text2sql-sql data_files: - split: train path: scores_synthetic-text2sql-sql/train-* ---
提供机构:
Shuu12121
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作