lightonai/nv-embed-supervised-distill-dedup-translated-v2
收藏 · Hugging Face · 2026-02-18 更新 · 2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/lightonai/nv-embed-supervised-distill-dedup-translated-v2
下载链接
链接失效反馈官方服务:
资源简介:
---
# Per-config feature schema and per-split statistics.
# Each list entry describes one config (documents, queries, scores); the
# features, splits, download_size and dataset_size keys belong to that entry.
dataset_info:
- config_name: documents
  features:
  - name: document_id
    dtype: int64
  - name: document
    dtype: string
  splits:
  - name: miracl_fr
    num_bytes: 4950963719
    num_examples: 13930215
  - name: fiqa_fr
    num_bytes: 37051746
    num_examples: 34108
  - name: trivia_fr
    num_bytes: 3455769744
    num_examples: 4570492
  - name: nq_fr
    num_bytes: 592305960
    num_examples: 1590294
  - name: fever_fr
    num_bytes: 152715449
    num_examples: 331425
  - name: hotpotqa_fr
    num_bytes: 219933296
    num_examples: 679486
  - name: msmarco_fr
    num_bytes: 2114487505
    num_examples: 5090394
  - name: squadv2_fr
    num_bytes: 17481122
    num_examples: 19029
  download_size: 12914879297
  dataset_size: 11540708541
- config_name: queries
  features:
  - name: query_id
    dtype: int64
  - name: query
    dtype: string
  splits:
  - name: miracl_fr
    num_bytes: 65622
    num_examples: 1143
  - name: fiqa_fr
    num_bytes: 531429
    num_examples: 5458
  - name: trivia_fr
    num_bytes: 6162546
    num_examples: 60403
  - name: nq_fr
    num_bytes: 10851783
    num_examples: 149419
  - name: fever_fr
    num_bytes: 7496278
    num_examples: 109647
  - name: hotpotqa_fr
    num_bytes: 10420870
    num_examples: 84589
  - name: msmarco_fr
    num_bytes: 31458732
    num_examples: 502904
  - name: squadv2_fr
    num_bytes: 11098677
    num_examples: 130103
  download_size: 159298926
  dataset_size: 78085937
- config_name: scores
  features:
  - name: query_id
    dtype: int64
  - name: document_ids
    list: int64
  - name: scores
    list: float64
  splits:
  - name: miracl_fr
    num_bytes: 304440928
    num_examples: 2321
  - name: fiqa_fr
    num_bytes: 2641920
    num_examples: 13760
  - name: trivia_fr
    num_bytes: 119554752
    num_examples: 622681
  - name: nq_fr
    num_bytes: 24655680
    num_examples: 128415
  - name: fever_fr
    num_bytes: 24216000
    num_examples: 126125
  - name: hotpotqa_fr
    num_bytes: 28969536
    num_examples: 150883
  - name: msmarco_fr
    num_bytes: 101245824
    num_examples: 527322
  - name: squadv2_fr
    num_bytes: 24986880
    num_examples: 130140
  download_size: 718817868
  dataset_size: 630711520
# Split-to-file mapping for each config.
# Every data_files entry pairs a split name with the glob pattern of the
# files under that config's directory.
configs:
- config_name: documents
  data_files:
  - split: fiqa_fr
    path: documents/fiqa_fr-*
  - split: nq_fr
    path: documents/nq_fr-*
  - split: fever_fr
    path: documents/fever_fr-*
  - split: hotpotqa_fr
    path: documents/hotpotqa_fr-*
  - split: msmarco_fr
    path: documents/msmarco_fr-*
  - split: squadv2_fr
    path: documents/squadv2_fr-*
  - split: trivia_fr
    path: documents/trivia_fr-*
  - split: miracl_fr
    path: documents/miracl_fr-*
- config_name: queries
  data_files:
  - split: fiqa_fr
    path: queries/fiqa_fr-*
  - split: nq_fr
    path: queries/nq_fr-*
  - split: trivia_fr
    path: queries/trivia_fr-*
  - split: fever_fr
    path: queries/fever_fr-*
  - split: hotpotqa_fr
    path: queries/hotpotqa_fr-*
  - split: msmarco_fr
    path: queries/msmarco_fr-*
  - split: squadv2_fr
    path: queries/squadv2_fr-*
  - split: miracl_fr
    path: queries/miracl_fr-*
- config_name: scores
  data_files:
  - split: fiqa_fr
    path: scores/fiqa_fr-*
  - split: nq_fr
    path: scores/nq_fr-*
  - split: fever_fr
    path: scores/fever_fr-*
  - split: hotpotqa_fr
    path: scores/hotpotqa_fr-*
  - split: msmarco_fr
    path: scores/msmarco_fr-*
  - split: squadv2_fr
    path: scores/squadv2_fr-*
  - split: trivia_fr
    path: scores/trivia_fr-*
  - split: miracl_fr
    path: scores/miracl_fr-*
---
提供机构:
lightonai



