embedding-benchmark/MIRACLRerankingDownsampled
收藏Hugging Face2026-04-17 更新2026-04-26 收录
下载链接:
https://hf-mirror.com/datasets/embedding-benchmark/MIRACLRerankingDownsampled
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: ar-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 94914703.19880481
num_examples: 117605
download_size: 47483280
dataset_size: 94914703.19880481
- config_name: ar-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 288227
num_examples: 10081
download_size: 107236
dataset_size: 288227
- config_name: ar-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 66509
num_examples: 1000
download_size: 37992
dataset_size: 66509
- config_name: ar-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 3170621
num_examples: 1000
download_size: 1764519
dataset_size: 3170621
- config_name: bn-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 66409513.16677039
num_examples: 63830
download_size: 26369204
dataset_size: 66409513.16677039
- config_name: bn-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 113824
num_examples: 4206
download_size: 40805
dataset_size: 113824
- config_name: bn-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 55939
num_examples: 411
download_size: 25029
dataset_size: 55939
- config_name: bn-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 1221514
num_examples: 411
download_size: 659629
dataset_size: 1221514
- config_name: de-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 24022111.44380375
num_examples: 44204
download_size: 15909493
dataset_size: 24022111.44380375
- config_name: de-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 105647
num_examples: 3144
download_size: 37102
dataset_size: 105647
- config_name: de-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 19450
num_examples: 305
download_size: 14582
dataset_size: 19450
- config_name: de-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 990384
num_examples: 305
download_size: 521595
dataset_size: 990384
- config_name: en-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 66828586.48429249
num_examples: 121098
download_size: 46460694
dataset_size: 66828586.48429249
- config_name: en-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 242967
num_examples: 8350
download_size: 92748
dataset_size: 242967
- config_name: en-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 41576
num_examples: 799
download_size: 28948
dataset_size: 41576
- config_name: en-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 2702552
num_examples: 799
download_size: 1686157
dataset_size: 2702552
- config_name: es-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 52456963.53967973
num_examples: 92382
download_size: 33620535
dataset_size: 52456963.53967973
- config_name: es-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 215688
num_examples: 6443
download_size: 73182
dataset_size: 215688
- config_name: es-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 43504
num_examples: 648
download_size: 29571
dataset_size: 43504
- config_name: es-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 2103359
num_examples: 648
download_size: 1175431
dataset_size: 2103359
- config_name: fa-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 67028102.84709123
num_examples: 87607
download_size: 33798396
dataset_size: 67028102.84709123
- config_name: fa-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 216792
num_examples: 6571
download_size: 72765
dataset_size: 216792
- config_name: fa-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 58114
num_examples: 632
download_size: 32759
dataset_size: 58114
- config_name: fa-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 1995862
num_examples: 632
download_size: 1039456
dataset_size: 1995862
- config_name: fi-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 54157044.43336162
num_examples: 108517
download_size: 31921307
dataset_size: 54157044.43336162
- config_name: fi-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 262605
num_examples: 9436
download_size: 92891
dataset_size: 262605
- config_name: fi-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 51734
num_examples: 1000
download_size: 36471
dataset_size: 51734
- config_name: fi-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 3036568
num_examples: 1000
download_size: 1557009
dataset_size: 3036568
- config_name: fr-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 24666920.98979524
num_examples: 49634
download_size: 16316172
dataset_size: 24666920.98979524
- config_name: fr-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 115836
num_examples: 3429
download_size: 39712
dataset_size: 115836
- config_name: fr-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 21433
num_examples: 343
download_size: 14877
dataset_size: 21433
- config_name: fr-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 1121744
num_examples: 343
download_size: 579142
dataset_size: 1121744
- config_name: hi-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 54310898.977452196
num_examples: 41808
download_size: 23056200
dataset_size: 54310898.977452196
- config_name: hi-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 111217
num_examples: 3494
download_size: 37580
dataset_size: 111217
- config_name: hi-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 54122
num_examples: 350
download_size: 25941
dataset_size: 54122
- config_name: hi-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 1059433
num_examples: 350
download_size: 489134
dataset_size: 1059433
- config_name: id-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 58714418.48596386
num_examples: 113860
download_size: 34203447
dataset_size: 58714418.48596386
- config_name: id-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 263958
num_examples: 9668
download_size: 92612
dataset_size: 263958
- config_name: id-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 47803
num_examples: 960
download_size: 30679
dataset_size: 47803
- config_name: id-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 2948910
num_examples: 960
download_size: 1589306
dataset_size: 2948910
- config_name: ja-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 13466672.945364479
num_examples: 22514
download_size: 8837658
dataset_size: 13466672.945364479
- config_name: ja-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 236068
num_examples: 8354
download_size: 85925
dataset_size: 236068
- config_name: ja-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 55179
num_examples: 860
download_size: 34096
dataset_size: 55179
- config_name: ja-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 2673867
num_examples: 860
download_size: 370358
dataset_size: 2673867
- config_name: ko-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 14825970.209779872
num_examples: 23737
download_size: 9161969
dataset_size: 14825970.209779872
- config_name: ko-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 83188
num_examples: 3057
download_size: 31571
dataset_size: 83188
- config_name: ko-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 13875
num_examples: 213
download_size: 10030
dataset_size: 13875
- config_name: ko-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 633622
num_examples: 213
download_size: 277466
dataset_size: 633622
- config_name: ru-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 110518164.55353378
num_examples: 125341
download_size: 51957289
dataset_size: 110518164.55353378
- config_name: ru-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 298924
num_examples: 10470
download_size: 110477
dataset_size: 298924
- config_name: ru-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 92517
num_examples: 1000
download_size: 53448
dataset_size: 92517
- config_name: ru-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 3227807
num_examples: 1000
download_size: 1936072
dataset_size: 3227807
- config_name: sw-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 10585059.560595494
num_examples: 42617
download_size: 6163053
dataset_size: 10585059.560595494
- config_name: sw-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 135409
num_examples: 5092
download_size: 45442
dataset_size: 135409
- config_name: sw-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 24357
num_examples: 482
download_size: 15728
dataset_size: 24357
- config_name: sw-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 1344855
num_examples: 482
download_size: 507676
dataset_size: 1344855
- config_name: te-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 93363524.33578525
num_examples: 58319
download_size: 32349875
dataset_size: 93363524.33578525
- config_name: te-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 43268
num_examples: 1606
download_size: 19742
dataset_size: 43268
- config_name: te-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 95102
num_examples: 828
download_size: 38671
dataset_size: 95102
- config_name: te-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 2433095
num_examples: 828
download_size: 817298
dataset_size: 2433095
- config_name: th-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 104307792.51621531
num_examples: 76737
download_size: 39295638
dataset_size: 104307792.51621531
- config_name: th-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 208002
num_examples: 7573
download_size: 72960
dataset_size: 208002
- config_name: th-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 98697
num_examples: 733
download_size: 45069
dataset_size: 98697
- config_name: th-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 2178686
num_examples: 733
download_size: 942279
dataset_size: 2178686
- config_name: yo-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 2152524.9245356116
num_examples: 9922
download_size: 1710602
dataset_size: 2152524.9245356116
- config_name: yo-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 35512
num_examples: 1188
download_size: 11075
dataset_size: 35512
- config_name: yo-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 6697
num_examples: 119
download_size: 5813
dataset_size: 6697
- config_name: yo-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 329720
num_examples: 119
download_size: 109356
dataset_size: 329720
- config_name: zh-corpus
features:
- name: id
dtype: string
- name: text
dtype: string
- name: title
dtype: string
splits:
- name: dev
num_bytes: 743209.8867899003
num_examples: 1735
download_size: 704491
dataset_size: 743209.8867899003
- config_name: zh-qrels
features:
- name: query-id
dtype: string
- name: corpus-id
dtype: string
- name: score
dtype: int64
splits:
- name: dev
num_bytes: 129492
num_examples: 3928
download_size: 43632
dataset_size: 129492
- config_name: zh-queries
features:
- name: id
dtype: string
- name: text
dtype: string
splits:
- name: dev
num_bytes: 19261
num_examples: 393
download_size: 13987
dataset_size: 19261
- config_name: zh-top_ranked
features:
- name: query-id
dtype: string
- name: corpus-ids
sequence: string
splits:
- name: dev
num_bytes: 1308662
num_examples: 393
download_size: 44371
dataset_size: 1308662
configs:
- config_name: ar-corpus
data_files:
- split: dev
path: ar-corpus/dev-*
- config_name: ar-qrels
data_files:
- split: dev
path: ar-qrels/dev-*
- config_name: ar-queries
data_files:
- split: dev
path: ar-queries/dev-*
- config_name: ar-top_ranked
data_files:
- split: dev
path: ar-top_ranked/dev-*
- config_name: bn-corpus
data_files:
- split: dev
path: bn-corpus/dev-*
- config_name: bn-qrels
data_files:
- split: dev
path: bn-qrels/dev-*
- config_name: bn-queries
data_files:
- split: dev
path: bn-queries/dev-*
- config_name: bn-top_ranked
data_files:
- split: dev
path: bn-top_ranked/dev-*
- config_name: de-corpus
data_files:
- split: dev
path: de-corpus/dev-*
- config_name: de-qrels
data_files:
- split: dev
path: de-qrels/dev-*
- config_name: de-queries
data_files:
- split: dev
path: de-queries/dev-*
- config_name: de-top_ranked
data_files:
- split: dev
path: de-top_ranked/dev-*
- config_name: en-corpus
data_files:
- split: dev
path: en-corpus/dev-*
- config_name: en-qrels
data_files:
- split: dev
path: en-qrels/dev-*
- config_name: en-queries
data_files:
- split: dev
path: en-queries/dev-*
- config_name: en-top_ranked
data_files:
- split: dev
path: en-top_ranked/dev-*
- config_name: es-corpus
data_files:
- split: dev
path: es-corpus/dev-*
- config_name: es-qrels
data_files:
- split: dev
path: es-qrels/dev-*
- config_name: es-queries
data_files:
- split: dev
path: es-queries/dev-*
- config_name: es-top_ranked
data_files:
- split: dev
path: es-top_ranked/dev-*
- config_name: fa-corpus
data_files:
- split: dev
path: fa-corpus/dev-*
- config_name: fa-qrels
data_files:
- split: dev
path: fa-qrels/dev-*
- config_name: fa-queries
data_files:
- split: dev
path: fa-queries/dev-*
- config_name: fa-top_ranked
data_files:
- split: dev
path: fa-top_ranked/dev-*
- config_name: fi-corpus
data_files:
- split: dev
path: fi-corpus/dev-*
- config_name: fi-qrels
data_files:
- split: dev
path: fi-qrels/dev-*
- config_name: fi-queries
data_files:
- split: dev
path: fi-queries/dev-*
- config_name: fi-top_ranked
data_files:
- split: dev
path: fi-top_ranked/dev-*
- config_name: fr-corpus
data_files:
- split: dev
path: fr-corpus/dev-*
- config_name: fr-qrels
data_files:
- split: dev
path: fr-qrels/dev-*
- config_name: fr-queries
data_files:
- split: dev
path: fr-queries/dev-*
- config_name: fr-top_ranked
data_files:
- split: dev
path: fr-top_ranked/dev-*
- config_name: hi-corpus
data_files:
- split: dev
path: hi-corpus/dev-*
- config_name: hi-qrels
data_files:
- split: dev
path: hi-qrels/dev-*
- config_name: hi-queries
data_files:
- split: dev
path: hi-queries/dev-*
- config_name: hi-top_ranked
data_files:
- split: dev
path: hi-top_ranked/dev-*
- config_name: id-corpus
data_files:
- split: dev
path: id-corpus/dev-*
- config_name: id-qrels
data_files:
- split: dev
path: id-qrels/dev-*
- config_name: id-queries
data_files:
- split: dev
path: id-queries/dev-*
- config_name: id-top_ranked
data_files:
- split: dev
path: id-top_ranked/dev-*
- config_name: ja-corpus
data_files:
- split: dev
path: ja-corpus/dev-*
- config_name: ja-qrels
data_files:
- split: dev
path: ja-qrels/dev-*
- config_name: ja-queries
data_files:
- split: dev
path: ja-queries/dev-*
- config_name: ja-top_ranked
data_files:
- split: dev
path: ja-top_ranked/dev-*
- config_name: ko-corpus
data_files:
- split: dev
path: ko-corpus/dev-*
- config_name: ko-qrels
data_files:
- split: dev
path: ko-qrels/dev-*
- config_name: ko-queries
data_files:
- split: dev
path: ko-queries/dev-*
- config_name: ko-top_ranked
data_files:
- split: dev
path: ko-top_ranked/dev-*
- config_name: ru-corpus
data_files:
- split: dev
path: ru-corpus/dev-*
- config_name: ru-qrels
data_files:
- split: dev
path: ru-qrels/dev-*
- config_name: ru-queries
data_files:
- split: dev
path: ru-queries/dev-*
- config_name: ru-top_ranked
data_files:
- split: dev
path: ru-top_ranked/dev-*
- config_name: sw-corpus
data_files:
- split: dev
path: sw-corpus/dev-*
- config_name: sw-qrels
data_files:
- split: dev
path: sw-qrels/dev-*
- config_name: sw-queries
data_files:
- split: dev
path: sw-queries/dev-*
- config_name: sw-top_ranked
data_files:
- split: dev
path: sw-top_ranked/dev-*
- config_name: te-corpus
data_files:
- split: dev
path: te-corpus/dev-*
- config_name: te-qrels
data_files:
- split: dev
path: te-qrels/dev-*
- config_name: te-queries
data_files:
- split: dev
path: te-queries/dev-*
- config_name: te-top_ranked
data_files:
- split: dev
path: te-top_ranked/dev-*
- config_name: th-corpus
data_files:
- split: dev
path: th-corpus/dev-*
- config_name: th-qrels
data_files:
- split: dev
path: th-qrels/dev-*
- config_name: th-queries
data_files:
- split: dev
path: th-queries/dev-*
- config_name: th-top_ranked
data_files:
- split: dev
path: th-top_ranked/dev-*
- config_name: yo-corpus
data_files:
- split: dev
path: yo-corpus/dev-*
- config_name: yo-qrels
data_files:
- split: dev
path: yo-qrels/dev-*
- config_name: yo-queries
data_files:
- split: dev
path: yo-queries/dev-*
- config_name: yo-top_ranked
data_files:
- split: dev
path: yo-top_ranked/dev-*
- config_name: zh-corpus
data_files:
- split: dev
path: zh-corpus/dev-*
- config_name: zh-qrels
data_files:
- split: dev
path: zh-qrels/dev-*
- config_name: zh-queries
data_files:
- split: dev
path: zh-queries/dev-*
- config_name: zh-top_ranked
data_files:
- split: dev
path: zh-top_ranked/dev-*
---
提供机构:
embedding-benchmark



