asahi417/seamless-align-enA-frA.tokenized
收藏Hugging Face2024-06-10 更新2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/asahi417/seamless-align-enA-frA.tokenized
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: subset_1
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 896960282
num_examples: 2344
download_size: 138828871
dataset_size: 896960282
- config_name: subset_10
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 849195322
num_examples: 2336
download_size: 131866595
dataset_size: 849195322
- config_name: subset_100
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 784236275
num_examples: 2326
download_size: 121581663
dataset_size: 784236275
- config_name: subset_101
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 778808804
num_examples: 2335
download_size: 120877210
dataset_size: 778808804
- config_name: subset_102
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 772584121
num_examples: 2306
download_size: 119739300
dataset_size: 772584121
- config_name: subset_103
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 783315490
num_examples: 2335
download_size: 121528799
dataset_size: 783315490
- config_name: subset_104
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 774101833
num_examples: 2330
download_size: 119997337
dataset_size: 774101833
- config_name: subset_105
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 782038760
num_examples: 2331
download_size: 121205066
dataset_size: 782038760
- config_name: subset_11
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 836642733
num_examples: 2315
download_size: 129888698
dataset_size: 836642733
- config_name: subset_12
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 866050440
num_examples: 2349
download_size: 134452465
dataset_size: 866050440
- config_name: subset_13
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 859275177
num_examples: 2341
download_size: 133374434
dataset_size: 859275177
- config_name: subset_14
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 854462165
num_examples: 2338
download_size: 132567966
dataset_size: 854462165
- config_name: subset_15
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 846548429
num_examples: 2363
download_size: 131471430
dataset_size: 846548429
- config_name: subset_16
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 849723661
num_examples: 2348
download_size: 131823703
dataset_size: 849723661
- config_name: subset_17
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 847314332
num_examples: 2328
download_size: 131528731
dataset_size: 847314332
- config_name: subset_18
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 856549312
num_examples: 2350
download_size: 132883574
dataset_size: 856549312
- config_name: subset_19
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 849231130
num_examples: 2348
download_size: 131806664
dataset_size: 849231130
- config_name: subset_2
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 885549898
num_examples: 2364
download_size: 137223195
dataset_size: 885549898
- config_name: subset_20
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 829390963
num_examples: 2346
download_size: 128708237
dataset_size: 829390963
- config_name: subset_21
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 841492981
num_examples: 2335
download_size: 130599466
dataset_size: 841492981
- config_name: subset_22
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 839566978
num_examples: 2331
download_size: 130433057
dataset_size: 839566978
- config_name: subset_23
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 831309666
num_examples: 2337
download_size: 129065953
dataset_size: 831309666
- config_name: subset_24
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 843207728
num_examples: 2333
download_size: 130820123
dataset_size: 843207728
- config_name: subset_25
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 826543551
num_examples: 2319
download_size: 128228152
dataset_size: 826543551
- config_name: subset_26
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 856166635
num_examples: 2338
download_size: 132709958
dataset_size: 856166635
- config_name: subset_27
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 835216842
num_examples: 2319
download_size: 129640044
dataset_size: 835216842
- config_name: subset_28
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 839904123
num_examples: 2334
download_size: 130295941
dataset_size: 839904123
- config_name: subset_29
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 787229439
num_examples: 2182
download_size: 122258606
dataset_size: 787229439
- config_name: subset_3
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 877322523
num_examples: 2354
download_size: 136064499
dataset_size: 877322523
- config_name: subset_30
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 731200910
num_examples: 2049
download_size: 113489889
dataset_size: 731200910
- config_name: subset_300
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 615595453
num_examples: 2112
download_size: 95508534
dataset_size: 615595453
- config_name: subset_301
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 618759301
num_examples: 2080
download_size: 95997104
dataset_size: 618759301
- config_name: subset_302
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 632889940
num_examples: 2108
download_size: 98160959
dataset_size: 632889940
- config_name: subset_303
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 609269662
num_examples: 2115
download_size: 94590996
dataset_size: 609269662
- config_name: subset_304
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 621920484
num_examples: 2109
download_size: 96549400
dataset_size: 621920484
- config_name: subset_305
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 621509233
num_examples: 2108
download_size: 96426327
dataset_size: 621509233
- config_name: subset_306
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 611209447
num_examples: 2090
download_size: 94887257
dataset_size: 611209447
- config_name: subset_307
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 611166497
num_examples: 2066
download_size: 94827583
dataset_size: 611166497
- config_name: subset_308
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 609561455
num_examples: 2073
download_size: 94558889
dataset_size: 609561455
- config_name: subset_309
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 614925629
num_examples: 2076
download_size: 95314537
dataset_size: 614925629
- config_name: subset_31
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 837041227
num_examples: 2343
download_size: 129841147
dataset_size: 837041227
- config_name: subset_310
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 618940832
num_examples: 2093
download_size: 96118544
dataset_size: 618940832
- config_name: subset_311
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 617176010
num_examples: 2080
download_size: 95799437
dataset_size: 617176010
- config_name: subset_312
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 620188688
num_examples: 2107
download_size: 96229842
dataset_size: 620188688
- config_name: subset_313
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 618673481
num_examples: 2101
download_size: 95998837
dataset_size: 618673481
- config_name: subset_314
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 598479264
num_examples: 2077
download_size: 92884108
dataset_size: 598479264
- config_name: subset_315
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 621758695
num_examples: 2100
download_size: 96437924
dataset_size: 621758695
- config_name: subset_316
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 607616749
num_examples: 2069
download_size: 94309186
dataset_size: 607616749
- config_name: subset_317
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 604649925
num_examples: 2062
download_size: 93843165
dataset_size: 604649925
- config_name: subset_318
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 590823189
num_examples: 2072
download_size: 91774830
dataset_size: 590823189
- config_name: subset_319
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 603460812
num_examples: 2081
download_size: 93693912
dataset_size: 603460812
- config_name: subset_32
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 833327844
num_examples: 2316
download_size: 129321785
dataset_size: 833327844
- config_name: subset_320
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 597239908
num_examples: 2055
download_size: 92548238
dataset_size: 597239908
- config_name: subset_321
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 600287614
num_examples: 2077
download_size: 93139913
dataset_size: 600287614
- config_name: subset_322
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 600033353
num_examples: 2070
download_size: 93107602
dataset_size: 600033353
- config_name: subset_323
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 597583948
num_examples: 2092
download_size: 92791855
dataset_size: 597583948
- config_name: subset_324
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 584192513
num_examples: 2031
download_size: 90606221
dataset_size: 584192513
- config_name: subset_325
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 598289672
num_examples: 2066
download_size: 92880431
dataset_size: 598289672
- config_name: subset_326
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 577370669
num_examples: 2029
download_size: 89634416
dataset_size: 577370669
- config_name: subset_327
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 600437626
num_examples: 2067
download_size: 93149645
dataset_size: 600437626
- config_name: subset_328
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 606382681
num_examples: 2098
download_size: 94050079
dataset_size: 606382681
- config_name: subset_329
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 495852006
num_examples: 1708
download_size: 76933542
dataset_size: 495852006
- config_name: subset_33
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 821475954
num_examples: 2324
download_size: 127495781
dataset_size: 821475954
- config_name: subset_330
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 590256104
num_examples: 2031
download_size: 91622869
dataset_size: 590256104
- config_name: subset_331
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 585303005
num_examples: 2046
download_size: 90758179
dataset_size: 585303005
- config_name: subset_332
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 601650721
num_examples: 2029
download_size: 93454688
dataset_size: 601650721
- config_name: subset_333
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 595456369
num_examples: 2042
download_size: 92340360
dataset_size: 595456369
- config_name: subset_334
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 596628853
num_examples: 2023
download_size: 92541068
dataset_size: 596628853
- config_name: subset_335
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 565330087
num_examples: 2018
download_size: 87753330
dataset_size: 565330087
- config_name: subset_336
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 587826604
num_examples: 2030
download_size: 91163467
dataset_size: 587826604
- config_name: subset_337
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 577560638
num_examples: 2029
download_size: 89721302
dataset_size: 577560638
- config_name: subset_338
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 583490791
num_examples: 2031
download_size: 90592272
dataset_size: 583490791
- config_name: subset_339
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 583317641
num_examples: 2042
download_size: 90484207
dataset_size: 583317641
- config_name: subset_34
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 833704005
num_examples: 2324
download_size: 129374458
dataset_size: 833704005
- config_name: subset_340
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 602220740
num_examples: 2071
download_size: 93412625
dataset_size: 602220740
- config_name: subset_341
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 602556715
num_examples: 2076
download_size: 93535274
dataset_size: 602556715
- config_name: subset_342
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 579025698
num_examples: 2032
download_size: 89878478
dataset_size: 579025698
- config_name: subset_343
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 582492962
num_examples: 2041
download_size: 90324037
dataset_size: 582492962
- config_name: subset_344
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 566346868
num_examples: 2014
download_size: 87993067
dataset_size: 566346868
- config_name: subset_345
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 578773526
num_examples: 2012
download_size: 89815165
dataset_size: 578773526
- config_name: subset_346
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 573559888
num_examples: 2007
download_size: 89084562
dataset_size: 573559888
- config_name: subset_347
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 578304027
num_examples: 2006
download_size: 89698009
dataset_size: 578304027
- config_name: subset_348
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 567698512
num_examples: 1985
download_size: 88108902
dataset_size: 567698512
- config_name: subset_349
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 580030047
num_examples: 2014
download_size: 90068855
dataset_size: 580030047
- config_name: subset_35
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 825705783
num_examples: 2342
download_size: 128114585
dataset_size: 825705783
- config_name: subset_350
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 558307377
num_examples: 1972
download_size: 86661716
dataset_size: 558307377
- config_name: subset_351
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 545101041
num_examples: 1931
download_size: 84617032
dataset_size: 545101041
- config_name: subset_352
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 576856641
num_examples: 2023
download_size: 89628183
dataset_size: 576856641
- config_name: subset_353
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 552918104
num_examples: 1973
download_size: 85742465
dataset_size: 552918104
- config_name: subset_354
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 567033343
num_examples: 2003
download_size: 88043567
dataset_size: 567033343
- config_name: subset_355
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 562524372
num_examples: 1974
download_size: 87328549
dataset_size: 562524372
- config_name: subset_356
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 567755097
num_examples: 2016
download_size: 88214858
dataset_size: 567755097
- config_name: subset_357
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 579880001
num_examples: 2016
download_size: 89978335
dataset_size: 579880001
- config_name: subset_358
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 565013465
num_examples: 1965
download_size: 87721360
dataset_size: 565013465
- config_name: subset_359
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 557687432
num_examples: 1989
download_size: 86538393
dataset_size: 557687432
- config_name: subset_36
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 821517839
num_examples: 2333
download_size: 127455481
dataset_size: 821517839
- config_name: subset_360
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 553125613
num_examples: 1984
download_size: 85853632
dataset_size: 553125613
- config_name: subset_361
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 568561423
num_examples: 2012
download_size: 88295647
dataset_size: 568561423
- config_name: subset_362
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 558296420
num_examples: 2004
download_size: 86750116
dataset_size: 558296420
- config_name: subset_363
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 565321555
num_examples: 2010
download_size: 87713995
dataset_size: 565321555
- config_name: subset_364
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 550709975
num_examples: 1971
download_size: 85413015
dataset_size: 550709975
- config_name: subset_365
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 560606119
num_examples: 2043
download_size: 87020174
dataset_size: 560606119
- config_name: subset_366
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 568384430
num_examples: 2005
download_size: 88240157
dataset_size: 568384430
- config_name: subset_368
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 548264320
num_examples: 1936
download_size: 85126656
dataset_size: 548264320
- config_name: subset_369
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 232768298
num_examples: 897
download_size: 36210423
dataset_size: 232768298
- config_name: subset_37
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 812589184
num_examples: 2315
download_size: 126149166
dataset_size: 812589184
- config_name: subset_370
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 541413568
num_examples: 1968
download_size: 84057142
dataset_size: 541413568
- config_name: subset_371
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 557115604
num_examples: 1962
download_size: 86562493
dataset_size: 557115604
- config_name: subset_372
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 554395090
num_examples: 1977
download_size: 86025480
dataset_size: 554395090
- config_name: subset_373
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 567127170
num_examples: 1989
download_size: 88099164
dataset_size: 567127170
- config_name: subset_374
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 554364830
num_examples: 2015
download_size: 86222175
dataset_size: 554364830
- config_name: subset_375
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 556663368
num_examples: 1960
download_size: 86368984
dataset_size: 556663368
- config_name: subset_376
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 539178775
num_examples: 1950
download_size: 83571243
dataset_size: 539178775
- config_name: subset_377
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 570851414
num_examples: 2030
download_size: 88605831
dataset_size: 570851414
- config_name: subset_378
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 556820943
num_examples: 1990
download_size: 86487994
dataset_size: 556820943
- config_name: subset_379
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 536625666
num_examples: 1963
download_size: 83287808
dataset_size: 536625666
- config_name: subset_38
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 829139016
num_examples: 2333
download_size: 128679026
dataset_size: 829139016
- config_name: subset_380
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 544162284
num_examples: 1978
download_size: 84530245
dataset_size: 544162284
- config_name: subset_39
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 820990456
num_examples: 2326
download_size: 127321393
dataset_size: 820990456
- config_name: subset_399
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 526772422
num_examples: 1921
download_size: 81778218
dataset_size: 526772422
- config_name: subset_4
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 864639230
num_examples: 2335
download_size: 134197917
dataset_size: 864639230
- config_name: subset_40
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 826303157
num_examples: 2338
download_size: 128242803
dataset_size: 826303157
- config_name: subset_400
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 526371492
num_examples: 1911
download_size: 81778986
dataset_size: 526371492
- config_name: subset_401
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 522407219
num_examples: 1953
download_size: 81189473
dataset_size: 522407219
- config_name: subset_402
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 521443650
num_examples: 1919
download_size: 80996621
dataset_size: 521443650
- config_name: subset_403
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 518409867
num_examples: 1939
download_size: 80531369
dataset_size: 518409867
- config_name: subset_404
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 513261105
num_examples: 1907
download_size: 79714635
dataset_size: 513261105
- config_name: subset_405
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 511509098
num_examples: 1923
download_size: 79341298
dataset_size: 511509098
- config_name: subset_406
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 536843486
num_examples: 1946
download_size: 83384329
dataset_size: 536843486
- config_name: subset_407
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 530216022
num_examples: 1957
download_size: 82296174
dataset_size: 530216022
- config_name: subset_408
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 531756687
num_examples: 1945
download_size: 82514018
dataset_size: 531756687
- config_name: subset_409
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 518561307
num_examples: 1922
download_size: 80599707
dataset_size: 518561307
- config_name: subset_41
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 816877471
num_examples: 2335
download_size: 126708091
dataset_size: 816877471
- config_name: subset_410
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 508740373
num_examples: 1906
download_size: 78969194
dataset_size: 508740373
- config_name: subset_411
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 522400050
num_examples: 1922
download_size: 81146748
dataset_size: 522400050
- config_name: subset_412
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 512421225
num_examples: 1920
download_size: 79557666
dataset_size: 512421225
- config_name: subset_413
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 521584119
num_examples: 1937
download_size: 80983283
dataset_size: 521584119
- config_name: subset_414
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 537900287
num_examples: 1972
download_size: 83587254
dataset_size: 537900287
- config_name: subset_415
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 519737252
num_examples: 1921
download_size: 80746916
dataset_size: 519737252
- config_name: subset_416
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 497694318
num_examples: 1876
download_size: 77313872
dataset_size: 497694318
- config_name: subset_417
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 519215825
num_examples: 1950
download_size: 80628898
dataset_size: 519215825
- config_name: subset_418
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 511717570
num_examples: 1892
download_size: 79512887
dataset_size: 511717570
- config_name: subset_419
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 531043725
num_examples: 1945
download_size: 82468881
dataset_size: 531043725
- config_name: subset_42
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 815869334
num_examples: 2322
download_size: 126553227
dataset_size: 815869334
- config_name: subset_420
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 520631918
num_examples: 1905
download_size: 80829772
dataset_size: 520631918
- config_name: subset_421
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 525932758
num_examples: 1928
download_size: 81648797
dataset_size: 525932758
- config_name: subset_422
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 519492770
num_examples: 1920
download_size: 80669724
dataset_size: 519492770
- config_name: subset_423
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 504774725
num_examples: 1866
download_size: 78400399
dataset_size: 504774725
- config_name: subset_424
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 506575267
num_examples: 1914
download_size: 78683868
dataset_size: 506575267
- config_name: subset_425
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 507984927
num_examples: 1894
download_size: 78902459
dataset_size: 507984927
- config_name: subset_43
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 821991670
num_examples: 2314
download_size: 127579521
dataset_size: 821991670
- config_name: subset_44
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 813794647
num_examples: 2343
download_size: 126265260
dataset_size: 813794647
- config_name: subset_45
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 814150214
num_examples: 2323
download_size: 126312319
dataset_size: 814150214
- config_name: subset_46
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 831736229
num_examples: 2328
download_size: 129074354
dataset_size: 831736229
- config_name: subset_47
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 804349610
num_examples: 2303
download_size: 124737087
dataset_size: 804349610
- config_name: subset_48
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 797151932
num_examples: 2269
download_size: 123520240
dataset_size: 797151932
- config_name: subset_49
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 794196758
num_examples: 2295
download_size: 123255723
dataset_size: 794196758
- config_name: subset_5
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 880665270
num_examples: 2369
download_size: 136657633
dataset_size: 880665270
- config_name: subset_50
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 805527014
num_examples: 2332
download_size: 124952694
dataset_size: 805527014
- config_name: subset_51
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 810987318
num_examples: 2320
download_size: 125909707
dataset_size: 810987318
- config_name: subset_52
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 804756878
num_examples: 2328
download_size: 124911962
dataset_size: 804756878
- config_name: subset_53
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 814579546
num_examples: 2346
download_size: 126323592
dataset_size: 814579546
- config_name: subset_54
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 817334379
num_examples: 2314
download_size: 126726543
dataset_size: 817334379
- config_name: subset_55
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 806167179
num_examples: 2340
download_size: 124962905
dataset_size: 806167179
- config_name: subset_56
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 788700818
num_examples: 2312
download_size: 122420697
dataset_size: 788700818
- config_name: subset_57
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 816783940
num_examples: 2309
download_size: 126678671
dataset_size: 816783940
- config_name: subset_58
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 802255501
num_examples: 2318
download_size: 124500843
dataset_size: 802255501
- config_name: subset_59
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 794304992
num_examples: 2332
download_size: 123265947
dataset_size: 794304992
- config_name: subset_6
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 870636443
num_examples: 2371
download_size: 135098487
dataset_size: 870636443
- config_name: subset_60
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 789133546
num_examples: 2319
download_size: 122322051
dataset_size: 789133546
- config_name: subset_61
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 797842879
num_examples: 2309
download_size: 123807907
dataset_size: 797842879
- config_name: subset_62
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 803617904
num_examples: 2334
download_size: 124639893
dataset_size: 803617904
- config_name: subset_63
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 576624503
num_examples: 1669
download_size: 89497145
dataset_size: 576624503
- config_name: subset_64
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 493957100
num_examples: 1451
download_size: 76672470
dataset_size: 493957100
- config_name: subset_65
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 809228985
num_examples: 2344
download_size: 125567690
dataset_size: 809228985
- config_name: subset_66
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 802463327
num_examples: 2324
download_size: 124524946
dataset_size: 802463327
- config_name: subset_67
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 809041834
num_examples: 2341
download_size: 125514106
dataset_size: 809041834
- config_name: subset_68
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 801443208
num_examples: 2331
download_size: 124409069
dataset_size: 801443208
- config_name: subset_69
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 767659950
num_examples: 2268
download_size: 119086299
dataset_size: 767659950
- config_name: subset_7
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 862300251
num_examples: 2336
download_size: 133899698
dataset_size: 862300251
- config_name: subset_70
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 807025850
num_examples: 2348
download_size: 125145079
dataset_size: 807025850
- config_name: subset_72
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 805465598
num_examples: 2344
download_size: 124948881
dataset_size: 805465598
- config_name: subset_8
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 860647946
num_examples: 2351
download_size: 133589761
dataset_size: 860647946
- config_name: subset_87
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 797836000
num_examples: 2335
download_size: 123676344
dataset_size: 797836000
- config_name: subset_88
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 790640388
num_examples: 2344
download_size: 122633634
dataset_size: 790640388
- config_name: subset_89
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 791670317
num_examples: 2317
download_size: 122761969
dataset_size: 791670317
- config_name: subset_9
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 853697723
num_examples: 2347
download_size: 132406781
dataset_size: 853697723
- config_name: subset_90
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 767339364
num_examples: 2330
download_size: 119048385
dataset_size: 767339364
- config_name: subset_91
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 803098340
num_examples: 2338
download_size: 124623706
dataset_size: 803098340
- config_name: subset_92
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 790881493
num_examples: 2341
download_size: 122681457
dataset_size: 790881493
- config_name: subset_93
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 792658076
num_examples: 2337
download_size: 122765555
dataset_size: 792658076
- config_name: subset_94
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 774818258
num_examples: 2319
download_size: 120167610
dataset_size: 774818258
- config_name: subset_95
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 773467274
num_examples: 2334
download_size: 119976106
dataset_size: 773467274
- config_name: subset_96
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 797247546
num_examples: 2314
download_size: 123609844
dataset_size: 797247546
- config_name: subset_97
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: frA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 794004389
num_examples: 2334
download_size: 123150930
dataset_size: 794004389
- config_name: subset_98
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 783265503
num_examples: 2319
download_size: 121463499
dataset_size: 783265503
- config_name: subset_99
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: frA.id
dtype: string
- name: frA.laser_score
dtype: float64
- name: frA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 776953555
num_examples: 2316
download_size: 120428249
dataset_size: 776953555
configs:
- config_name: subset_1
data_files:
- split: train
path: subset_1/train-*
- config_name: subset_10
data_files:
- split: train
path: subset_10/train-*
- config_name: subset_100
data_files:
- split: train
path: subset_100/train-*
- config_name: subset_101
data_files:
- split: train
path: subset_101/train-*
- config_name: subset_102
data_files:
- split: train
path: subset_102/train-*
- config_name: subset_103
data_files:
- split: train
path: subset_103/train-*
- config_name: subset_104
data_files:
- split: train
path: subset_104/train-*
- config_name: subset_105
data_files:
- split: train
path: subset_105/train-*
- config_name: subset_11
data_files:
- split: train
path: subset_11/train-*
- config_name: subset_12
data_files:
- split: train
path: subset_12/train-*
- config_name: subset_13
data_files:
- split: train
path: subset_13/train-*
- config_name: subset_14
data_files:
- split: train
path: subset_14/train-*
- config_name: subset_15
data_files:
- split: train
path: subset_15/train-*
- config_name: subset_16
data_files:
- split: train
path: subset_16/train-*
- config_name: subset_17
data_files:
- split: train
path: subset_17/train-*
- config_name: subset_18
data_files:
- split: train
path: subset_18/train-*
- config_name: subset_19
data_files:
- split: train
path: subset_19/train-*
- config_name: subset_2
data_files:
- split: train
path: subset_2/train-*
- config_name: subset_20
data_files:
- split: train
path: subset_20/train-*
- config_name: subset_21
data_files:
- split: train
path: subset_21/train-*
- config_name: subset_22
data_files:
- split: train
path: subset_22/train-*
- config_name: subset_23
data_files:
- split: train
path: subset_23/train-*
- config_name: subset_24
data_files:
- split: train
path: subset_24/train-*
- config_name: subset_25
data_files:
- split: train
path: subset_25/train-*
- config_name: subset_26
data_files:
- split: train
path: subset_26/train-*
- config_name: subset_27
data_files:
- split: train
path: subset_27/train-*
- config_name: subset_28
data_files:
- split: train
path: subset_28/train-*
- config_name: subset_29
data_files:
- split: train
path: subset_29/train-*
- config_name: subset_3
data_files:
- split: train
path: subset_3/train-*
- config_name: subset_30
data_files:
- split: train
path: subset_30/train-*
- config_name: subset_300
data_files:
- split: train
path: subset_300/train-*
- config_name: subset_301
data_files:
- split: train
path: subset_301/train-*
- config_name: subset_302
data_files:
- split: train
path: subset_302/train-*
- config_name: subset_303
data_files:
- split: train
path: subset_303/train-*
- config_name: subset_304
data_files:
- split: train
path: subset_304/train-*
- config_name: subset_305
data_files:
- split: train
path: subset_305/train-*
- config_name: subset_306
data_files:
- split: train
path: subset_306/train-*
- config_name: subset_307
data_files:
- split: train
path: subset_307/train-*
- config_name: subset_308
data_files:
- split: train
path: subset_308/train-*
- config_name: subset_309
data_files:
- split: train
path: subset_309/train-*
- config_name: subset_31
data_files:
- split: train
path: subset_31/train-*
- config_name: subset_310
data_files:
- split: train
path: subset_310/train-*
- config_name: subset_311
data_files:
- split: train
path: subset_311/train-*
- config_name: subset_312
data_files:
- split: train
path: subset_312/train-*
- config_name: subset_313
data_files:
- split: train
path: subset_313/train-*
- config_name: subset_314
data_files:
- split: train
path: subset_314/train-*
- config_name: subset_315
data_files:
- split: train
path: subset_315/train-*
- config_name: subset_316
data_files:
- split: train
path: subset_316/train-*
- config_name: subset_317
data_files:
- split: train
path: subset_317/train-*
- config_name: subset_318
data_files:
- split: train
path: subset_318/train-*
- config_name: subset_319
data_files:
- split: train
path: subset_319/train-*
- config_name: subset_32
data_files:
- split: train
path: subset_32/train-*
- config_name: subset_320
data_files:
- split: train
path: subset_320/train-*
- config_name: subset_321
data_files:
- split: train
path: subset_321/train-*
- config_name: subset_322
data_files:
- split: train
path: subset_322/train-*
- config_name: subset_323
data_files:
- split: train
path: subset_323/train-*
- config_name: subset_324
data_files:
- split: train
path: subset_324/train-*
- config_name: subset_325
data_files:
- split: train
path: subset_325/train-*
- config_name: subset_326
data_files:
- split: train
path: subset_326/train-*
- config_name: subset_327
data_files:
- split: train
path: subset_327/train-*
- config_name: subset_328
data_files:
- split: train
path: subset_328/train-*
- config_name: subset_329
data_files:
- split: train
path: subset_329/train-*
- config_name: subset_33
data_files:
- split: train
path: subset_33/train-*
- config_name: subset_330
data_files:
- split: train
path: subset_330/train-*
- config_name: subset_331
data_files:
- split: train
path: subset_331/train-*
- config_name: subset_332
data_files:
- split: train
path: subset_332/train-*
- config_name: subset_333
data_files:
- split: train
path: subset_333/train-*
- config_name: subset_334
data_files:
- split: train
path: subset_334/train-*
- config_name: subset_335
data_files:
- split: train
path: subset_335/train-*
- config_name: subset_336
data_files:
- split: train
path: subset_336/train-*
- config_name: subset_337
data_files:
- split: train
path: subset_337/train-*
- config_name: subset_338
data_files:
- split: train
path: subset_338/train-*
- config_name: subset_339
data_files:
- split: train
path: subset_339/train-*
- config_name: subset_34
data_files:
- split: train
path: subset_34/train-*
- config_name: subset_340
data_files:
- split: train
path: subset_340/train-*
- config_name: subset_341
data_files:
- split: train
path: subset_341/train-*
- config_name: subset_342
data_files:
- split: train
path: subset_342/train-*
- config_name: subset_343
data_files:
- split: train
path: subset_343/train-*
- config_name: subset_344
data_files:
- split: train
path: subset_344/train-*
- config_name: subset_345
data_files:
- split: train
path: subset_345/train-*
- config_name: subset_346
data_files:
- split: train
path: subset_346/train-*
- config_name: subset_347
data_files:
- split: train
path: subset_347/train-*
- config_name: subset_348
data_files:
- split: train
path: subset_348/train-*
- config_name: subset_349
data_files:
- split: train
path: subset_349/train-*
- config_name: subset_35
data_files:
- split: train
path: subset_35/train-*
- config_name: subset_350
data_files:
- split: train
path: subset_350/train-*
- config_name: subset_351
data_files:
- split: train
path: subset_351/train-*
- config_name: subset_352
data_files:
- split: train
path: subset_352/train-*
- config_name: subset_353
data_files:
- split: train
path: subset_353/train-*
- config_name: subset_354
data_files:
- split: train
path: subset_354/train-*
- config_name: subset_355
data_files:
- split: train
path: subset_355/train-*
- config_name: subset_356
data_files:
- split: train
path: subset_356/train-*
- config_name: subset_357
data_files:
- split: train
path: subset_357/train-*
- config_name: subset_358
data_files:
- split: train
path: subset_358/train-*
- config_name: subset_359
data_files:
- split: train
path: subset_359/train-*
- config_name: subset_36
data_files:
- split: train
path: subset_36/train-*
- config_name: subset_360
data_files:
- split: train
path: subset_360/train-*
- config_name: subset_361
data_files:
- split: train
path: subset_361/train-*
- config_name: subset_362
data_files:
- split: train
path: subset_362/train-*
- config_name: subset_363
data_files:
- split: train
path: subset_363/train-*
- config_name: subset_364
data_files:
- split: train
path: subset_364/train-*
- config_name: subset_365
data_files:
- split: train
path: subset_365/train-*
- config_name: subset_366
data_files:
- split: train
path: subset_366/train-*
- config_name: subset_368
data_files:
- split: train
path: subset_368/train-*
- config_name: subset_369
data_files:
- split: train
path: subset_369/train-*
- config_name: subset_37
data_files:
- split: train
path: subset_37/train-*
- config_name: subset_370
data_files:
- split: train
path: subset_370/train-*
- config_name: subset_371
data_files:
- split: train
path: subset_371/train-*
- config_name: subset_372
data_files:
- split: train
path: subset_372/train-*
- config_name: subset_373
data_files:
- split: train
path: subset_373/train-*
- config_name: subset_374
data_files:
- split: train
path: subset_374/train-*
- config_name: subset_375
data_files:
- split: train
path: subset_375/train-*
- config_name: subset_376
data_files:
- split: train
path: subset_376/train-*
- config_name: subset_377
data_files:
- split: train
path: subset_377/train-*
- config_name: subset_378
data_files:
- split: train
path: subset_378/train-*
- config_name: subset_379
data_files:
- split: train
path: subset_379/train-*
- config_name: subset_38
data_files:
- split: train
path: subset_38/train-*
- config_name: subset_380
data_files:
- split: train
path: subset_380/train-*
- config_name: subset_39
data_files:
- split: train
path: subset_39/train-*
- config_name: subset_399
data_files:
- split: train
path: subset_399/train-*
- config_name: subset_4
data_files:
- split: train
path: subset_4/train-*
- config_name: subset_40
data_files:
- split: train
path: subset_40/train-*
- config_name: subset_400
data_files:
- split: train
path: subset_400/train-*
- config_name: subset_401
data_files:
- split: train
path: subset_401/train-*
- config_name: subset_402
data_files:
- split: train
path: subset_402/train-*
- config_name: subset_403
data_files:
- split: train
path: subset_403/train-*
- config_name: subset_404
data_files:
- split: train
path: subset_404/train-*
- config_name: subset_405
data_files:
- split: train
path: subset_405/train-*
- config_name: subset_406
data_files:
- split: train
path: subset_406/train-*
- config_name: subset_407
data_files:
- split: train
path: subset_407/train-*
- config_name: subset_408
data_files:
- split: train
path: subset_408/train-*
- config_name: subset_409
data_files:
- split: train
path: subset_409/train-*
- config_name: subset_41
data_files:
- split: train
path: subset_41/train-*
- config_name: subset_410
data_files:
- split: train
path: subset_410/train-*
- config_name: subset_411
data_files:
- split: train
path: subset_411/train-*
- config_name: subset_412
data_files:
- split: train
path: subset_412/train-*
- config_name: subset_413
data_files:
- split: train
path: subset_413/train-*
- config_name: subset_414
data_files:
- split: train
path: subset_414/train-*
- config_name: subset_415
data_files:
- split: train
path: subset_415/train-*
- config_name: subset_416
data_files:
- split: train
path: subset_416/train-*
- config_name: subset_417
data_files:
- split: train
path: subset_417/train-*
- config_name: subset_418
data_files:
- split: train
path: subset_418/train-*
- config_name: subset_419
data_files:
- split: train
path: subset_419/train-*
- config_name: subset_42
data_files:
- split: train
path: subset_42/train-*
- config_name: subset_420
data_files:
- split: train
path: subset_420/train-*
- config_name: subset_421
data_files:
- split: train
path: subset_421/train-*
- config_name: subset_422
data_files:
- split: train
path: subset_422/train-*
- config_name: subset_423
data_files:
- split: train
path: subset_423/train-*
- config_name: subset_424
data_files:
- split: train
path: subset_424/train-*
- config_name: subset_425
data_files:
- split: train
path: subset_425/train-*
- config_name: subset_43
data_files:
- split: train
path: subset_43/train-*
- config_name: subset_44
data_files:
- split: train
path: subset_44/train-*
- config_name: subset_45
data_files:
- split: train
path: subset_45/train-*
- config_name: subset_46
data_files:
- split: train
path: subset_46/train-*
- config_name: subset_47
data_files:
- split: train
path: subset_47/train-*
- config_name: subset_48
data_files:
- split: train
path: subset_48/train-*
- config_name: subset_49
data_files:
- split: train
path: subset_49/train-*
- config_name: subset_5
data_files:
- split: train
path: subset_5/train-*
- config_name: subset_50
data_files:
- split: train
path: subset_50/train-*
- config_name: subset_51
data_files:
- split: train
path: subset_51/train-*
- config_name: subset_52
data_files:
- split: train
path: subset_52/train-*
- config_name: subset_53
data_files:
- split: train
path: subset_53/train-*
- config_name: subset_54
data_files:
- split: train
path: subset_54/train-*
- config_name: subset_55
data_files:
- split: train
path: subset_55/train-*
- config_name: subset_56
data_files:
- split: train
path: subset_56/train-*
- config_name: subset_57
data_files:
- split: train
path: subset_57/train-*
- config_name: subset_58
data_files:
- split: train
path: subset_58/train-*
- config_name: subset_59
data_files:
- split: train
path: subset_59/train-*
- config_name: subset_6
data_files:
- split: train
path: subset_6/train-*
- config_name: subset_60
data_files:
- split: train
path: subset_60/train-*
- config_name: subset_61
data_files:
- split: train
path: subset_61/train-*
- config_name: subset_62
data_files:
- split: train
path: subset_62/train-*
- config_name: subset_63
data_files:
- split: train
path: subset_63/train-*
- config_name: subset_64
data_files:
- split: train
path: subset_64/train-*
- config_name: subset_65
data_files:
- split: train
path: subset_65/train-*
- config_name: subset_66
data_files:
- split: train
path: subset_66/train-*
- config_name: subset_67
data_files:
- split: train
path: subset_67/train-*
- config_name: subset_68
data_files:
- split: train
path: subset_68/train-*
- config_name: subset_69
data_files:
- split: train
path: subset_69/train-*
- config_name: subset_7
data_files:
- split: train
path: subset_7/train-*
- config_name: subset_70
data_files:
- split: train
path: subset_70/train-*
- config_name: subset_72
data_files:
- split: train
path: subset_72/train-*
- config_name: subset_8
data_files:
- split: train
path: subset_8/train-*
- config_name: subset_87
data_files:
- split: train
path: subset_87/train-*
- config_name: subset_88
data_files:
- split: train
path: subset_88/train-*
- config_name: subset_89
data_files:
- split: train
path: subset_89/train-*
- config_name: subset_9
data_files:
- split: train
path: subset_9/train-*
- config_name: subset_90
data_files:
- split: train
path: subset_90/train-*
- config_name: subset_91
data_files:
- split: train
path: subset_91/train-*
- config_name: subset_92
data_files:
- split: train
path: subset_92/train-*
- config_name: subset_93
data_files:
- split: train
path: subset_93/train-*
- config_name: subset_94
data_files:
- split: train
path: subset_94/train-*
- config_name: subset_95
data_files:
- split: train
path: subset_95/train-*
- config_name: subset_96
data_files:
- split: train
path: subset_96/train-*
- config_name: subset_97
data_files:
- split: train
path: subset_97/train-*
- config_name: subset_98
data_files:
- split: train
path: subset_98/train-*
- config_name: subset_99
data_files:
- split: train
path: subset_99/train-*
---
The dataset consists of multiple subsets that all share the same feature structure: line number, English and French IDs, LASER scores, and audio tokens. Each subset has a single training split; the split size and the number of examples vary from subset to subset.
提供机构:
asahi417
原始信息汇总
数据集概述
数据集子集信息
| 子集名称 | 特征数量 | 主要特征 | 数据类型 | 训练集大小 | 训练集示例数量 | 下载大小 | 数据集大小 |
|---|---|---|---|---|---|---|---|
| subset_1 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 896960282 bytes | 2344 | 138828871 bytes | 896960282 bytes |
| subset_10 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 849195322 bytes | 2336 | 131866595 bytes | 849195322 bytes |
| subset_100 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 784236275 bytes | 2326 | 121581663 bytes | 784236275 bytes |
| subset_101 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 778808804 bytes | 2335 | 120877210 bytes | 778808804 bytes |
| subset_102 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 772584121 bytes | 2306 | 119739300 bytes | 772584121 bytes |
| subset_103 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 783315490 bytes | 2335 | 121528799 bytes | 783315490 bytes |
| subset_104 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 774101833 bytes | 2330 | 119997337 bytes | 774101833 bytes |
| subset_105 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 782038760 bytes | 2331 | 121205066 bytes | 782038760 bytes |
| subset_11 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 836642733 bytes | 2315 | 129888698 bytes | 836642733 bytes |
| subset_12 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 866050440 bytes | 2349 | 134452465 bytes | 866050440 bytes |
| subset_13 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 859275177 bytes | 2341 | 133374434 bytes | 859275177 bytes |
| subset_14 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 854462165 bytes | 2338 | 132567966 bytes | 854462165 bytes |
| subset_15 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 846548429 bytes | 2363 | 131471430 bytes | 846548429 bytes |
| subset_16 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 849723661 bytes | 2348 | 131823703 bytes | 849723661 bytes |
| subset_17 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 847314332 bytes | 2328 | 131528731 bytes | 847314332 bytes |
| subset_18 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 856549312 bytes | 2350 | 132883574 bytes | 856549312 bytes |
| subset_19 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 849231130 bytes | 2348 | 131806664 bytes | 849231130 bytes |
| subset_2 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 885549898 bytes | 2364 | 137223195 bytes | 885549898 bytes |
| subset_20 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 829390963 bytes | 2346 | 128708237 bytes | 829390963 bytes |
| subset_21 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 841492981 bytes | 2335 | 130599466 bytes | 841492981 bytes |
| subset_22 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 839566978 bytes | 2331 | 130433057 bytes | 839566978 bytes |
| subset_23 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 831309666 bytes | 2337 | 129065953 bytes | 831309666 bytes |
| subset_24 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 843207728 bytes | 2333 | 130820123 bytes | 843207728 bytes |
| subset_25 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 826543551 bytes | 2319 | 128228152 bytes | 826543551 bytes |
| subset_26 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 856166635 bytes | 2338 | 132709958 bytes | 856166635 bytes |
| subset_27 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 835216842 bytes | 2319 | 129640044 bytes | 835216842 bytes |
| subset_28 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 839904123 bytes | 2334 | 130295941 bytes | 839904123 bytes |
| subset_29 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 787229439 bytes | 2182 | 122258606 bytes | 787229439 bytes |
| subset_3 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 877322523 bytes | 2354 | 136064499 bytes | 877322523 bytes |
| subset_30 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 731200910 bytes | 2049 | 113489889 bytes | 731200910 bytes |
| subset_300 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 615595453 bytes | 2112 | 95508534 bytes | 615595453 bytes |
| subset_301 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 618759301 bytes | 2080 | 95997104 bytes | 618759301 bytes |
| subset_302 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 632889940 bytes | 2108 | 98160959 bytes | 632889940 bytes |
| subset_303 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 609269662 bytes | 2115 | 94590996 bytes | 609269662 bytes |
| subset_304 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64, string, float64, int64 | 621920484 bytes | 2109 | 96549400 bytes | 621920484 bytes |
| subset_305 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens | int64, string, float64, int64 | 621509233 bytes | 2108 | 96426327 bytes | 621509233 bytes |
| subset_306 | 7 | line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens | int64 |



