asahi417/seamless-align-deA-enA.tokenized
收藏Hugging Face2024-06-10 更新2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/asahi417/seamless-align-deA-enA.tokenized
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: subset_1
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 825169182
num_examples: 2064
download_size: 127787389
dataset_size: 825169182
- config_name: subset_10
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 805994313
num_examples: 2109
download_size: 124771864
dataset_size: 805994313
- config_name: subset_100
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 589512805
num_examples: 1982
download_size: 91508494
dataset_size: 589512805
- config_name: subset_101
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 603841640
num_examples: 2029
download_size: 93644008
dataset_size: 603841640
- config_name: subset_102
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 604643147
num_examples: 2029
download_size: 93825688
dataset_size: 604643147
- config_name: subset_103
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 601136745
num_examples: 1982
download_size: 93374940
dataset_size: 601136745
- config_name: subset_104
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 590015590
num_examples: 1985
download_size: 91516565
dataset_size: 590015590
- config_name: subset_11
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 811970945
num_examples: 2135
download_size: 125769382
dataset_size: 811970945
- config_name: subset_12
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 783606636
num_examples: 2110
download_size: 121396474
dataset_size: 783606636
- config_name: subset_13
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 818546882
num_examples: 2163
download_size: 126774705
dataset_size: 818546882
- config_name: subset_14
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 794334972
num_examples: 2102
download_size: 123031776
dataset_size: 794334972
- config_name: subset_15
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 779790032
num_examples: 2114
download_size: 120716436
dataset_size: 779790032
- config_name: subset_16
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 788648344
num_examples: 2141
download_size: 121996920
dataset_size: 788648344
- config_name: subset_17
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 781308113
num_examples: 2108
download_size: 120886271
dataset_size: 781308113
- config_name: subset_18
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 805637299
num_examples: 2101
download_size: 124644421
dataset_size: 805637299
- config_name: subset_19
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 783993898
num_examples: 2118
download_size: 121391405
dataset_size: 783993898
- config_name: subset_2
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 834363947
num_examples: 2058
download_size: 129174194
dataset_size: 834363947
- config_name: subset_20
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 800202881
num_examples: 2147
download_size: 123592413
dataset_size: 800202881
- config_name: subset_201
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 455804873
num_examples: 1837
download_size: 70871882
dataset_size: 455804873
- config_name: subset_202
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 471221210
num_examples: 1830
download_size: 73352192
dataset_size: 471221210
- config_name: subset_203
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 470510522
num_examples: 1865
download_size: 73145719
dataset_size: 470510522
- config_name: subset_204
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 464875087
num_examples: 1882
download_size: 72339101
dataset_size: 464875087
- config_name: subset_205
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 475206665
num_examples: 1872
download_size: 73888571
dataset_size: 475206665
- config_name: subset_206
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 475125067
num_examples: 1873
download_size: 73962791
dataset_size: 475125067
- config_name: subset_207
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 469106835
num_examples: 1883
download_size: 72993481
dataset_size: 469106835
- config_name: subset_208
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 483311972
num_examples: 1889
download_size: 75081724
dataset_size: 483311972
- config_name: subset_209
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 473893920
num_examples: 1872
download_size: 73774499
dataset_size: 473893920
- config_name: subset_21
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 780786641
num_examples: 2145
download_size: 120754704
dataset_size: 780786641
- config_name: subset_210
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 465836940
num_examples: 1851
download_size: 72365720
dataset_size: 465836940
- config_name: subset_211
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 477889069
num_examples: 1872
download_size: 74294090
dataset_size: 477889069
- config_name: subset_212
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 467755952
num_examples: 1821
download_size: 72736185
dataset_size: 467755952
- config_name: subset_213
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 460575587
num_examples: 1858
download_size: 71678240
dataset_size: 460575587
- config_name: subset_214
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 464251327
num_examples: 1853
download_size: 72206408
dataset_size: 464251327
- config_name: subset_215
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 447187295
num_examples: 1807
download_size: 69514232
dataset_size: 447187295
- config_name: subset_216
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 467359331
num_examples: 1845
download_size: 72710439
dataset_size: 467359331
- config_name: subset_217
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 468473869
num_examples: 1869
download_size: 72746451
dataset_size: 468473869
- config_name: subset_218
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 467298903
num_examples: 1873
download_size: 72677978
dataset_size: 467298903
- config_name: subset_219
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 473338664
num_examples: 1885
download_size: 73518190
dataset_size: 473338664
- config_name: subset_22
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 713048682
num_examples: 1937
download_size: 110420921
dataset_size: 713048682
- config_name: subset_220
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 459819067
num_examples: 1845
download_size: 71591236
dataset_size: 459819067
- config_name: subset_221
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 445755564
num_examples: 1818
download_size: 69342732
dataset_size: 445755564
- config_name: subset_222
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 470899065
num_examples: 1823
download_size: 73177277
dataset_size: 470899065
- config_name: subset_223
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 477061528
num_examples: 1880
download_size: 74238269
dataset_size: 477061528
- config_name: subset_224
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 469743576
num_examples: 1874
download_size: 73038089
dataset_size: 469743576
- config_name: subset_225
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 465521093
num_examples: 1834
download_size: 72441750
dataset_size: 465521093
- config_name: subset_226
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 458185439
num_examples: 1824
download_size: 71261007
dataset_size: 458185439
- config_name: subset_227
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 473122449
num_examples: 1852
download_size: 73601289
dataset_size: 473122449
- config_name: subset_228
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 453763148
num_examples: 1826
download_size: 70547790
dataset_size: 453763148
- config_name: subset_229
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 461461459
num_examples: 1871
download_size: 71736428
dataset_size: 461461459
- config_name: subset_23
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 679208716
num_examples: 1877
download_size: 105107036
dataset_size: 679208716
- config_name: subset_230
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 461297558
num_examples: 1867
download_size: 71715789
dataset_size: 461297558
- config_name: subset_231
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 470652743
num_examples: 1848
download_size: 73180993
dataset_size: 470652743
- config_name: subset_232
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 449862927
num_examples: 1822
download_size: 69917201
dataset_size: 449862927
- config_name: subset_233
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 415587349
num_examples: 1678
download_size: 64648762
dataset_size: 415587349
- config_name: subset_234
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 478912402
num_examples: 1877
download_size: 74523656
dataset_size: 478912402
- config_name: subset_235
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 428724834
num_examples: 1776
download_size: 66786781
dataset_size: 428724834
- config_name: subset_236
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 459720683
num_examples: 1875
download_size: 71500472
dataset_size: 459720683
- config_name: subset_237
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463323185
num_examples: 1832
download_size: 72028354
dataset_size: 463323185
- config_name: subset_238
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457498482
num_examples: 1861
download_size: 71159832
dataset_size: 457498482
- config_name: subset_239
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463158797
num_examples: 1853
download_size: 72075249
dataset_size: 463158797
- config_name: subset_24
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 764594206
num_examples: 2121
download_size: 118344683
dataset_size: 764594206
- config_name: subset_240
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 456036783
num_examples: 1846
download_size: 71042786
dataset_size: 456036783
- config_name: subset_241
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 465863104
num_examples: 1840
download_size: 72424009
dataset_size: 465863104
- config_name: subset_242
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 454519941
num_examples: 1839
download_size: 70756181
dataset_size: 454519941
- config_name: subset_243
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457380472
num_examples: 1854
download_size: 71136981
dataset_size: 457380472
- config_name: subset_244
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 455281509
num_examples: 1845
download_size: 70866789
dataset_size: 455281509
- config_name: subset_245
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 465679512
num_examples: 1848
download_size: 72420946
dataset_size: 465679512
- config_name: subset_246
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 461619007
num_examples: 1832
download_size: 71821356
dataset_size: 461619007
- config_name: subset_247
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463913422
num_examples: 1850
download_size: 72203860
dataset_size: 463913422
- config_name: subset_248
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 449477559
num_examples: 1824
download_size: 69979068
dataset_size: 449477559
- config_name: subset_249
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 454727949
num_examples: 1838
download_size: 70790199
dataset_size: 454727949
- config_name: subset_25
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 750520578
num_examples: 2121
download_size: 116341204
dataset_size: 750520578
- config_name: subset_250
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463813826
num_examples: 1840
download_size: 72065645
dataset_size: 463813826
- config_name: subset_251
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 450689170
num_examples: 1839
download_size: 70046254
dataset_size: 450689170
- config_name: subset_252
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 467954424
num_examples: 1843
download_size: 72784863
dataset_size: 467954424
- config_name: subset_253
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 455568041
num_examples: 1865
download_size: 70860848
dataset_size: 455568041
- config_name: subset_254
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 467139289
num_examples: 1840
download_size: 72651087
dataset_size: 467139289
- config_name: subset_255
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 461968254
num_examples: 1864
download_size: 71891732
dataset_size: 461968254
- config_name: subset_256
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 459536957
num_examples: 1838
download_size: 71427986
dataset_size: 459536957
- config_name: subset_257
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 461614133
num_examples: 1826
download_size: 71804418
dataset_size: 461614133
- config_name: subset_258
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 458706356
num_examples: 1818
download_size: 71423859
dataset_size: 458706356
- config_name: subset_259
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 460799913
num_examples: 1852
download_size: 71619019
dataset_size: 460799913
- config_name: subset_26
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 777975322
num_examples: 2130
download_size: 120477925
dataset_size: 777975322
- config_name: subset_260
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 448876933
num_examples: 1792
download_size: 69879669
dataset_size: 448876933
- config_name: subset_261
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 444714246
num_examples: 1778
download_size: 69167074
dataset_size: 444714246
- config_name: subset_262
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 435429396
num_examples: 1769
download_size: 67707400
dataset_size: 435429396
- config_name: subset_263
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 442643122
num_examples: 1805
download_size: 68854800
dataset_size: 442643122
- config_name: subset_264
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 467382521
num_examples: 1866
download_size: 72647839
dataset_size: 467382521
- config_name: subset_265
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 465116706
num_examples: 1844
download_size: 72344840
dataset_size: 465116706
- config_name: subset_266
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 460363257
num_examples: 1847
download_size: 71663941
dataset_size: 460363257
- config_name: subset_267
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 447759075
num_examples: 1787
download_size: 69646181
dataset_size: 447759075
- config_name: subset_268
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 447527426
num_examples: 1806
download_size: 69650289
dataset_size: 447527426
- config_name: subset_269
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 455076434
num_examples: 1795
download_size: 70700230
dataset_size: 455076434
- config_name: subset_27
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 753948138
num_examples: 2130
download_size: 116583764
dataset_size: 753948138
- config_name: subset_270
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 439216865
num_examples: 1834
download_size: 68362426
dataset_size: 439216865
- config_name: subset_271
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 444051739
num_examples: 1800
download_size: 69025984
dataset_size: 444051739
- config_name: subset_272
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 451361399
num_examples: 1801
download_size: 70236747
dataset_size: 451361399
- config_name: subset_273
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 450833143
num_examples: 1831
download_size: 70159969
dataset_size: 450833143
- config_name: subset_274
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 443152201
num_examples: 1810
download_size: 69004857
dataset_size: 443152201
- config_name: subset_275
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 437646815
num_examples: 1790
download_size: 68134481
dataset_size: 437646815
- config_name: subset_276
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 468067264
num_examples: 1847
download_size: 72861842
dataset_size: 468067264
- config_name: subset_277
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 444762742
num_examples: 1816
download_size: 69209157
dataset_size: 444762742
- config_name: subset_278
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 445494254
num_examples: 1794
download_size: 69369643
dataset_size: 445494254
- config_name: subset_279
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 451535875
num_examples: 1856
download_size: 70264746
dataset_size: 451535875
- config_name: subset_28
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 768459980
num_examples: 2158
download_size: 119153507
dataset_size: 768459980
- config_name: subset_280
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 452095907
num_examples: 1837
download_size: 70319587
dataset_size: 452095907
- config_name: subset_281
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 452843992
num_examples: 1846
download_size: 70439416
dataset_size: 452843992
- config_name: subset_282
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 442466310
num_examples: 1822
download_size: 68938284
dataset_size: 442466310
- config_name: subset_283
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 456366256
num_examples: 1840
download_size: 71063106
dataset_size: 456366256
- config_name: subset_284
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 456985136
num_examples: 1838
download_size: 71080220
dataset_size: 456985136
- config_name: subset_285
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 444510822
num_examples: 1817
download_size: 69173611
dataset_size: 444510822
- config_name: subset_286
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 427358663
num_examples: 1752
download_size: 66512482
dataset_size: 427358663
- config_name: subset_287
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 453258158
num_examples: 1865
download_size: 70544876
dataset_size: 453258158
- config_name: subset_288
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 439079178
num_examples: 1800
download_size: 68409707
dataset_size: 439079178
- config_name: subset_289
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 450499402
num_examples: 1820
download_size: 70119686
dataset_size: 450499402
- config_name: subset_29
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 766963416
num_examples: 2137
download_size: 118867120
dataset_size: 766963416
- config_name: subset_290
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 475888515
num_examples: 1883
download_size: 74007259
dataset_size: 475888515
- config_name: subset_291
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457427842
num_examples: 1823
download_size: 71267149
dataset_size: 457427842
- config_name: subset_292
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 439691387
num_examples: 1764
download_size: 68456956
dataset_size: 439691387
- config_name: subset_293
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 458482146
num_examples: 1831
download_size: 71465681
dataset_size: 458482146
- config_name: subset_294
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457080199
num_examples: 1833
download_size: 71182815
dataset_size: 457080199
- config_name: subset_295
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 451491839
num_examples: 1797
download_size: 70260756
dataset_size: 451491839
- config_name: subset_296
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 453242634
num_examples: 1803
download_size: 70573828
dataset_size: 453242634
- config_name: subset_297
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457539984
num_examples: 1836
download_size: 71222302
dataset_size: 457539984
- config_name: subset_298
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463968449
num_examples: 1837
download_size: 72170257
dataset_size: 463968449
- config_name: subset_299
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 448448354
num_examples: 1805
download_size: 69722082
dataset_size: 448448354
- config_name: subset_3
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 801408978
num_examples: 2054
download_size: 124173760
dataset_size: 801408978
- config_name: subset_30
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 772041395
num_examples: 2170
download_size: 119611594
dataset_size: 772041395
- config_name: subset_300
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 403356714
num_examples: 1650
download_size: 62781261
dataset_size: 403356714
- config_name: subset_301
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 357830915
num_examples: 1484
download_size: 55778910
dataset_size: 357830915
- config_name: subset_302
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 446675162
num_examples: 1837
download_size: 69489021
dataset_size: 446675162
- config_name: subset_303
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463700389
num_examples: 1827
download_size: 72198221
dataset_size: 463700389
- config_name: subset_304
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 434254955
num_examples: 1767
download_size: 67648267
dataset_size: 434254955
- config_name: subset_305
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 449552507
num_examples: 1820
download_size: 69987372
dataset_size: 449552507
- config_name: subset_306
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 448137698
num_examples: 1784
download_size: 69553193
dataset_size: 448137698
- config_name: subset_307
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 451189125
num_examples: 1792
download_size: 70241746
dataset_size: 451189125
- config_name: subset_308
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 449864336
num_examples: 1823
download_size: 69967169
dataset_size: 449864336
- config_name: subset_309
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 455754038
num_examples: 1835
download_size: 70931105
dataset_size: 455754038
- config_name: subset_31
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 772464281
num_examples: 2119
download_size: 119576111
dataset_size: 772464281
- config_name: subset_310
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463144261
num_examples: 1851
download_size: 72129824
dataset_size: 463144261
- config_name: subset_311
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 449410690
num_examples: 1855
download_size: 69984472
dataset_size: 449410690
- config_name: subset_312
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 446552411
num_examples: 1789
download_size: 69461029
dataset_size: 446552411
- config_name: subset_313
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 443926020
num_examples: 1794
download_size: 69047584
dataset_size: 443926020
- config_name: subset_314
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 454802715
num_examples: 1820
download_size: 70826706
dataset_size: 454802715
- config_name: subset_315
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 455632078
num_examples: 1831
download_size: 70953044
dataset_size: 455632078
- config_name: subset_316
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463376423
num_examples: 1905
download_size: 72134477
dataset_size: 463376423
- config_name: subset_317
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 455668536
num_examples: 1829
download_size: 70989146
dataset_size: 455668536
- config_name: subset_318
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 461158903
num_examples: 1822
download_size: 71660068
dataset_size: 461158903
- config_name: subset_319
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 459012366
num_examples: 1825
download_size: 71417592
dataset_size: 459012366
- config_name: subset_32
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 755841096
num_examples: 2153
download_size: 116990982
dataset_size: 755841096
- config_name: subset_320
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 458289808
num_examples: 1839
download_size: 71296447
dataset_size: 458289808
- config_name: subset_33
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 741466971
num_examples: 2135
download_size: 114881032
dataset_size: 741466971
- config_name: subset_34
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 742476408
num_examples: 2132
download_size: 114840204
dataset_size: 742476408
- config_name: subset_35
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 757057387
num_examples: 2188
download_size: 117167420
dataset_size: 757057387
- config_name: subset_354
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457638927
num_examples: 1794
download_size: 71264788
dataset_size: 457638927
- config_name: subset_355
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463229522
num_examples: 1826
download_size: 72093897
dataset_size: 463229522
- config_name: subset_356
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 448774751
num_examples: 1814
download_size: 69850619
dataset_size: 448774751
- config_name: subset_357
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457420272
num_examples: 1812
download_size: 71250557
dataset_size: 457420272
- config_name: subset_358
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 473942836
num_examples: 1879
download_size: 73870888
dataset_size: 473942836
- config_name: subset_359
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 451617476
num_examples: 1812
download_size: 70184286
dataset_size: 451617476
- config_name: subset_36
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 743010379
num_examples: 2131
download_size: 115100691
dataset_size: 743010379
- config_name: subset_360
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 446947834
num_examples: 1809
download_size: 69616927
dataset_size: 446947834
- config_name: subset_361
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 454487601
num_examples: 1798
download_size: 70765102
dataset_size: 454487601
- config_name: subset_362
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 469840616
num_examples: 1856
download_size: 73051726
dataset_size: 469840616
- config_name: subset_363
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 461915381
num_examples: 1841
download_size: 71894344
dataset_size: 461915381
- config_name: subset_364
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 448444972
num_examples: 1801
download_size: 69761673
dataset_size: 448444972
- config_name: subset_365
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 451455879
num_examples: 1802
download_size: 70292533
dataset_size: 451455879
- config_name: subset_366
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 444295871
num_examples: 1825
download_size: 69173686
dataset_size: 444295871
- config_name: subset_367
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 453633935
num_examples: 1802
download_size: 70566822
dataset_size: 453633935
- config_name: subset_368
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 445544099
num_examples: 1821
download_size: 69397768
dataset_size: 445544099
- config_name: subset_369
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 459275436
num_examples: 1824
download_size: 71531089
dataset_size: 459275436
- config_name: subset_37
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 751749756
num_examples: 2169
download_size: 116497579
dataset_size: 751749756
- config_name: subset_370
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 456011609
num_examples: 1817
download_size: 70914678
dataset_size: 456011609
- config_name: subset_371
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 443829563
num_examples: 1779
download_size: 69077681
dataset_size: 443829563
- config_name: subset_372
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 453500307
num_examples: 1796
download_size: 70589802
dataset_size: 453500307
- config_name: subset_373
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457282029
num_examples: 1816
download_size: 71198365
dataset_size: 457282029
- config_name: subset_374
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 456723584
num_examples: 1855
download_size: 71057761
dataset_size: 456723584
- config_name: subset_375
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 456194192
num_examples: 1780
download_size: 70974887
dataset_size: 456194192
- config_name: subset_376
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 444508212
num_examples: 1810
download_size: 69126071
dataset_size: 444508212
- config_name: subset_377
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 443938745
num_examples: 1780
download_size: 69114200
dataset_size: 443938745
- config_name: subset_378
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 447834390
num_examples: 1813
download_size: 69679448
dataset_size: 447834390
- config_name: subset_379
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 443182378
num_examples: 1819
download_size: 69021490
dataset_size: 443182378
- config_name: subset_38
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 739675803
num_examples: 2120
download_size: 114637439
dataset_size: 739675803
- config_name: subset_380
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 461645622
num_examples: 1823
download_size: 71898796
dataset_size: 461645622
- config_name: subset_381
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 449864744
num_examples: 1782
download_size: 70072664
dataset_size: 449864744
- config_name: subset_382
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 435436156
num_examples: 1780
download_size: 67811495
dataset_size: 435436156
- config_name: subset_383
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 459763764
num_examples: 1807
download_size: 71551451
dataset_size: 459763764
- config_name: subset_384
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 451083732
num_examples: 1806
download_size: 70140392
dataset_size: 451083732
- config_name: subset_385
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 452186717
num_examples: 1823
download_size: 70410320
dataset_size: 452186717
- config_name: subset_386
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 452299074
num_examples: 1769
download_size: 70378813
dataset_size: 452299074
- config_name: subset_387
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 428694441
num_examples: 1777
download_size: 66747077
dataset_size: 428694441
- config_name: subset_388
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 468152378
num_examples: 1873
download_size: 72895014
dataset_size: 468152378
- config_name: subset_389
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 453027663
num_examples: 1830
download_size: 70505279
dataset_size: 453027663
- config_name: subset_39
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 744786604
num_examples: 2142
download_size: 115382986
dataset_size: 744786604
- config_name: subset_390
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 442143896
num_examples: 1789
download_size: 68847532
dataset_size: 442143896
- config_name: subset_391
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 457519077
num_examples: 1825
download_size: 71192760
dataset_size: 457519077
- config_name: subset_392
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 450905071
num_examples: 1831
download_size: 70277918
dataset_size: 450905071
- config_name: subset_393
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 458167098
num_examples: 1832
download_size: 71240611
dataset_size: 458167098
- config_name: subset_394
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 19001691
num_examples: 70
download_size: 2977521
dataset_size: 19001691
- config_name: subset_4
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 827721221
num_examples: 2086
download_size: 128043606
dataset_size: 827721221
- config_name: subset_40
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 727962815
num_examples: 2145
download_size: 112685767
dataset_size: 727962815
- config_name: subset_41
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 736403859
num_examples: 2123
download_size: 114006472
dataset_size: 736403859
- config_name: subset_42
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 709758249
num_examples: 2084
download_size: 109943941
dataset_size: 709758249
- config_name: subset_43
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 705674169
num_examples: 2083
download_size: 109446898
dataset_size: 705674169
- config_name: subset_44
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 730124720
num_examples: 2101
download_size: 113008676
dataset_size: 730124720
- config_name: subset_45
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 738722529
num_examples: 2124
download_size: 114323151
dataset_size: 738722529
- config_name: subset_46
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 729284107
num_examples: 2149
download_size: 113032918
dataset_size: 729284107
- config_name: subset_47
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 726761393
num_examples: 2130
download_size: 112674445
dataset_size: 726761393
- config_name: subset_48
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 721207275
num_examples: 2118
download_size: 111809397
dataset_size: 721207275
- config_name: subset_49
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 729214392
num_examples: 2114
download_size: 112900424
dataset_size: 729214392
- config_name: subset_5
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 799320422
num_examples: 2053
download_size: 123791184
dataset_size: 799320422
- config_name: subset_50
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 701585332
num_examples: 2073
download_size: 108565065
dataset_size: 701585332
- config_name: subset_51
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 732108705
num_examples: 2142
download_size: 113410234
dataset_size: 732108705
- config_name: subset_52
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 711218307
num_examples: 2107
download_size: 110105806
dataset_size: 711218307
- config_name: subset_53
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 698877538
num_examples: 2122
download_size: 108360775
dataset_size: 698877538
- config_name: subset_54
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 690534917
num_examples: 2127
download_size: 107134792
dataset_size: 690534917
- config_name: subset_55
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 705716892
num_examples: 2104
download_size: 109300516
dataset_size: 705716892
- config_name: subset_56
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 702278164
num_examples: 2115
download_size: 109009474
dataset_size: 702278164
- config_name: subset_57
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 705519807
num_examples: 2105
download_size: 109325029
dataset_size: 705519807
- config_name: subset_58
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 687118080
num_examples: 2121
download_size: 106416668
dataset_size: 687118080
- config_name: subset_59
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 685359213
num_examples: 2088
download_size: 106230112
dataset_size: 685359213
- config_name: subset_6
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 815073901
num_examples: 2100
download_size: 126294871
dataset_size: 815073901
- config_name: subset_60
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 693773608
num_examples: 2119
download_size: 107667067
dataset_size: 693773608
- config_name: subset_61
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 383490878
num_examples: 1195
download_size: 59616418
dataset_size: 383490878
- config_name: subset_62
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 616955337
num_examples: 1867
download_size: 95547138
dataset_size: 616955337
- config_name: subset_63
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 671659254
num_examples: 2078
download_size: 104099974
dataset_size: 671659254
- config_name: subset_64
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 689030357
num_examples: 2118
download_size: 106905857
dataset_size: 689030357
- config_name: subset_65
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 694831930
num_examples: 2122
download_size: 107709849
dataset_size: 694831930
- config_name: subset_66
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 696917997
num_examples: 2137
download_size: 107937327
dataset_size: 696917997
- config_name: subset_67
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 678301811
num_examples: 2113
download_size: 105231860
dataset_size: 678301811
- config_name: subset_68
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 672072994
num_examples: 2078
download_size: 104193430
dataset_size: 672072994
- config_name: subset_69
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 681521768
num_examples: 2110
download_size: 105567128
dataset_size: 681521768
- config_name: subset_7
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 795107132
num_examples: 2071
download_size: 123098493
dataset_size: 795107132
- config_name: subset_70
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 680430611
num_examples: 2131
download_size: 105372607
dataset_size: 680430611
- config_name: subset_71
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 679614376
num_examples: 2114
download_size: 105380787
dataset_size: 679614376
- config_name: subset_72
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 678548535
num_examples: 2106
download_size: 105237553
dataset_size: 678548535
- config_name: subset_8
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 812551355
num_examples: 2085
download_size: 125940395
dataset_size: 812551355
- config_name: subset_9
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 806272257
num_examples: 2096
download_size: 125032729
dataset_size: 806272257
- config_name: subset_92
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 616966850
num_examples: 2039
download_size: 95560783
dataset_size: 616966850
- config_name: subset_93
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 601137903
num_examples: 1999
download_size: 93286202
dataset_size: 601137903
- config_name: subset_94
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 613341047
num_examples: 2054
download_size: 95148390
dataset_size: 613341047
- config_name: subset_95
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: deA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 615183805
num_examples: 2030
download_size: 95449914
dataset_size: 615183805
- config_name: subset_96
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 602105112
num_examples: 2009
download_size: 93472916
dataset_size: 602105112
- config_name: subset_97
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 598568153
num_examples: 1978
download_size: 92860869
dataset_size: 598568153
- config_name: subset_98
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 590891790
num_examples: 1982
download_size: 91495197
dataset_size: 590891790
- config_name: subset_99
features:
- name: line_no
dtype: int64
- name: deA.id
dtype: string
- name: deA.laser_score
dtype: float64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: deA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 598432146
num_examples: 2045
download_size: 92927374
dataset_size: 598432146
configs:
- config_name: subset_1
data_files:
- split: train
path: subset_1/train-*
- config_name: subset_10
data_files:
- split: train
path: subset_10/train-*
- config_name: subset_100
data_files:
- split: train
path: subset_100/train-*
- config_name: subset_101
data_files:
- split: train
path: subset_101/train-*
- config_name: subset_102
data_files:
- split: train
path: subset_102/train-*
- config_name: subset_103
data_files:
- split: train
path: subset_103/train-*
- config_name: subset_104
data_files:
- split: train
path: subset_104/train-*
- config_name: subset_11
data_files:
- split: train
path: subset_11/train-*
- config_name: subset_12
data_files:
- split: train
path: subset_12/train-*
- config_name: subset_13
data_files:
- split: train
path: subset_13/train-*
- config_name: subset_14
data_files:
- split: train
path: subset_14/train-*
- config_name: subset_15
data_files:
- split: train
path: subset_15/train-*
- config_name: subset_16
data_files:
- split: train
path: subset_16/train-*
- config_name: subset_17
data_files:
- split: train
path: subset_17/train-*
- config_name: subset_18
data_files:
- split: train
path: subset_18/train-*
- config_name: subset_19
data_files:
- split: train
path: subset_19/train-*
- config_name: subset_2
data_files:
- split: train
path: subset_2/train-*
- config_name: subset_20
data_files:
- split: train
path: subset_20/train-*
- config_name: subset_201
data_files:
- split: train
path: subset_201/train-*
- config_name: subset_202
data_files:
- split: train
path: subset_202/train-*
- config_name: subset_203
data_files:
- split: train
path: subset_203/train-*
- config_name: subset_204
data_files:
- split: train
path: subset_204/train-*
- config_name: subset_205
data_files:
- split: train
path: subset_205/train-*
- config_name: subset_206
data_files:
- split: train
path: subset_206/train-*
- config_name: subset_207
data_files:
- split: train
path: subset_207/train-*
- config_name: subset_208
data_files:
- split: train
path: subset_208/train-*
- config_name: subset_209
data_files:
- split: train
path: subset_209/train-*
- config_name: subset_21
data_files:
- split: train
path: subset_21/train-*
- config_name: subset_210
data_files:
- split: train
path: subset_210/train-*
- config_name: subset_211
data_files:
- split: train
path: subset_211/train-*
- config_name: subset_212
data_files:
- split: train
path: subset_212/train-*
- config_name: subset_213
data_files:
- split: train
path: subset_213/train-*
- config_name: subset_214
data_files:
- split: train
path: subset_214/train-*
- config_name: subset_215
data_files:
- split: train
path: subset_215/train-*
- config_name: subset_216
data_files:
- split: train
path: subset_216/train-*
- config_name: subset_217
data_files:
- split: train
path: subset_217/train-*
- config_name: subset_218
data_files:
- split: train
path: subset_218/train-*
- config_name: subset_219
data_files:
- split: train
path: subset_219/train-*
- config_name: subset_22
data_files:
- split: train
path: subset_22/train-*
- config_name: subset_220
data_files:
- split: train
path: subset_220/train-*
- config_name: subset_221
data_files:
- split: train
path: subset_221/train-*
- config_name: subset_222
data_files:
- split: train
path: subset_222/train-*
- config_name: subset_223
data_files:
- split: train
path: subset_223/train-*
- config_name: subset_224
data_files:
- split: train
path: subset_224/train-*
- config_name: subset_225
data_files:
- split: train
path: subset_225/train-*
- config_name: subset_226
data_files:
- split: train
path: subset_226/train-*
- config_name: subset_227
data_files:
- split: train
path: subset_227/train-*
- config_name: subset_228
data_files:
- split: train
path: subset_228/train-*
- config_name: subset_229
data_files:
- split: train
path: subset_229/train-*
- config_name: subset_23
data_files:
- split: train
path: subset_23/train-*
- config_name: subset_230
data_files:
- split: train
path: subset_230/train-*
- config_name: subset_231
data_files:
- split: train
path: subset_231/train-*
- config_name: subset_232
data_files:
- split: train
path: subset_232/train-*
- config_name: subset_233
data_files:
- split: train
path: subset_233/train-*
- config_name: subset_234
data_files:
- split: train
path: subset_234/train-*
- config_name: subset_235
data_files:
- split: train
path: subset_235/train-*
- config_name: subset_236
data_files:
- split: train
path: subset_236/train-*
- config_name: subset_237
data_files:
- split: train
path: subset_237/train-*
- config_name: subset_238
data_files:
- split: train
path: subset_238/train-*
- config_name: subset_239
data_files:
- split: train
path: subset_239/train-*
- config_name: subset_24
data_files:
- split: train
path: subset_24/train-*
- config_name: subset_240
data_files:
- split: train
path: subset_240/train-*
- config_name: subset_241
data_files:
- split: train
path: subset_241/train-*
- config_name: subset_242
data_files:
- split: train
path: subset_242/train-*
- config_name: subset_243
data_files:
- split: train
path: subset_243/train-*
- config_name: subset_244
data_files:
- split: train
path: subset_244/train-*
- config_name: subset_245
data_files:
- split: train
path: subset_245/train-*
- config_name: subset_246
data_files:
- split: train
path: subset_246/train-*
- config_name: subset_247
data_files:
- split: train
path: subset_247/train-*
- config_name: subset_248
data_files:
- split: train
path: subset_248/train-*
- config_name: subset_249
data_files:
- split: train
path: subset_249/train-*
- config_name: subset_25
data_files:
- split: train
path: subset_25/train-*
- config_name: subset_250
data_files:
- split: train
path: subset_250/train-*
- config_name: subset_251
data_files:
- split: train
path: subset_251/train-*
- config_name: subset_252
data_files:
- split: train
path: subset_252/train-*
- config_name: subset_253
data_files:
- split: train
path: subset_253/train-*
- config_name: subset_254
data_files:
- split: train
path: subset_254/train-*
- config_name: subset_255
data_files:
- split: train
path: subset_255/train-*
- config_name: subset_256
data_files:
- split: train
path: subset_256/train-*
- config_name: subset_257
data_files:
- split: train
path: subset_257/train-*
- config_name: subset_258
data_files:
- split: train
path: subset_258/train-*
- config_name: subset_259
data_files:
- split: train
path: subset_259/train-*
- config_name: subset_26
data_files:
- split: train
path: subset_26/train-*
- config_name: subset_260
data_files:
- split: train
path: subset_260/train-*
- config_name: subset_261
data_files:
- split: train
path: subset_261/train-*
- config_name: subset_262
data_files:
- split: train
path: subset_262/train-*
- config_name: subset_263
data_files:
- split: train
path: subset_263/train-*
- config_name: subset_264
data_files:
- split: train
path: subset_264/train-*
- config_name: subset_265
data_files:
- split: train
path: subset_265/train-*
- config_name: subset_266
data_files:
- split: train
path: subset_266/train-*
- config_name: subset_267
data_files:
- split: train
path: subset_267/train-*
- config_name: subset_268
data_files:
- split: train
path: subset_268/train-*
- config_name: subset_269
data_files:
- split: train
path: subset_269/train-*
- config_name: subset_27
data_files:
- split: train
path: subset_27/train-*
- config_name: subset_270
data_files:
- split: train
path: subset_270/train-*
- config_name: subset_271
data_files:
- split: train
path: subset_271/train-*
- config_name: subset_272
data_files:
- split: train
path: subset_272/train-*
- config_name: subset_273
data_files:
- split: train
path: subset_273/train-*
- config_name: subset_274
data_files:
- split: train
path: subset_274/train-*
- config_name: subset_275
data_files:
- split: train
path: subset_275/train-*
- config_name: subset_276
data_files:
- split: train
path: subset_276/train-*
- config_name: subset_277
data_files:
- split: train
path: subset_277/train-*
- config_name: subset_278
data_files:
- split: train
path: subset_278/train-*
- config_name: subset_279
data_files:
- split: train
path: subset_279/train-*
- config_name: subset_28
data_files:
- split: train
path: subset_28/train-*
- config_name: subset_280
data_files:
- split: train
path: subset_280/train-*
- config_name: subset_281
data_files:
- split: train
path: subset_281/train-*
- config_name: subset_282
data_files:
- split: train
path: subset_282/train-*
- config_name: subset_283
data_files:
- split: train
path: subset_283/train-*
- config_name: subset_284
data_files:
- split: train
path: subset_284/train-*
- config_name: subset_285
data_files:
- split: train
path: subset_285/train-*
- config_name: subset_286
data_files:
- split: train
path: subset_286/train-*
- config_name: subset_287
data_files:
- split: train
path: subset_287/train-*
- config_name: subset_288
data_files:
- split: train
path: subset_288/train-*
- config_name: subset_289
data_files:
- split: train
path: subset_289/train-*
- config_name: subset_29
data_files:
- split: train
path: subset_29/train-*
- config_name: subset_290
data_files:
- split: train
path: subset_290/train-*
- config_name: subset_291
data_files:
- split: train
path: subset_291/train-*
- config_name: subset_292
data_files:
- split: train
path: subset_292/train-*
- config_name: subset_293
data_files:
- split: train
path: subset_293/train-*
- config_name: subset_294
data_files:
- split: train
path: subset_294/train-*
- config_name: subset_295
data_files:
- split: train
path: subset_295/train-*
- config_name: subset_296
data_files:
- split: train
path: subset_296/train-*
- config_name: subset_297
data_files:
- split: train
path: subset_297/train-*
- config_name: subset_298
data_files:
- split: train
path: subset_298/train-*
- config_name: subset_299
data_files:
- split: train
path: subset_299/train-*
- config_name: subset_3
data_files:
- split: train
path: subset_3/train-*
- config_name: subset_30
data_files:
- split: train
path: subset_30/train-*
- config_name: subset_300
data_files:
- split: train
path: subset_300/train-*
- config_name: subset_301
data_files:
- split: train
path: subset_301/train-*
- config_name: subset_302
data_files:
- split: train
path: subset_302/train-*
- config_name: subset_303
data_files:
- split: train
path: subset_303/train-*
- config_name: subset_304
data_files:
- split: train
path: subset_304/train-*
- config_name: subset_305
data_files:
- split: train
path: subset_305/train-*
- config_name: subset_306
data_files:
- split: train
path: subset_306/train-*
- config_name: subset_307
data_files:
- split: train
path: subset_307/train-*
- config_name: subset_308
data_files:
- split: train
path: subset_308/train-*
- config_name: subset_309
data_files:
- split: train
path: subset_309/train-*
- config_name: subset_31
data_files:
- split: train
path: subset_31/train-*
- config_name: subset_310
data_files:
- split: train
path: subset_310/train-*
- config_name: subset_311
data_files:
- split: train
path: subset_311/train-*
- config_name: subset_312
data_files:
- split: train
path: subset_312/train-*
- config_name: subset_313
data_files:
- split: train
path: subset_313/train-*
- config_name: subset_314
data_files:
- split: train
path: subset_314/train-*
- config_name: subset_315
data_files:
- split: train
path: subset_315/train-*
- config_name: subset_316
data_files:
- split: train
path: subset_316/train-*
- config_name: subset_317
data_files:
- split: train
path: subset_317/train-*
- config_name: subset_318
data_files:
- split: train
path: subset_318/train-*
- config_name: subset_319
data_files:
- split: train
path: subset_319/train-*
- config_name: subset_32
data_files:
- split: train
path: subset_32/train-*
- config_name: subset_320
data_files:
- split: train
path: subset_320/train-*
- config_name: subset_33
data_files:
- split: train
path: subset_33/train-*
- config_name: subset_34
data_files:
- split: train
path: subset_34/train-*
- config_name: subset_35
data_files:
- split: train
path: subset_35/train-*
- config_name: subset_354
data_files:
- split: train
path: subset_354/train-*
- config_name: subset_355
data_files:
- split: train
path: subset_355/train-*
- config_name: subset_356
data_files:
- split: train
path: subset_356/train-*
- config_name: subset_357
data_files:
- split: train
path: subset_357/train-*
- config_name: subset_358
data_files:
- split: train
path: subset_358/train-*
- config_name: subset_359
data_files:
- split: train
path: subset_359/train-*
- config_name: subset_36
data_files:
- split: train
path: subset_36/train-*
- config_name: subset_360
data_files:
- split: train
path: subset_360/train-*
- config_name: subset_361
data_files:
- split: train
path: subset_361/train-*
- config_name: subset_362
data_files:
- split: train
path: subset_362/train-*
- config_name: subset_363
data_files:
- split: train
path: subset_363/train-*
- config_name: subset_364
data_files:
- split: train
path: subset_364/train-*
- config_name: subset_365
data_files:
- split: train
path: subset_365/train-*
- config_name: subset_366
data_files:
- split: train
path: subset_366/train-*
- config_name: subset_367
data_files:
- split: train
path: subset_367/train-*
- config_name: subset_368
data_files:
- split: train
path: subset_368/train-*
- config_name: subset_369
data_files:
- split: train
path: subset_369/train-*
- config_name: subset_37
data_files:
- split: train
path: subset_37/train-*
- config_name: subset_370
data_files:
- split: train
path: subset_370/train-*
- config_name: subset_371
data_files:
- split: train
path: subset_371/train-*
- config_name: subset_372
data_files:
- split: train
path: subset_372/train-*
- config_name: subset_373
data_files:
- split: train
path: subset_373/train-*
- config_name: subset_374
data_files:
- split: train
path: subset_374/train-*
- config_name: subset_375
data_files:
- split: train
path: subset_375/train-*
- config_name: subset_376
data_files:
- split: train
path: subset_376/train-*
- config_name: subset_377
data_files:
- split: train
path: subset_377/train-*
- config_name: subset_378
data_files:
- split: train
path: subset_378/train-*
- config_name: subset_379
data_files:
- split: train
path: subset_379/train-*
- config_name: subset_38
data_files:
- split: train
path: subset_38/train-*
- config_name: subset_380
data_files:
- split: train
path: subset_380/train-*
- config_name: subset_381
data_files:
- split: train
path: subset_381/train-*
- config_name: subset_382
data_files:
- split: train
path: subset_382/train-*
- config_name: subset_383
data_files:
- split: train
path: subset_383/train-*
- config_name: subset_384
data_files:
- split: train
path: subset_384/train-*
- config_name: subset_385
data_files:
- split: train
path: subset_385/train-*
- config_name: subset_386
data_files:
- split: train
path: subset_386/train-*
- config_name: subset_387
data_files:
- split: train
path: subset_387/train-*
- config_name: subset_388
data_files:
- split: train
path: subset_388/train-*
- config_name: subset_389
data_files:
- split: train
path: subset_389/train-*
- config_name: subset_39
data_files:
- split: train
path: subset_39/train-*
- config_name: subset_390
data_files:
- split: train
path: subset_390/train-*
- config_name: subset_391
data_files:
- split: train
path: subset_391/train-*
- config_name: subset_392
data_files:
- split: train
path: subset_392/train-*
- config_name: subset_393
data_files:
- split: train
path: subset_393/train-*
- config_name: subset_394
data_files:
- split: train
path: subset_394/train-*
- config_name: subset_4
data_files:
- split: train
path: subset_4/train-*
- config_name: subset_40
data_files:
- split: train
path: subset_40/train-*
- config_name: subset_41
data_files:
- split: train
path: subset_41/train-*
- config_name: subset_42
data_files:
- split: train
path: subset_42/train-*
- config_name: subset_43
data_files:
- split: train
path: subset_43/train-*
- config_name: subset_44
data_files:
- split: train
path: subset_44/train-*
- config_name: subset_45
data_files:
- split: train
path: subset_45/train-*
- config_name: subset_46
data_files:
- split: train
path: subset_46/train-*
- config_name: subset_47
data_files:
- split: train
path: subset_47/train-*
- config_name: subset_48
data_files:
- split: train
path: subset_48/train-*
- config_name: subset_49
data_files:
- split: train
path: subset_49/train-*
- config_name: subset_5
data_files:
- split: train
path: subset_5/train-*
- config_name: subset_50
data_files:
- split: train
path: subset_50/train-*
- config_name: subset_51
data_files:
- split: train
path: subset_51/train-*
- config_name: subset_52
data_files:
- split: train
path: subset_52/train-*
- config_name: subset_53
data_files:
- split: train
path: subset_53/train-*
- config_name: subset_54
data_files:
- split: train
path: subset_54/train-*
- config_name: subset_55
data_files:
- split: train
path: subset_55/train-*
- config_name: subset_56
data_files:
- split: train
path: subset_56/train-*
- config_name: subset_57
data_files:
- split: train
path: subset_57/train-*
- config_name: subset_58
data_files:
- split: train
path: subset_58/train-*
- config_name: subset_59
data_files:
- split: train
path: subset_59/train-*
- config_name: subset_6
data_files:
- split: train
path: subset_6/train-*
- config_name: subset_60
data_files:
- split: train
path: subset_60/train-*
- config_name: subset_61
data_files:
- split: train
path: subset_61/train-*
- config_name: subset_62
data_files:
- split: train
path: subset_62/train-*
- config_name: subset_63
data_files:
- split: train
path: subset_63/train-*
- config_name: subset_64
data_files:
- split: train
path: subset_64/train-*
- config_name: subset_65
data_files:
- split: train
path: subset_65/train-*
- config_name: subset_66
data_files:
- split: train
path: subset_66/train-*
- config_name: subset_67
data_files:
- split: train
path: subset_67/train-*
- config_name: subset_68
data_files:
- split: train
path: subset_68/train-*
- config_name: subset_69
data_files:
- split: train
path: subset_69/train-*
- config_name: subset_7
data_files:
- split: train
path: subset_7/train-*
- config_name: subset_70
data_files:
- split: train
path: subset_70/train-*
- config_name: subset_71
data_files:
- split: train
path: subset_71/train-*
- config_name: subset_72
data_files:
- split: train
path: subset_72/train-*
- config_name: subset_8
data_files:
- split: train
path: subset_8/train-*
- config_name: subset_9
data_files:
- split: train
path: subset_9/train-*
- config_name: subset_92
data_files:
- split: train
path: subset_92/train-*
- config_name: subset_93
data_files:
- split: train
path: subset_93/train-*
- config_name: subset_94
data_files:
- split: train
path: subset_94/train-*
- config_name: subset_95
data_files:
- split: train
path: subset_95/train-*
- config_name: subset_96
data_files:
- split: train
path: subset_96/train-*
- config_name: subset_97
data_files:
- split: train
path: subset_97/train-*
- config_name: subset_98
data_files:
- split: train
path: subset_98/train-*
- config_name: subset_99
data_files:
- split: train
path: subset_99/train-*
---
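The dataset consists of multiple subsets (configs), each with the same set of features and a single train split. The features are a line number, German (deA) and English (enA) IDs, a LASER score for each language, and audio tokens for each language stored as nested sequences of int64. For each subset's train split, the metadata records the number of bytes and the number of examples, along with the download size and dataset size.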
提供机构:
asahi417
原始信息汇总
数据集概述
数据集配置:subset_1
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- 分割:
- train: 2064 examples, 825169182 bytes
- 下载大小: 127787389 bytes
- 数据集大小: 825169182 bytes
数据集配置:subset_10
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- 分割:
- train: 2109 examples, 805994313 bytes
- 下载大小: 124771864 bytes
- 数据集大小: 805994313 bytes
数据集配置:subset_100
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- 分割:
- train: 1982 examples, 589512805 bytes
- 下载大小: 91508494 bytes
- 数据集大小: 589512805 bytes
数据集配置:subset_101
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2029 examples, 603841640 bytes
- 下载大小: 93644008 bytes
- 数据集大小: 603841640 bytes
数据集配置:subset_102
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- 分割:
- train: 2029 examples, 604643147 bytes
- 下载大小: 93825688 bytes
- 数据集大小: 604643147 bytes
数据集配置:subset_103
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 1982 examples, 601136745 bytes
- 下载大小: 93374940 bytes
- 数据集大小: 601136745 bytes
数据集配置:subset_104
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 1985 examples, 590015590 bytes
- 下载大小: 91516565 bytes
- 数据集大小: 590015590 bytes
数据集配置:subset_11
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- 分割:
- train: 2135 examples, 811970945 bytes
- 下载大小: 125769382 bytes
- 数据集大小: 811970945 bytes
数据集配置:subset_12
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2110 examples, 783606636 bytes
- 下载大小: 121396474 bytes
- 数据集大小: 783606636 bytes
数据集配置:subset_13
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2163 examples, 818546882 bytes
- 下载大小: 126774705 bytes
- 数据集大小: 818546882 bytes
数据集配置:subset_14
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2102 examples, 794334972 bytes
- 下载大小: 123031776 bytes
- 数据集大小: 794334972 bytes
数据集配置:subset_15
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2114 examples, 779790032 bytes
- 下载大小: 120716436 bytes
- 数据集大小: 779790032 bytes
数据集配置:subset_16
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2141 examples, 788648344 bytes
- 下载大小: 121996920 bytes
- 数据集大小: 788648344 bytes
数据集配置:subset_17
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2108 examples, 781308113 bytes
- 下载大小: 120886271 bytes
- 数据集大小: 781308113 bytes
数据集配置:subset_18
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2101 examples, 805637299 bytes
- 下载大小: 124644421 bytes
- 数据集大小: 805637299 bytes
数据集配置:subset_19
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2118 examples, 783993898 bytes
- 下载大小: 121391405 bytes
- 数据集大小: 783993898 bytes
数据集配置:subset_2
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- 分割:
- train: 2058 examples, 834363947 bytes
- 下载大小: 129174194 bytes
- 数据集大小: 834363947 bytes
数据集配置:subset_20
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 2147 examples, 800202881 bytes
- 下载大小: 123592413 bytes
- 数据集大小: 800202881 bytes
数据集配置:subset_201
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 1837 examples, 455804873 bytes
- 下载大小: 70871882 bytes
- 数据集大小: 455804873 bytes
数据集配置:subset_202
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- 分割:
- train: 1830 examples, 471221210 bytes
- 下载大小: 73352192 bytes
- 数据集大小: 471221210 bytes
数据集配置:subset_203
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- 分割:
- train: 1865 examples, 470510522 bytes
- 下载大小: 73145719 bytes
- 数据集大小: 470510522 bytes
数据集配置:subset_204
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 1882 examples, 464875087 bytes
- 下载大小: 72339101 bytes
- 数据集大小: 464875087 bytes
数据集配置:subset_205
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 1872 examples, 475206665 bytes
- 下载大小: 73888571 bytes
- 数据集大小: 475206665 bytes
数据集配置:subset_206
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- deA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割:
- train: 1873 examples, 475125067 bytes
- 下载大小: 73962791 bytes
- 数据集大小: 475125067 bytes
数据集配置:subset_207
- 特征:
- line_no: int64
- deA.id: string
- deA.laser_score: float64
- enA.id: string
- enA.laser_score: float64
- enA.audio.tokens: sequence of int64
- deA.audio.tokens: sequence of int64
- **



