asahi417/seamless-align-enA-viA.tokenized.encodec
收藏Hugging Face2024-06-02 更新2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/asahi417/seamless-align-enA-viA.tokenized.encodec
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: subset_1
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 657972851
num_examples: 1853
download_size: 102026304
dataset_size: 657972851
- config_name: subset_10
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 333116252
num_examples: 1090
download_size: 51419545
dataset_size: 333116252
- config_name: subset_100
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 491581650
num_examples: 1574
download_size: 76262709
dataset_size: 491581650
- config_name: subset_101
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 494927539
num_examples: 1595
download_size: 76800440
dataset_size: 494927539
- config_name: subset_102
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 500976567
num_examples: 1592
download_size: 77763129
dataset_size: 500976567
- config_name: subset_103
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 491949198
num_examples: 1560
download_size: 76280326
dataset_size: 491949198
- config_name: subset_104
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 472661087
num_examples: 1519
download_size: 73378567
dataset_size: 472661087
- config_name: subset_105
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 488327829
num_examples: 1570
download_size: 75678039
dataset_size: 488327829
- config_name: subset_106
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 486696477
num_examples: 1565
download_size: 75520885
dataset_size: 486696477
- config_name: subset_107
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 497913216
num_examples: 1569
download_size: 77283571
dataset_size: 497913216
- config_name: subset_108
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 494871783
num_examples: 1589
download_size: 76786138
dataset_size: 494871783
- config_name: subset_109
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 501611035
num_examples: 1584
download_size: 77863224
dataset_size: 501611035
- config_name: subset_11
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 353248632
num_examples: 1146
download_size: 54703969
dataset_size: 353248632
- config_name: subset_110
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 468263236
num_examples: 1528
download_size: 72661963
dataset_size: 468263236
- config_name: subset_111
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 477188948
num_examples: 1509
download_size: 74099020
dataset_size: 477188948
- config_name: subset_112
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 482364297
num_examples: 1538
download_size: 74810446
dataset_size: 482364297
- config_name: subset_113
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 492600066
num_examples: 1546
download_size: 76430855
dataset_size: 492600066
- config_name: subset_114
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 483990773
num_examples: 1533
download_size: 75140277
dataset_size: 483990773
- config_name: subset_115
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 493183494
num_examples: 1553
download_size: 76448618
dataset_size: 493183494
- config_name: subset_116
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 506605142
num_examples: 1544
download_size: 78557575
dataset_size: 506605142
- config_name: subset_117
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 488031035
num_examples: 1572
download_size: 75713406
dataset_size: 488031035
- config_name: subset_118
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 497798416
num_examples: 1549
download_size: 77159963
dataset_size: 497798416
- config_name: subset_119
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 495877544
num_examples: 1589
download_size: 76866589
dataset_size: 495877544
- config_name: subset_12
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 332998283
num_examples: 1109
download_size: 51511210
dataset_size: 332998283
- config_name: subset_120
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 493496074
num_examples: 1587
download_size: 76515857
dataset_size: 493496074
- config_name: subset_121
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 497513829
num_examples: 1594
download_size: 77118595
dataset_size: 497513829
- config_name: subset_122
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 515705531
num_examples: 1592
download_size: 79969651
dataset_size: 515705531
- config_name: subset_123
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 488418863
num_examples: 1572
download_size: 75762235
dataset_size: 488418863
- config_name: subset_124
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 495959369
num_examples: 1550
download_size: 76970068
dataset_size: 495959369
- config_name: subset_125
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 505320112
num_examples: 1564
download_size: 78317831
dataset_size: 505320112
- config_name: subset_126
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 510527261
num_examples: 1565
download_size: 79122365
dataset_size: 510527261
- config_name: subset_127
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 513361391
num_examples: 1574
download_size: 79609170
dataset_size: 513361391
- config_name: subset_128
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 500907941
num_examples: 1589
download_size: 77727813
dataset_size: 500907941
- config_name: subset_129
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 509596758
num_examples: 1623
download_size: 79045064
dataset_size: 509596758
- config_name: subset_13
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 336439195
num_examples: 1147
download_size: 52206420
dataset_size: 336439195
- config_name: subset_130
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 501980567
num_examples: 1576
download_size: 77821571
dataset_size: 501980567
- config_name: subset_131
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 509779384
num_examples: 1586
download_size: 79103968
dataset_size: 509779384
- config_name: subset_132
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 497782151
num_examples: 1566
download_size: 77206778
dataset_size: 497782151
- config_name: subset_133
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 505759867
num_examples: 1582
download_size: 78491142
dataset_size: 505759867
- config_name: subset_134
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 491713869
num_examples: 1503
download_size: 76184994
dataset_size: 491713869
- config_name: subset_135
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 480849405
num_examples: 1480
download_size: 74578394
dataset_size: 480849405
- config_name: subset_136
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 503451038
num_examples: 1595
download_size: 78013403
dataset_size: 503451038
- config_name: subset_137
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 512425837
num_examples: 1622
download_size: 79576729
dataset_size: 512425837
- config_name: subset_138
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 519482776
num_examples: 1595
download_size: 80619280
dataset_size: 519482776
- config_name: subset_139
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 516131617
num_examples: 1581
download_size: 80015365
dataset_size: 516131617
- config_name: subset_14
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 345486369
num_examples: 1158
download_size: 53582595
dataset_size: 345486369
- config_name: subset_140
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 504867811
num_examples: 1574
download_size: 78318751
dataset_size: 504867811
- config_name: subset_141
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 501425415
num_examples: 1552
download_size: 77728190
dataset_size: 501425415
- config_name: subset_142
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 526129321
num_examples: 1617
download_size: 81604532
dataset_size: 526129321
- config_name: subset_143
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 521898171
num_examples: 1614
download_size: 80959074
dataset_size: 521898171
- config_name: subset_144
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 534003190
num_examples: 1647
download_size: 82904947
dataset_size: 534003190
- config_name: subset_145
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 528225568
num_examples: 1593
download_size: 81886588
dataset_size: 528225568
- config_name: subset_146
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 521354295
num_examples: 1603
download_size: 80760090
dataset_size: 521354295
- config_name: subset_147
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 535697647
num_examples: 1621
download_size: 83021010
dataset_size: 535697647
- config_name: subset_148
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 511554084
num_examples: 1606
download_size: 79375063
dataset_size: 511554084
- config_name: subset_149
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 63391375
num_examples: 190
download_size: 9823566
dataset_size: 63391375
- config_name: subset_15
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 353302745
num_examples: 1207
download_size: 54766415
dataset_size: 353302745
- config_name: subset_16
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 348183061
num_examples: 1161
download_size: 54026100
dataset_size: 348183061
- config_name: subset_17
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 356591979
num_examples: 1197
download_size: 55259328
dataset_size: 356591979
- config_name: subset_18
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 366275763
num_examples: 1263
download_size: 56870639
dataset_size: 366275763
- config_name: subset_19
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 354150737
num_examples: 1209
download_size: 54854573
dataset_size: 354150737
- config_name: subset_2
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 535059565
num_examples: 1497
download_size: 82727194
dataset_size: 535059565
- config_name: subset_20
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 343730272
num_examples: 1196
download_size: 53286585
dataset_size: 343730272
- config_name: subset_21
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 354986178
num_examples: 1235
download_size: 55024250
dataset_size: 354986178
- config_name: subset_22
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 354224442
num_examples: 1209
download_size: 54910052
dataset_size: 354224442
- config_name: subset_23
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 366593493
num_examples: 1245
download_size: 56810870
dataset_size: 366593493
- config_name: subset_24
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 357120673
num_examples: 1260
download_size: 55334011
dataset_size: 357120673
- config_name: subset_25
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 375727425
num_examples: 1290
download_size: 58329393
dataset_size: 375727425
- config_name: subset_26
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 383413369
num_examples: 1335
download_size: 59474557
dataset_size: 383413369
- config_name: subset_27
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 362444314
num_examples: 1267
download_size: 56215272
dataset_size: 362444314
- config_name: subset_28
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 382945073
num_examples: 1309
download_size: 59368434
dataset_size: 382945073
- config_name: subset_29
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 381021734
num_examples: 1305
download_size: 59145544
dataset_size: 381021734
- config_name: subset_3
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 462261860
num_examples: 1325
download_size: 71487580
dataset_size: 462261860
- config_name: subset_30
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 377299595
num_examples: 1313
download_size: 58393461
dataset_size: 377299595
- config_name: subset_31
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 388461756
num_examples: 1351
download_size: 60230639
dataset_size: 388461756
- config_name: subset_32
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 386511348
num_examples: 1355
download_size: 60017863
dataset_size: 386511348
- config_name: subset_33
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 385310098
num_examples: 1350
download_size: 59840192
dataset_size: 385310098
- config_name: subset_34
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 413257353
num_examples: 1388
download_size: 64036764
dataset_size: 413257353
- config_name: subset_35
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 380160821
num_examples: 1328
download_size: 59040382
dataset_size: 380160821
- config_name: subset_36
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 404828436
num_examples: 1371
download_size: 62743025
dataset_size: 404828436
- config_name: subset_37
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 394275488
num_examples: 1366
download_size: 61119526
dataset_size: 394275488
- config_name: subset_38
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 382252845
num_examples: 1315
download_size: 59312799
dataset_size: 382252845
- config_name: subset_39
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 378281152
num_examples: 1301
download_size: 58686367
dataset_size: 378281152
- config_name: subset_4
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 394036818
num_examples: 1171
download_size: 61063494
dataset_size: 394036818
- config_name: subset_40
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 390303780
num_examples: 1346
download_size: 60524584
dataset_size: 390303780
- config_name: subset_41
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 373952577
num_examples: 1246
download_size: 57950791
dataset_size: 373952577
- config_name: subset_42
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 400266516
num_examples: 1393
download_size: 62053653
dataset_size: 400266516
- config_name: subset_43
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 396898818
num_examples: 1389
download_size: 61594193
dataset_size: 396898818
- config_name: subset_44
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 410261960
num_examples: 1398
download_size: 63672887
dataset_size: 410261960
- config_name: subset_45
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 397866371
num_examples: 1413
download_size: 61751052
dataset_size: 397866371
- config_name: subset_46
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 394767172
num_examples: 1337
download_size: 61305296
dataset_size: 394767172
- config_name: subset_47
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 415235772
num_examples: 1404
download_size: 64464459
dataset_size: 415235772
- config_name: subset_48
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 392648158
num_examples: 1386
download_size: 60936746
dataset_size: 392648158
- config_name: subset_49
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 425378613
num_examples: 1428
download_size: 65983152
dataset_size: 425378613
- config_name: subset_5
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 371450968
num_examples: 1130
download_size: 57505146
dataset_size: 371450968
- config_name: subset_50
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 411156307
num_examples: 1390
download_size: 63731124
dataset_size: 411156307
- config_name: subset_51
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 418002894
num_examples: 1442
download_size: 64918337
dataset_size: 418002894
- config_name: subset_52
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 430178821
num_examples: 1474
download_size: 66714075
dataset_size: 430178821
- config_name: subset_53
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 425208061
num_examples: 1430
download_size: 65944614
dataset_size: 425208061
- config_name: subset_54
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 429891483
num_examples: 1466
download_size: 66726109
dataset_size: 429891483
- config_name: subset_55
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 430685991
num_examples: 1443
download_size: 66781706
dataset_size: 430685991
- config_name: subset_56
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 423657820
num_examples: 1428
download_size: 65782930
dataset_size: 423657820
- config_name: subset_57
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 436940893
num_examples: 1459
download_size: 67763646
dataset_size: 436940893
- config_name: subset_58
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 448301159
num_examples: 1500
download_size: 69468596
dataset_size: 448301159
- config_name: subset_59
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 421238312
num_examples: 1459
download_size: 65332628
dataset_size: 421238312
- config_name: subset_6
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 360188100
num_examples: 1128
download_size: 55742822
dataset_size: 360188100
- config_name: subset_60
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 439858839
num_examples: 1457
download_size: 68238426
dataset_size: 439858839
- config_name: subset_61
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 436845915
num_examples: 1456
download_size: 67787956
dataset_size: 436845915
- config_name: subset_62
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 425735185
num_examples: 1430
download_size: 66050425
dataset_size: 425735185
- config_name: subset_63
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 414013616
num_examples: 1376
download_size: 64286786
dataset_size: 414013616
- config_name: subset_64
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 422138858
num_examples: 1433
download_size: 65475401
dataset_size: 422138858
- config_name: subset_65
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 437318242
num_examples: 1450
download_size: 67867689
dataset_size: 437318242
- config_name: subset_66
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 443962279
num_examples: 1454
download_size: 68886689
dataset_size: 443962279
- config_name: subset_67
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 443943856
num_examples: 1473
download_size: 68885670
dataset_size: 443943856
- config_name: subset_68
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 431655225
num_examples: 1465
download_size: 66997688
dataset_size: 431655225
- config_name: subset_69
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 437616412
num_examples: 1462
download_size: 67927772
dataset_size: 437616412
- config_name: subset_7
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 336105400
num_examples: 1091
download_size: 52098760
dataset_size: 336105400
- config_name: subset_70
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 439351398
num_examples: 1488
download_size: 68157743
dataset_size: 439351398
- config_name: subset_71
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 439992625
num_examples: 1475
download_size: 68229834
dataset_size: 439992625
- config_name: subset_72
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 449162165
num_examples: 1498
download_size: 69665403
dataset_size: 449162165
- config_name: subset_73
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 451334428
num_examples: 1531
download_size: 70062622
dataset_size: 451334428
- config_name: subset_74
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 463231411
num_examples: 1545
download_size: 71883946
dataset_size: 463231411
- config_name: subset_75
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 460779098
num_examples: 1528
download_size: 71461779
dataset_size: 460779098
- config_name: subset_76
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 440438431
num_examples: 1485
download_size: 68309122
dataset_size: 440438431
- config_name: subset_77
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 449010799
num_examples: 1481
download_size: 69697282
dataset_size: 449010799
- config_name: subset_78
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 429070498
num_examples: 1404
download_size: 66538148
dataset_size: 429070498
- config_name: subset_79
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 441313788
num_examples: 1474
download_size: 68412582
dataset_size: 441313788
- config_name: subset_8
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 325511104
num_examples: 1060
download_size: 50468177
dataset_size: 325511104
- config_name: subset_80
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 452555436
num_examples: 1489
download_size: 70170748
dataset_size: 452555436
- config_name: subset_81
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 456556962
num_examples: 1488
download_size: 70850932
dataset_size: 456556962
- config_name: subset_82
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 470716462
num_examples: 1509
download_size: 73095226
dataset_size: 470716462
- config_name: subset_83
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 452725212
num_examples: 1502
download_size: 70220646
dataset_size: 452725212
- config_name: subset_84
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 466330973
num_examples: 1519
download_size: 72309940
dataset_size: 466330973
- config_name: subset_85
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 455717576
num_examples: 1492
download_size: 70670347
dataset_size: 455717576
- config_name: subset_86
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 484842394
num_examples: 1548
download_size: 75265453
dataset_size: 484842394
- config_name: subset_87
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 469462516
num_examples: 1537
download_size: 72871241
dataset_size: 469462516
- config_name: subset_88
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 476573689
num_examples: 1543
download_size: 73942643
dataset_size: 476573689
- config_name: subset_89
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 471075098
num_examples: 1525
download_size: 73045806
dataset_size: 471075098
- config_name: subset_9
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 312775583
num_examples: 1037
download_size: 48440834
dataset_size: 312775583
- config_name: subset_90
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 462068563
num_examples: 1500
download_size: 71546400
dataset_size: 462068563
- config_name: subset_91
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 459606990
num_examples: 1517
download_size: 71355419
dataset_size: 459606990
- config_name: subset_92
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 459953106
num_examples: 1508
download_size: 71319405
dataset_size: 459953106
- config_name: subset_93
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 467763920
num_examples: 1539
download_size: 72558301
dataset_size: 467763920
- config_name: subset_94
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 489179691
num_examples: 1559
download_size: 75877823
dataset_size: 489179691
- config_name: subset_95
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 462891254
num_examples: 1515
download_size: 71864386
dataset_size: 462891254
- config_name: subset_96
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 478543307
num_examples: 1533
download_size: 74239554
dataset_size: 478543307
- config_name: subset_97
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: viA.audio.tokens
sequence:
sequence: int64
- name: enA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 478304615
num_examples: 1560
download_size: 74186188
dataset_size: 478304615
- config_name: subset_98
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 464417057
num_examples: 1522
download_size: 72057917
dataset_size: 464417057
- config_name: subset_99
features:
- name: line_no
dtype: int64
- name: enA.id
dtype: string
- name: enA.laser_score
dtype: float64
- name: viA.id
dtype: string
- name: viA.laser_score
dtype: float64
- name: enA.audio.tokens
sequence:
sequence: int64
- name: viA.audio.tokens
sequence:
sequence: int64
splits:
- name: train
num_bytes: 489372247
num_examples: 1563
download_size: 75909159
dataset_size: 489372247
configs:
- config_name: subset_1
data_files:
- split: train
path: subset_1/train-*
- config_name: subset_10
data_files:
- split: train
path: subset_10/train-*
- config_name: subset_100
data_files:
- split: train
path: subset_100/train-*
- config_name: subset_101
data_files:
- split: train
path: subset_101/train-*
- config_name: subset_102
data_files:
- split: train
path: subset_102/train-*
- config_name: subset_103
data_files:
- split: train
path: subset_103/train-*
- config_name: subset_104
data_files:
- split: train
path: subset_104/train-*
- config_name: subset_105
data_files:
- split: train
path: subset_105/train-*
- config_name: subset_106
data_files:
- split: train
path: subset_106/train-*
- config_name: subset_107
data_files:
- split: train
path: subset_107/train-*
- config_name: subset_108
data_files:
- split: train
path: subset_108/train-*
- config_name: subset_109
data_files:
- split: train
path: subset_109/train-*
- config_name: subset_11
data_files:
- split: train
path: subset_11/train-*
- config_name: subset_110
data_files:
- split: train
path: subset_110/train-*
- config_name: subset_111
data_files:
- split: train
path: subset_111/train-*
- config_name: subset_112
data_files:
- split: train
path: subset_112/train-*
- config_name: subset_113
data_files:
- split: train
path: subset_113/train-*
- config_name: subset_114
data_files:
- split: train
path: subset_114/train-*
- config_name: subset_115
data_files:
- split: train
path: subset_115/train-*
- config_name: subset_116
data_files:
- split: train
path: subset_116/train-*
- config_name: subset_117
data_files:
- split: train
path: subset_117/train-*
- config_name: subset_118
data_files:
- split: train
path: subset_118/train-*
- config_name: subset_119
data_files:
- split: train
path: subset_119/train-*
- config_name: subset_12
data_files:
- split: train
path: subset_12/train-*
- config_name: subset_120
data_files:
- split: train
path: subset_120/train-*
- config_name: subset_121
data_files:
- split: train
path: subset_121/train-*
- config_name: subset_122
data_files:
- split: train
path: subset_122/train-*
- config_name: subset_123
data_files:
- split: train
path: subset_123/train-*
- config_name: subset_124
data_files:
- split: train
path: subset_124/train-*
- config_name: subset_125
data_files:
- split: train
path: subset_125/train-*
- config_name: subset_126
data_files:
- split: train
path: subset_126/train-*
- config_name: subset_127
data_files:
- split: train
path: subset_127/train-*
- config_name: subset_128
data_files:
- split: train
path: subset_128/train-*
- config_name: subset_129
data_files:
- split: train
path: subset_129/train-*
- config_name: subset_13
data_files:
- split: train
path: subset_13/train-*
- config_name: subset_130
data_files:
- split: train
path: subset_130/train-*
- config_name: subset_131
data_files:
- split: train
path: subset_131/train-*
- config_name: subset_132
data_files:
- split: train
path: subset_132/train-*
- config_name: subset_133
data_files:
- split: train
path: subset_133/train-*
- config_name: subset_134
data_files:
- split: train
path: subset_134/train-*
- config_name: subset_135
data_files:
- split: train
path: subset_135/train-*
- config_name: subset_136
data_files:
- split: train
path: subset_136/train-*
- config_name: subset_137
data_files:
- split: train
path: subset_137/train-*
- config_name: subset_138
data_files:
- split: train
path: subset_138/train-*
- config_name: subset_139
data_files:
- split: train
path: subset_139/train-*
- config_name: subset_14
data_files:
- split: train
path: subset_14/train-*
- config_name: subset_140
data_files:
- split: train
path: subset_140/train-*
- config_name: subset_141
data_files:
- split: train
path: subset_141/train-*
- config_name: subset_142
data_files:
- split: train
path: subset_142/train-*
- config_name: subset_143
data_files:
- split: train
path: subset_143/train-*
- config_name: subset_144
data_files:
- split: train
path: subset_144/train-*
- config_name: subset_145
data_files:
- split: train
path: subset_145/train-*
- config_name: subset_146
data_files:
- split: train
path: subset_146/train-*
- config_name: subset_147
data_files:
- split: train
path: subset_147/train-*
- config_name: subset_148
data_files:
- split: train
path: subset_148/train-*
- config_name: subset_149
data_files:
- split: train
path: subset_149/train-*
- config_name: subset_15
data_files:
- split: train
path: subset_15/train-*
- config_name: subset_16
data_files:
- split: train
path: subset_16/train-*
- config_name: subset_17
data_files:
- split: train
path: subset_17/train-*
- config_name: subset_18
data_files:
- split: train
path: subset_18/train-*
- config_name: subset_19
data_files:
- split: train
path: subset_19/train-*
- config_name: subset_2
data_files:
- split: train
path: subset_2/train-*
- config_name: subset_20
data_files:
- split: train
path: subset_20/train-*
- config_name: subset_21
data_files:
- split: train
path: subset_21/train-*
- config_name: subset_22
data_files:
- split: train
path: subset_22/train-*
- config_name: subset_23
data_files:
- split: train
path: subset_23/train-*
- config_name: subset_24
data_files:
- split: train
path: subset_24/train-*
- config_name: subset_25
data_files:
- split: train
path: subset_25/train-*
- config_name: subset_26
data_files:
- split: train
path: subset_26/train-*
- config_name: subset_27
data_files:
- split: train
path: subset_27/train-*
- config_name: subset_28
data_files:
- split: train
path: subset_28/train-*
- config_name: subset_29
data_files:
- split: train
path: subset_29/train-*
- config_name: subset_3
data_files:
- split: train
path: subset_3/train-*
- config_name: subset_30
data_files:
- split: train
path: subset_30/train-*
- config_name: subset_31
data_files:
- split: train
path: subset_31/train-*
- config_name: subset_32
data_files:
- split: train
path: subset_32/train-*
- config_name: subset_33
data_files:
- split: train
path: subset_33/train-*
- config_name: subset_34
data_files:
- split: train
path: subset_34/train-*
- config_name: subset_35
data_files:
- split: train
path: subset_35/train-*
- config_name: subset_36
data_files:
- split: train
path: subset_36/train-*
- config_name: subset_37
data_files:
- split: train
path: subset_37/train-*
- config_name: subset_38
data_files:
- split: train
path: subset_38/train-*
- config_name: subset_39
data_files:
- split: train
path: subset_39/train-*
- config_name: subset_4
data_files:
- split: train
path: subset_4/train-*
- config_name: subset_40
data_files:
- split: train
path: subset_40/train-*
- config_name: subset_41
data_files:
- split: train
path: subset_41/train-*
- config_name: subset_42
data_files:
- split: train
path: subset_42/train-*
- config_name: subset_43
data_files:
- split: train
path: subset_43/train-*
- config_name: subset_44
data_files:
- split: train
path: subset_44/train-*
- config_name: subset_45
data_files:
- split: train
path: subset_45/train-*
- config_name: subset_46
data_files:
- split: train
path: subset_46/train-*
- config_name: subset_47
data_files:
- split: train
path: subset_47/train-*
- config_name: subset_48
data_files:
- split: train
path: subset_48/train-*
- config_name: subset_49
data_files:
- split: train
path: subset_49/train-*
- config_name: subset_5
data_files:
- split: train
path: subset_5/train-*
- config_name: subset_50
data_files:
- split: train
path: subset_50/train-*
- config_name: subset_51
data_files:
- split: train
path: subset_51/train-*
- config_name: subset_52
data_files:
- split: train
path: subset_52/train-*
- config_name: subset_53
data_files:
- split: train
path: subset_53/train-*
- config_name: subset_54
data_files:
- split: train
path: subset_54/train-*
- config_name: subset_55
data_files:
- split: train
path: subset_55/train-*
- config_name: subset_56
data_files:
- split: train
path: subset_56/train-*
- config_name: subset_57
data_files:
- split: train
path: subset_57/train-*
- config_name: subset_58
data_files:
- split: train
path: subset_58/train-*
- config_name: subset_59
data_files:
- split: train
path: subset_59/train-*
- config_name: subset_6
data_files:
- split: train
path: subset_6/train-*
- config_name: subset_60
data_files:
- split: train
path: subset_60/train-*
- config_name: subset_61
data_files:
- split: train
path: subset_61/train-*
- config_name: subset_62
data_files:
- split: train
path: subset_62/train-*
- config_name: subset_63
data_files:
- split: train
path: subset_63/train-*
- config_name: subset_64
data_files:
- split: train
path: subset_64/train-*
- config_name: subset_65
data_files:
- split: train
path: subset_65/train-*
- config_name: subset_66
data_files:
- split: train
path: subset_66/train-*
- config_name: subset_67
data_files:
- split: train
path: subset_67/train-*
- config_name: subset_68
data_files:
- split: train
path: subset_68/train-*
- config_name: subset_69
data_files:
- split: train
path: subset_69/train-*
- config_name: subset_7
data_files:
- split: train
path: subset_7/train-*
- config_name: subset_70
data_files:
- split: train
path: subset_70/train-*
- config_name: subset_71
data_files:
- split: train
path: subset_71/train-*
- config_name: subset_72
data_files:
- split: train
path: subset_72/train-*
- config_name: subset_73
data_files:
- split: train
path: subset_73/train-*
- config_name: subset_74
data_files:
- split: train
path: subset_74/train-*
- config_name: subset_75
data_files:
- split: train
path: subset_75/train-*
- config_name: subset_76
data_files:
- split: train
path: subset_76/train-*
- config_name: subset_77
data_files:
- split: train
path: subset_77/train-*
- config_name: subset_78
data_files:
- split: train
path: subset_78/train-*
- config_name: subset_79
data_files:
- split: train
path: subset_79/train-*
- config_name: subset_8
data_files:
- split: train
path: subset_8/train-*
- config_name: subset_80
data_files:
- split: train
path: subset_80/train-*
- config_name: subset_81
data_files:
- split: train
path: subset_81/train-*
- config_name: subset_82
data_files:
- split: train
path: subset_82/train-*
- config_name: subset_83
data_files:
- split: train
path: subset_83/train-*
- config_name: subset_84
data_files:
- split: train
path: subset_84/train-*
- config_name: subset_85
data_files:
- split: train
path: subset_85/train-*
- config_name: subset_86
data_files:
- split: train
path: subset_86/train-*
- config_name: subset_87
data_files:
- split: train
path: subset_87/train-*
- config_name: subset_88
data_files:
- split: train
path: subset_88/train-*
- config_name: subset_89
data_files:
- split: train
path: subset_89/train-*
- config_name: subset_9
data_files:
- split: train
path: subset_9/train-*
- config_name: subset_90
data_files:
- split: train
path: subset_90/train-*
- config_name: subset_91
data_files:
- split: train
path: subset_91/train-*
- config_name: subset_92
data_files:
- split: train
path: subset_92/train-*
- config_name: subset_93
data_files:
- split: train
path: subset_93/train-*
- config_name: subset_94
data_files:
- split: train
path: subset_94/train-*
- config_name: subset_95
data_files:
- split: train
path: subset_95/train-*
- config_name: subset_96
data_files:
- split: train
path: subset_96/train-*
- config_name: subset_97
data_files:
- split: train
path: subset_97/train-*
- config_name: subset_98
data_files:
- split: train
path: subset_98/train-*
- config_name: subset_99
data_files:
- split: train
path: subset_99/train-*
---
The dataset is divided into many subsets (configurations), all of which share the same feature structure: a line number, English and Vietnamese IDs, their LASER scores, and the tokenized audio for each language. Each subset contains a single training split, and its metadata records the size of that split in bytes and the number of examples it holds.
提供机构:
asahi417
原始信息汇总
数据集概述
数据集配置名称:subset_1
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1853 examples, 657972851 bytes
- download_size: 102026304 bytes
- dataset_size: 657972851 bytes
数据集配置名称:subset_10
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1090 examples, 333116252 bytes
- download_size: 51419545 bytes
- dataset_size: 333116252 bytes
数据集配置名称:subset_100
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1574 examples, 491581650 bytes
- download_size: 76262709 bytes
- dataset_size: 491581650 bytes
数据集配置名称:subset_101
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1595 examples, 494927539 bytes
- download_size: 76800440 bytes
- dataset_size: 494927539 bytes
数据集配置名称:subset_102
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1592 examples, 500976567 bytes
- download_size: 77763129 bytes
- dataset_size: 500976567 bytes
数据集配置名称:subset_103
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1560 examples, 491949198 bytes
- download_size: 76280326 bytes
- dataset_size: 491949198 bytes
数据集配置名称:subset_104
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1519 examples, 472661087 bytes
- download_size: 73378567 bytes
- dataset_size: 472661087 bytes
数据集配置名称:subset_105
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1570 examples, 488327829 bytes
- download_size: 75678039 bytes
- dataset_size: 488327829 bytes
数据集配置名称:subset_106
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1565 examples, 486696477 bytes
- download_size: 75520885 bytes
- dataset_size: 486696477 bytes
数据集配置名称:subset_107
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1569 examples, 497913216 bytes
- download_size: 77283571 bytes
- dataset_size: 497913216 bytes
数据集配置名称:subset_108
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1589 examples, 494871783 bytes
- download_size: 76786138 bytes
- dataset_size: 494871783 bytes
数据集配置名称:subset_109
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1584 examples, 501611035 bytes
- download_size: 77863224 bytes
- dataset_size: 501611035 bytes
数据集配置名称:subset_11
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1146 examples, 353248632 bytes
- download_size: 54703969 bytes
- dataset_size: 353248632 bytes
数据集配置名称:subset_110
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1528 examples, 468263236 bytes
- download_size: 72661963 bytes
- dataset_size: 468263236 bytes
数据集配置名称:subset_111
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1509 examples, 477188948 bytes
- download_size: 74099020 bytes
- dataset_size: 477188948 bytes
数据集配置名称:subset_112
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1538 examples, 482364297 bytes
- download_size: 74810446 bytes
- dataset_size: 482364297 bytes
数据集配置名称:subset_113
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1546 examples, 492600066 bytes
- download_size: 76430855 bytes
- dataset_size: 492600066 bytes
数据集配置名称:subset_114
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1533 examples, 483990773 bytes
- download_size: 75140277 bytes
- dataset_size: 483990773 bytes
数据集配置名称:subset_115
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1553 examples, 493183494 bytes
- download_size: 76448618 bytes
- dataset_size: 493183494 bytes
数据集配置名称:subset_116
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- viA.audio.tokens: sequence of int64
- enA.audio.tokens: sequence of int64
- 分割信息:
- train: 1544 examples, 506605142 bytes
- download_size: 78557575 bytes
- dataset_size: 506605142 bytes
数据集配置名称:subset_117
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1572 examples, 488031035 bytes
- download_size: 75713406 bytes
- dataset_size: 488031035 bytes
数据集配置名称:subset_118
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1549 examples, 497798416 bytes
- download_size: 77159963 bytes
- dataset_size: 497798416 bytes
数据集配置名称:subset_119
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1589 examples, 495877544 bytes
- download_size: 76866589 bytes
- dataset_size: 495877544 bytes
数据集配置名称:subset_12
- 特征信息:
- line_no: int64
- enA.id: string
- enA.laser_score: float64
- viA.id: string
- viA.laser_score: float64
- enA.audio.tokens: sequence of int64
- viA.audio.tokens: sequence of int64
- 分割信息:
- train: 1109 examples, 332998283 bytes



