five

asahi417/seamless-align-enA-frA.tokenized

收藏
Hugging Face2024-06-10 更新2024-06-12 收录
下载链接:
https://hf-mirror.com/datasets/asahi417/seamless-align-enA-frA.tokenized
下载链接
链接失效反馈
官方服务:
资源简介:
--- dataset_info: - config_name: subset_1 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 896960282 num_examples: 2344 download_size: 138828871 dataset_size: 896960282 - config_name: subset_10 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 849195322 num_examples: 2336 download_size: 131866595 dataset_size: 849195322 - config_name: subset_100 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 784236275 num_examples: 2326 download_size: 121581663 dataset_size: 784236275 - config_name: subset_101 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 778808804 num_examples: 2335 download_size: 120877210 dataset_size: 778808804 - config_name: subset_102 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train 
num_bytes: 772584121 num_examples: 2306 download_size: 119739300 dataset_size: 772584121 - config_name: subset_103 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 783315490 num_examples: 2335 download_size: 121528799 dataset_size: 783315490 - config_name: subset_104 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 774101833 num_examples: 2330 download_size: 119997337 dataset_size: 774101833 - config_name: subset_105 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 782038760 num_examples: 2331 download_size: 121205066 dataset_size: 782038760 - config_name: subset_11 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 836642733 num_examples: 2315 download_size: 129888698 dataset_size: 836642733 - config_name: subset_12 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - 
name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 866050440 num_examples: 2349 download_size: 134452465 dataset_size: 866050440 - config_name: subset_13 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 859275177 num_examples: 2341 download_size: 133374434 dataset_size: 859275177 - config_name: subset_14 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 854462165 num_examples: 2338 download_size: 132567966 dataset_size: 854462165 - config_name: subset_15 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 846548429 num_examples: 2363 download_size: 131471430 dataset_size: 846548429 - config_name: subset_16 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 849723661 num_examples: 2348 download_size: 131823703 dataset_size: 849723661 - config_name: subset_17 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score 
dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 847314332 num_examples: 2328 download_size: 131528731 dataset_size: 847314332 - config_name: subset_18 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 856549312 num_examples: 2350 download_size: 132883574 dataset_size: 856549312 - config_name: subset_19 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 849231130 num_examples: 2348 download_size: 131806664 dataset_size: 849231130 - config_name: subset_2 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 885549898 num_examples: 2364 download_size: 137223195 dataset_size: 885549898 - config_name: subset_20 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 829390963 num_examples: 2346 download_size: 128708237 dataset_size: 829390963 - config_name: subset_21 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score 
dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 841492981 num_examples: 2335 download_size: 130599466 dataset_size: 841492981 - config_name: subset_22 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 839566978 num_examples: 2331 download_size: 130433057 dataset_size: 839566978 - config_name: subset_23 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 831309666 num_examples: 2337 download_size: 129065953 dataset_size: 831309666 - config_name: subset_24 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 843207728 num_examples: 2333 download_size: 130820123 dataset_size: 843207728 - config_name: subset_25 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 826543551 num_examples: 2319 download_size: 128228152 dataset_size: 826543551 - config_name: subset_26 features: - name: line_no 
dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 856166635 num_examples: 2338 download_size: 132709958 dataset_size: 856166635 - config_name: subset_27 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 835216842 num_examples: 2319 download_size: 129640044 dataset_size: 835216842 - config_name: subset_28 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 839904123 num_examples: 2334 download_size: 130295941 dataset_size: 839904123 - config_name: subset_29 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 787229439 num_examples: 2182 download_size: 122258606 dataset_size: 787229439 - config_name: subset_3 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 877322523 num_examples: 2354 download_size: 136064499 
dataset_size: 877322523 - config_name: subset_30 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 731200910 num_examples: 2049 download_size: 113489889 dataset_size: 731200910 - config_name: subset_300 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 615595453 num_examples: 2112 download_size: 95508534 dataset_size: 615595453 - config_name: subset_301 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 618759301 num_examples: 2080 download_size: 95997104 dataset_size: 618759301 - config_name: subset_302 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 632889940 num_examples: 2108 download_size: 98160959 dataset_size: 632889940 - config_name: subset_303 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: 
train num_bytes: 609269662 num_examples: 2115 download_size: 94590996 dataset_size: 609269662 - config_name: subset_304 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 621920484 num_examples: 2109 download_size: 96549400 dataset_size: 621920484 - config_name: subset_305 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 621509233 num_examples: 2108 download_size: 96426327 dataset_size: 621509233 - config_name: subset_306 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 611209447 num_examples: 2090 download_size: 94887257 dataset_size: 611209447 - config_name: subset_307 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 611166497 num_examples: 2066 download_size: 94827583 dataset_size: 611166497 - config_name: subset_308 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - 
name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 609561455 num_examples: 2073 download_size: 94558889 dataset_size: 609561455 - config_name: subset_309 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 614925629 num_examples: 2076 download_size: 95314537 dataset_size: 614925629 - config_name: subset_31 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 837041227 num_examples: 2343 download_size: 129841147 dataset_size: 837041227 - config_name: subset_310 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 618940832 num_examples: 2093 download_size: 96118544 dataset_size: 618940832 - config_name: subset_311 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 617176010 num_examples: 2080 download_size: 95799437 dataset_size: 617176010 - config_name: subset_312 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score 
dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 620188688 num_examples: 2107 download_size: 96229842 dataset_size: 620188688 - config_name: subset_313 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 618673481 num_examples: 2101 download_size: 95998837 dataset_size: 618673481 - config_name: subset_314 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 598479264 num_examples: 2077 download_size: 92884108 dataset_size: 598479264 - config_name: subset_315 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 621758695 num_examples: 2100 download_size: 96437924 dataset_size: 621758695 - config_name: subset_316 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 607616749 num_examples: 2069 download_size: 94309186 dataset_size: 607616749 - config_name: subset_317 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score 
dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 604649925 num_examples: 2062 download_size: 93843165 dataset_size: 604649925 - config_name: subset_318 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 590823189 num_examples: 2072 download_size: 91774830 dataset_size: 590823189 - config_name: subset_319 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 603460812 num_examples: 2081 download_size: 93693912 dataset_size: 603460812 - config_name: subset_32 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 833327844 num_examples: 2316 download_size: 129321785 dataset_size: 833327844 - config_name: subset_320 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 597239908 num_examples: 2055 download_size: 92548238 dataset_size: 597239908 - config_name: subset_321 features: - name: line_no 
dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 600287614 num_examples: 2077 download_size: 93139913 dataset_size: 600287614 - config_name: subset_322 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 600033353 num_examples: 2070 download_size: 93107602 dataset_size: 600033353 - config_name: subset_323 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 597583948 num_examples: 2092 download_size: 92791855 dataset_size: 597583948 - config_name: subset_324 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 584192513 num_examples: 2031 download_size: 90606221 dataset_size: 584192513 - config_name: subset_325 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 598289672 num_examples: 2066 download_size: 92880431 
dataset_size: 598289672 - config_name: subset_326 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 577370669 num_examples: 2029 download_size: 89634416 dataset_size: 577370669 - config_name: subset_327 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 600437626 num_examples: 2067 download_size: 93149645 dataset_size: 600437626 - config_name: subset_328 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 606382681 num_examples: 2098 download_size: 94050079 dataset_size: 606382681 - config_name: subset_329 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 495852006 num_examples: 1708 download_size: 76933542 dataset_size: 495852006 - config_name: subset_33 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train 
num_bytes: 821475954 num_examples: 2324 download_size: 127495781 dataset_size: 821475954 - config_name: subset_330 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 590256104 num_examples: 2031 download_size: 91622869 dataset_size: 590256104 - config_name: subset_331 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 585303005 num_examples: 2046 download_size: 90758179 dataset_size: 585303005 - config_name: subset_332 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 601650721 num_examples: 2029 download_size: 93454688 dataset_size: 601650721 - config_name: subset_333 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 595456369 num_examples: 2042 download_size: 92340360 dataset_size: 595456369 - config_name: subset_334 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - 
name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 596628853 num_examples: 2023 download_size: 92541068 dataset_size: 596628853 - config_name: subset_335 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 565330087 num_examples: 2018 download_size: 87753330 dataset_size: 565330087 - config_name: subset_336 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 587826604 num_examples: 2030 download_size: 91163467 dataset_size: 587826604 - config_name: subset_337 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 577560638 num_examples: 2029 download_size: 89721302 dataset_size: 577560638 - config_name: subset_338 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 583490791 num_examples: 2031 download_size: 90592272 dataset_size: 583490791 - config_name: subset_339 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score 
dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 583317641 num_examples: 2042 download_size: 90484207 dataset_size: 583317641 - config_name: subset_34 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 833704005 num_examples: 2324 download_size: 129374458 dataset_size: 833704005 - config_name: subset_340 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 602220740 num_examples: 2071 download_size: 93412625 dataset_size: 602220740 - config_name: subset_341 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 602556715 num_examples: 2076 download_size: 93535274 dataset_size: 602556715 - config_name: subset_342 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 579025698 num_examples: 2032 download_size: 89878478 dataset_size: 579025698 - config_name: subset_343 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score 
dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 582492962 num_examples: 2041 download_size: 90324037 dataset_size: 582492962 - config_name: subset_344 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 566346868 num_examples: 2014 download_size: 87993067 dataset_size: 566346868 - config_name: subset_345 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 578773526 num_examples: 2012 download_size: 89815165 dataset_size: 578773526 - config_name: subset_346 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 573559888 num_examples: 2007 download_size: 89084562 dataset_size: 573559888 - config_name: subset_347 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 578304027 num_examples: 2006 download_size: 89698009 dataset_size: 578304027 - config_name: subset_348 features: - name: line_no 
dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 567698512 num_examples: 1985 download_size: 88108902 dataset_size: 567698512 - config_name: subset_349 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 580030047 num_examples: 2014 download_size: 90068855 dataset_size: 580030047 - config_name: subset_35 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 825705783 num_examples: 2342 download_size: 128114585 dataset_size: 825705783 - config_name: subset_350 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 558307377 num_examples: 1972 download_size: 86661716 dataset_size: 558307377 - config_name: subset_351 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 545101041 num_examples: 1931 download_size: 84617032 
dataset_size: 545101041 - config_name: subset_352 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 576856641 num_examples: 2023 download_size: 89628183 dataset_size: 576856641 - config_name: subset_353 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 552918104 num_examples: 1973 download_size: 85742465 dataset_size: 552918104 - config_name: subset_354 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 567033343 num_examples: 2003 download_size: 88043567 dataset_size: 567033343 - config_name: subset_355 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 562524372 num_examples: 1974 download_size: 87328549 dataset_size: 562524372 - config_name: subset_356 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: 
train num_bytes: 567755097 num_examples: 2016 download_size: 88214858 dataset_size: 567755097 - config_name: subset_357 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 579880001 num_examples: 2016 download_size: 89978335 dataset_size: 579880001 - config_name: subset_358 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 565013465 num_examples: 1965 download_size: 87721360 dataset_size: 565013465 - config_name: subset_359 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 557687432 num_examples: 1989 download_size: 86538393 dataset_size: 557687432 - config_name: subset_36 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 821517839 num_examples: 2333 download_size: 127455481 dataset_size: 821517839 - config_name: subset_360 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - 
name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 553125613 num_examples: 1984 download_size: 85853632 dataset_size: 553125613 - config_name: subset_361 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 568561423 num_examples: 2012 download_size: 88295647 dataset_size: 568561423 - config_name: subset_362 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 558296420 num_examples: 2004 download_size: 86750116 dataset_size: 558296420 - config_name: subset_363 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 565321555 num_examples: 2010 download_size: 87713995 dataset_size: 565321555 - config_name: subset_364 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 550709975 num_examples: 1971 download_size: 85413015 dataset_size: 550709975 - config_name: subset_365 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score 
dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 560606119 num_examples: 2043 download_size: 87020174 dataset_size: 560606119 - config_name: subset_366 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 568384430 num_examples: 2005 download_size: 88240157 dataset_size: 568384430 - config_name: subset_368 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 548264320 num_examples: 1936 download_size: 85126656 dataset_size: 548264320 - config_name: subset_369 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 232768298 num_examples: 897 download_size: 36210423 dataset_size: 232768298 - config_name: subset_37 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 812589184 num_examples: 2315 download_size: 126149166 dataset_size: 812589184 - config_name: subset_370 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score 
dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 541413568 num_examples: 1968 download_size: 84057142 dataset_size: 541413568 - config_name: subset_371 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 557115604 num_examples: 1962 download_size: 86562493 dataset_size: 557115604 - config_name: subset_372 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 554395090 num_examples: 1977 download_size: 86025480 dataset_size: 554395090 - config_name: subset_373 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 567127170 num_examples: 1989 download_size: 88099164 dataset_size: 567127170 - config_name: subset_374 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 554364830 num_examples: 2015 download_size: 86222175 dataset_size: 554364830 - config_name: subset_375 features: - name: line_no 
dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 556663368 num_examples: 1960 download_size: 86368984 dataset_size: 556663368 - config_name: subset_376 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 539178775 num_examples: 1950 download_size: 83571243 dataset_size: 539178775 - config_name: subset_377 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 570851414 num_examples: 2030 download_size: 88605831 dataset_size: 570851414 - config_name: subset_378 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 556820943 num_examples: 1990 download_size: 86487994 dataset_size: 556820943 - config_name: subset_379 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 536625666 num_examples: 1963 download_size: 83287808 
dataset_size: 536625666 - config_name: subset_38 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 829139016 num_examples: 2333 download_size: 128679026 dataset_size: 829139016 - config_name: subset_380 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 544162284 num_examples: 1978 download_size: 84530245 dataset_size: 544162284 - config_name: subset_39 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 820990456 num_examples: 2326 download_size: 127321393 dataset_size: 820990456 - config_name: subset_399 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 526772422 num_examples: 1921 download_size: 81778218 dataset_size: 526772422 - config_name: subset_4 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train 
num_bytes: 864639230 num_examples: 2335 download_size: 134197917 dataset_size: 864639230 - config_name: subset_40 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 826303157 num_examples: 2338 download_size: 128242803 dataset_size: 826303157 - config_name: subset_400 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 526371492 num_examples: 1911 download_size: 81778986 dataset_size: 526371492 - config_name: subset_401 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 522407219 num_examples: 1953 download_size: 81189473 dataset_size: 522407219 - config_name: subset_402 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 521443650 num_examples: 1919 download_size: 80996621 dataset_size: 521443650 - config_name: subset_403 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - 
name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 518409867 num_examples: 1939 download_size: 80531369 dataset_size: 518409867 - config_name: subset_404 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 513261105 num_examples: 1907 download_size: 79714635 dataset_size: 513261105 - config_name: subset_405 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 511509098 num_examples: 1923 download_size: 79341298 dataset_size: 511509098 - config_name: subset_406 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 536843486 num_examples: 1946 download_size: 83384329 dataset_size: 536843486 - config_name: subset_407 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 530216022 num_examples: 1957 download_size: 82296174 dataset_size: 530216022 - config_name: subset_408 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score 
dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 531756687 num_examples: 1945 download_size: 82514018 dataset_size: 531756687 - config_name: subset_409 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 518561307 num_examples: 1922 download_size: 80599707 dataset_size: 518561307 - config_name: subset_41 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 816877471 num_examples: 2335 download_size: 126708091 dataset_size: 816877471 - config_name: subset_410 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 508740373 num_examples: 1906 download_size: 78969194 dataset_size: 508740373 - config_name: subset_411 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 522400050 num_examples: 1922 download_size: 81146748 dataset_size: 522400050 - config_name: subset_412 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score 
dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 512421225 num_examples: 1920 download_size: 79557666 dataset_size: 512421225 - config_name: subset_413 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 521584119 num_examples: 1937 download_size: 80983283 dataset_size: 521584119 - config_name: subset_414 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 537900287 num_examples: 1972 download_size: 83587254 dataset_size: 537900287 - config_name: subset_415 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 519737252 num_examples: 1921 download_size: 80746916 dataset_size: 519737252 - config_name: subset_416 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 497694318 num_examples: 1876 download_size: 77313872 dataset_size: 497694318 - config_name: subset_417 features: - name: line_no 
dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 519215825 num_examples: 1950 download_size: 80628898 dataset_size: 519215825 - config_name: subset_418 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 511717570 num_examples: 1892 download_size: 79512887 dataset_size: 511717570 - config_name: subset_419 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 531043725 num_examples: 1945 download_size: 82468881 dataset_size: 531043725 - config_name: subset_42 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 815869334 num_examples: 2322 download_size: 126553227 dataset_size: 815869334 - config_name: subset_420 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 520631918 num_examples: 1905 download_size: 80829772 
dataset_size: 520631918 - config_name: subset_421 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 525932758 num_examples: 1928 download_size: 81648797 dataset_size: 525932758 - config_name: subset_422 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 519492770 num_examples: 1920 download_size: 80669724 dataset_size: 519492770 - config_name: subset_423 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 504774725 num_examples: 1866 download_size: 78400399 dataset_size: 504774725 - config_name: subset_424 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 506575267 num_examples: 1914 download_size: 78683868 dataset_size: 506575267 - config_name: subset_425 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: 
train num_bytes: 507984927 num_examples: 1894 download_size: 78902459 dataset_size: 507984927 - config_name: subset_43 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 821991670 num_examples: 2314 download_size: 127579521 dataset_size: 821991670 - config_name: subset_44 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 813794647 num_examples: 2343 download_size: 126265260 dataset_size: 813794647 - config_name: subset_45 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 814150214 num_examples: 2323 download_size: 126312319 dataset_size: 814150214 - config_name: subset_46 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 831736229 num_examples: 2328 download_size: 129074354 dataset_size: 831736229 - config_name: subset_47 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - 
name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 804349610 num_examples: 2303 download_size: 124737087 dataset_size: 804349610 - config_name: subset_48 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 797151932 num_examples: 2269 download_size: 123520240 dataset_size: 797151932 - config_name: subset_49 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 794196758 num_examples: 2295 download_size: 123255723 dataset_size: 794196758 - config_name: subset_5 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 880665270 num_examples: 2369 download_size: 136657633 dataset_size: 880665270 - config_name: subset_50 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 805527014 num_examples: 2332 download_size: 124952694 dataset_size: 805527014 - config_name: subset_51 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score 
dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 810987318 num_examples: 2320 download_size: 125909707 dataset_size: 810987318 - config_name: subset_52 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 804756878 num_examples: 2328 download_size: 124911962 dataset_size: 804756878 - config_name: subset_53 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 814579546 num_examples: 2346 download_size: 126323592 dataset_size: 814579546 - config_name: subset_54 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 817334379 num_examples: 2314 download_size: 126726543 dataset_size: 817334379 - config_name: subset_55 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 806167179 num_examples: 2340 download_size: 124962905 dataset_size: 806167179 - config_name: subset_56 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score 
dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 788700818 num_examples: 2312 download_size: 122420697 dataset_size: 788700818 - config_name: subset_57 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 816783940 num_examples: 2309 download_size: 126678671 dataset_size: 816783940 - config_name: subset_58 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 802255501 num_examples: 2318 download_size: 124500843 dataset_size: 802255501 - config_name: subset_59 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 794304992 num_examples: 2332 download_size: 123265947 dataset_size: 794304992 - config_name: subset_6 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 870636443 num_examples: 2371 download_size: 135098487 dataset_size: 870636443 - config_name: subset_60 features: - name: line_no 
dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 789133546 num_examples: 2319 download_size: 122322051 dataset_size: 789133546 - config_name: subset_61 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 797842879 num_examples: 2309 download_size: 123807907 dataset_size: 797842879 - config_name: subset_62 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 803617904 num_examples: 2334 download_size: 124639893 dataset_size: 803617904 - config_name: subset_63 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 576624503 num_examples: 1669 download_size: 89497145 dataset_size: 576624503 - config_name: subset_64 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 493957100 num_examples: 1451 download_size: 76672470 
dataset_size: 493957100 - config_name: subset_65 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 809228985 num_examples: 2344 download_size: 125567690 dataset_size: 809228985 - config_name: subset_66 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 802463327 num_examples: 2324 download_size: 124524946 dataset_size: 802463327 - config_name: subset_67 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 809041834 num_examples: 2341 download_size: 125514106 dataset_size: 809041834 - config_name: subset_68 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 801443208 num_examples: 2331 download_size: 124409069 dataset_size: 801443208 - config_name: subset_69 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train 
num_bytes: 767659950 num_examples: 2268 download_size: 119086299 dataset_size: 767659950 - config_name: subset_7 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 862300251 num_examples: 2336 download_size: 133899698 dataset_size: 862300251 - config_name: subset_70 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 807025850 num_examples: 2348 download_size: 125145079 dataset_size: 807025850 - config_name: subset_72 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 805465598 num_examples: 2344 download_size: 124948881 dataset_size: 805465598 - config_name: subset_8 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 860647946 num_examples: 2351 download_size: 133589761 dataset_size: 860647946 - config_name: subset_87 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: 
enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 797836000 num_examples: 2335 download_size: 123676344 dataset_size: 797836000 - config_name: subset_88 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 790640388 num_examples: 2344 download_size: 122633634 dataset_size: 790640388 - config_name: subset_89 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 791670317 num_examples: 2317 download_size: 122761969 dataset_size: 791670317 - config_name: subset_9 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 853697723 num_examples: 2347 download_size: 132406781 dataset_size: 853697723 - config_name: subset_90 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 767339364 num_examples: 2330 download_size: 119048385 dataset_size: 767339364 - config_name: subset_91 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: 
float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 803098340 num_examples: 2338 download_size: 124623706 dataset_size: 803098340 - config_name: subset_92 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 790881493 num_examples: 2341 download_size: 122681457 dataset_size: 790881493 - config_name: subset_93 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 792658076 num_examples: 2337 download_size: 122765555 dataset_size: 792658076 - config_name: subset_94 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 774818258 num_examples: 2319 download_size: 120167610 dataset_size: 774818258 - config_name: subset_95 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 773467274 num_examples: 2334 download_size: 119976106 dataset_size: 773467274 - config_name: subset_96 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: 
float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 797247546 num_examples: 2314 download_size: 123609844 dataset_size: 797247546 - config_name: subset_97 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: enA.audio.tokens sequence: sequence: int64 - name: frA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 794004389 num_examples: 2334 download_size: 123150930 dataset_size: 794004389 - config_name: subset_98 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 783265503 num_examples: 2319 download_size: 121463499 dataset_size: 783265503 - config_name: subset_99 features: - name: line_no dtype: int64 - name: enA.id dtype: string - name: enA.laser_score dtype: float64 - name: frA.id dtype: string - name: frA.laser_score dtype: float64 - name: frA.audio.tokens sequence: sequence: int64 - name: enA.audio.tokens sequence: sequence: int64 splits: - name: train num_bytes: 776953555 num_examples: 2316 download_size: 120428249 dataset_size: 776953555 configs: - config_name: subset_1 data_files: - split: train path: subset_1/train-* - config_name: subset_10 data_files: - split: train path: subset_10/train-* - config_name: subset_100 data_files: - split: train path: subset_100/train-* - config_name: subset_101 data_files: - split: train path: subset_101/train-* - config_name: subset_102 data_files: - split: train path: subset_102/train-* - config_name: subset_103 data_files: - split: train path: subset_103/train-* 
- config_name: subset_104 data_files: - split: train path: subset_104/train-* - config_name: subset_105 data_files: - split: train path: subset_105/train-* - config_name: subset_11 data_files: - split: train path: subset_11/train-* - config_name: subset_12 data_files: - split: train path: subset_12/train-* - config_name: subset_13 data_files: - split: train path: subset_13/train-* - config_name: subset_14 data_files: - split: train path: subset_14/train-* - config_name: subset_15 data_files: - split: train path: subset_15/train-* - config_name: subset_16 data_files: - split: train path: subset_16/train-* - config_name: subset_17 data_files: - split: train path: subset_17/train-* - config_name: subset_18 data_files: - split: train path: subset_18/train-* - config_name: subset_19 data_files: - split: train path: subset_19/train-* - config_name: subset_2 data_files: - split: train path: subset_2/train-* - config_name: subset_20 data_files: - split: train path: subset_20/train-* - config_name: subset_21 data_files: - split: train path: subset_21/train-* - config_name: subset_22 data_files: - split: train path: subset_22/train-* - config_name: subset_23 data_files: - split: train path: subset_23/train-* - config_name: subset_24 data_files: - split: train path: subset_24/train-* - config_name: subset_25 data_files: - split: train path: subset_25/train-* - config_name: subset_26 data_files: - split: train path: subset_26/train-* - config_name: subset_27 data_files: - split: train path: subset_27/train-* - config_name: subset_28 data_files: - split: train path: subset_28/train-* - config_name: subset_29 data_files: - split: train path: subset_29/train-* - config_name: subset_3 data_files: - split: train path: subset_3/train-* - config_name: subset_30 data_files: - split: train path: subset_30/train-* - config_name: subset_300 data_files: - split: train path: subset_300/train-* - config_name: subset_301 data_files: - split: train path: subset_301/train-* - config_name: 
subset_302 data_files: - split: train path: subset_302/train-* - config_name: subset_303 data_files: - split: train path: subset_303/train-* - config_name: subset_304 data_files: - split: train path: subset_304/train-* - config_name: subset_305 data_files: - split: train path: subset_305/train-* - config_name: subset_306 data_files: - split: train path: subset_306/train-* - config_name: subset_307 data_files: - split: train path: subset_307/train-* - config_name: subset_308 data_files: - split: train path: subset_308/train-* - config_name: subset_309 data_files: - split: train path: subset_309/train-* - config_name: subset_31 data_files: - split: train path: subset_31/train-* - config_name: subset_310 data_files: - split: train path: subset_310/train-* - config_name: subset_311 data_files: - split: train path: subset_311/train-* - config_name: subset_312 data_files: - split: train path: subset_312/train-* - config_name: subset_313 data_files: - split: train path: subset_313/train-* - config_name: subset_314 data_files: - split: train path: subset_314/train-* - config_name: subset_315 data_files: - split: train path: subset_315/train-* - config_name: subset_316 data_files: - split: train path: subset_316/train-* - config_name: subset_317 data_files: - split: train path: subset_317/train-* - config_name: subset_318 data_files: - split: train path: subset_318/train-* - config_name: subset_319 data_files: - split: train path: subset_319/train-* - config_name: subset_32 data_files: - split: train path: subset_32/train-* - config_name: subset_320 data_files: - split: train path: subset_320/train-* - config_name: subset_321 data_files: - split: train path: subset_321/train-* - config_name: subset_322 data_files: - split: train path: subset_322/train-* - config_name: subset_323 data_files: - split: train path: subset_323/train-* - config_name: subset_324 data_files: - split: train path: subset_324/train-* - config_name: subset_325 data_files: - split: train path: 
subset_325/train-* - config_name: subset_326 data_files: - split: train path: subset_326/train-* - config_name: subset_327 data_files: - split: train path: subset_327/train-* - config_name: subset_328 data_files: - split: train path: subset_328/train-* - config_name: subset_329 data_files: - split: train path: subset_329/train-* - config_name: subset_33 data_files: - split: train path: subset_33/train-* - config_name: subset_330 data_files: - split: train path: subset_330/train-* - config_name: subset_331 data_files: - split: train path: subset_331/train-* - config_name: subset_332 data_files: - split: train path: subset_332/train-* - config_name: subset_333 data_files: - split: train path: subset_333/train-* - config_name: subset_334 data_files: - split: train path: subset_334/train-* - config_name: subset_335 data_files: - split: train path: subset_335/train-* - config_name: subset_336 data_files: - split: train path: subset_336/train-* - config_name: subset_337 data_files: - split: train path: subset_337/train-* - config_name: subset_338 data_files: - split: train path: subset_338/train-* - config_name: subset_339 data_files: - split: train path: subset_339/train-* - config_name: subset_34 data_files: - split: train path: subset_34/train-* - config_name: subset_340 data_files: - split: train path: subset_340/train-* - config_name: subset_341 data_files: - split: train path: subset_341/train-* - config_name: subset_342 data_files: - split: train path: subset_342/train-* - config_name: subset_343 data_files: - split: train path: subset_343/train-* - config_name: subset_344 data_files: - split: train path: subset_344/train-* - config_name: subset_345 data_files: - split: train path: subset_345/train-* - config_name: subset_346 data_files: - split: train path: subset_346/train-* - config_name: subset_347 data_files: - split: train path: subset_347/train-* - config_name: subset_348 data_files: - split: train path: subset_348/train-* - config_name: subset_349 
data_files: - split: train path: subset_349/train-* - config_name: subset_35 data_files: - split: train path: subset_35/train-* - config_name: subset_350 data_files: - split: train path: subset_350/train-* - config_name: subset_351 data_files: - split: train path: subset_351/train-* - config_name: subset_352 data_files: - split: train path: subset_352/train-* - config_name: subset_353 data_files: - split: train path: subset_353/train-* - config_name: subset_354 data_files: - split: train path: subset_354/train-* - config_name: subset_355 data_files: - split: train path: subset_355/train-* - config_name: subset_356 data_files: - split: train path: subset_356/train-* - config_name: subset_357 data_files: - split: train path: subset_357/train-* - config_name: subset_358 data_files: - split: train path: subset_358/train-* - config_name: subset_359 data_files: - split: train path: subset_359/train-* - config_name: subset_36 data_files: - split: train path: subset_36/train-* - config_name: subset_360 data_files: - split: train path: subset_360/train-* - config_name: subset_361 data_files: - split: train path: subset_361/train-* - config_name: subset_362 data_files: - split: train path: subset_362/train-* - config_name: subset_363 data_files: - split: train path: subset_363/train-* - config_name: subset_364 data_files: - split: train path: subset_364/train-* - config_name: subset_365 data_files: - split: train path: subset_365/train-* - config_name: subset_366 data_files: - split: train path: subset_366/train-* - config_name: subset_368 data_files: - split: train path: subset_368/train-* - config_name: subset_369 data_files: - split: train path: subset_369/train-* - config_name: subset_37 data_files: - split: train path: subset_37/train-* - config_name: subset_370 data_files: - split: train path: subset_370/train-* - config_name: subset_371 data_files: - split: train path: subset_371/train-* - config_name: subset_372 data_files: - split: train path: subset_372/train-* - 
config_name: subset_373 data_files: - split: train path: subset_373/train-* - config_name: subset_374 data_files: - split: train path: subset_374/train-* - config_name: subset_375 data_files: - split: train path: subset_375/train-* - config_name: subset_376 data_files: - split: train path: subset_376/train-* - config_name: subset_377 data_files: - split: train path: subset_377/train-* - config_name: subset_378 data_files: - split: train path: subset_378/train-* - config_name: subset_379 data_files: - split: train path: subset_379/train-* - config_name: subset_38 data_files: - split: train path: subset_38/train-* - config_name: subset_380 data_files: - split: train path: subset_380/train-* - config_name: subset_39 data_files: - split: train path: subset_39/train-* - config_name: subset_399 data_files: - split: train path: subset_399/train-* - config_name: subset_4 data_files: - split: train path: subset_4/train-* - config_name: subset_40 data_files: - split: train path: subset_40/train-* - config_name: subset_400 data_files: - split: train path: subset_400/train-* - config_name: subset_401 data_files: - split: train path: subset_401/train-* - config_name: subset_402 data_files: - split: train path: subset_402/train-* - config_name: subset_403 data_files: - split: train path: subset_403/train-* - config_name: subset_404 data_files: - split: train path: subset_404/train-* - config_name: subset_405 data_files: - split: train path: subset_405/train-* - config_name: subset_406 data_files: - split: train path: subset_406/train-* - config_name: subset_407 data_files: - split: train path: subset_407/train-* - config_name: subset_408 data_files: - split: train path: subset_408/train-* - config_name: subset_409 data_files: - split: train path: subset_409/train-* - config_name: subset_41 data_files: - split: train path: subset_41/train-* - config_name: subset_410 data_files: - split: train path: subset_410/train-* - config_name: subset_411 data_files: - split: train path: 
subset_411/train-* - config_name: subset_412 data_files: - split: train path: subset_412/train-* - config_name: subset_413 data_files: - split: train path: subset_413/train-* - config_name: subset_414 data_files: - split: train path: subset_414/train-* - config_name: subset_415 data_files: - split: train path: subset_415/train-* - config_name: subset_416 data_files: - split: train path: subset_416/train-* - config_name: subset_417 data_files: - split: train path: subset_417/train-* - config_name: subset_418 data_files: - split: train path: subset_418/train-* - config_name: subset_419 data_files: - split: train path: subset_419/train-* - config_name: subset_42 data_files: - split: train path: subset_42/train-* - config_name: subset_420 data_files: - split: train path: subset_420/train-* - config_name: subset_421 data_files: - split: train path: subset_421/train-* - config_name: subset_422 data_files: - split: train path: subset_422/train-* - config_name: subset_423 data_files: - split: train path: subset_423/train-* - config_name: subset_424 data_files: - split: train path: subset_424/train-* - config_name: subset_425 data_files: - split: train path: subset_425/train-* - config_name: subset_43 data_files: - split: train path: subset_43/train-* - config_name: subset_44 data_files: - split: train path: subset_44/train-* - config_name: subset_45 data_files: - split: train path: subset_45/train-* - config_name: subset_46 data_files: - split: train path: subset_46/train-* - config_name: subset_47 data_files: - split: train path: subset_47/train-* - config_name: subset_48 data_files: - split: train path: subset_48/train-* - config_name: subset_49 data_files: - split: train path: subset_49/train-* - config_name: subset_5 data_files: - split: train path: subset_5/train-* - config_name: subset_50 data_files: - split: train path: subset_50/train-* - config_name: subset_51 data_files: - split: train path: subset_51/train-* - config_name: subset_52 data_files: - split: train 
path: subset_52/train-* - config_name: subset_53 data_files: - split: train path: subset_53/train-* - config_name: subset_54 data_files: - split: train path: subset_54/train-* - config_name: subset_55 data_files: - split: train path: subset_55/train-* - config_name: subset_56 data_files: - split: train path: subset_56/train-* - config_name: subset_57 data_files: - split: train path: subset_57/train-* - config_name: subset_58 data_files: - split: train path: subset_58/train-* - config_name: subset_59 data_files: - split: train path: subset_59/train-* - config_name: subset_6 data_files: - split: train path: subset_6/train-* - config_name: subset_60 data_files: - split: train path: subset_60/train-* - config_name: subset_61 data_files: - split: train path: subset_61/train-* - config_name: subset_62 data_files: - split: train path: subset_62/train-* - config_name: subset_63 data_files: - split: train path: subset_63/train-* - config_name: subset_64 data_files: - split: train path: subset_64/train-* - config_name: subset_65 data_files: - split: train path: subset_65/train-* - config_name: subset_66 data_files: - split: train path: subset_66/train-* - config_name: subset_67 data_files: - split: train path: subset_67/train-* - config_name: subset_68 data_files: - split: train path: subset_68/train-* - config_name: subset_69 data_files: - split: train path: subset_69/train-* - config_name: subset_7 data_files: - split: train path: subset_7/train-* - config_name: subset_70 data_files: - split: train path: subset_70/train-* - config_name: subset_72 data_files: - split: train path: subset_72/train-* - config_name: subset_8 data_files: - split: train path: subset_8/train-* - config_name: subset_87 data_files: - split: train path: subset_87/train-* - config_name: subset_88 data_files: - split: train path: subset_88/train-* - config_name: subset_89 data_files: - split: train path: subset_89/train-* - config_name: subset_9 data_files: - split: train path: subset_9/train-* - 
config_name: subset_90 data_files: - split: train path: subset_90/train-* - config_name: subset_91 data_files: - split: train path: subset_91/train-* - config_name: subset_92 data_files: - split: train path: subset_92/train-* - config_name: subset_93 data_files: - split: train path: subset_93/train-* - config_name: subset_94 data_files: - split: train path: subset_94/train-* - config_name: subset_95 data_files: - split: train path: subset_95/train-* - config_name: subset_96 data_files: - split: train path: subset_96/train-* - config_name: subset_97 data_files: - split: train path: subset_97/train-* - config_name: subset_98 data_files: - split: train path: subset_98/train-* - config_name: subset_99 data_files: - split: train path: subset_99/train-* ---

The dataset consists of multiple subsets that all share the same feature structure: a line number, English and French audio segment IDs, their LASER scores, and the tokenized audio for each language. Each subset has a single training split; split sizes and example counts vary from subset to subset.
提供机构:
asahi417
原始信息汇总

数据集概述

数据集子集信息

子集名称 特征数量 主要特征 数据类型 训练集大小 训练集示例数量 下载大小 数据集大小
subset_1 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 896960282 bytes 2344 138828871 bytes 896960282 bytes
subset_10 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 849195322 bytes 2336 131866595 bytes 849195322 bytes
subset_100 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 784236275 bytes 2326 121581663 bytes 784236275 bytes
subset_101 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 778808804 bytes 2335 120877210 bytes 778808804 bytes
subset_102 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 772584121 bytes 2306 119739300 bytes 772584121 bytes
subset_103 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 783315490 bytes 2335 121528799 bytes 783315490 bytes
subset_104 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 774101833 bytes 2330 119997337 bytes 774101833 bytes
subset_105 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 782038760 bytes 2331 121205066 bytes 782038760 bytes
subset_11 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 836642733 bytes 2315 129888698 bytes 836642733 bytes
subset_12 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 866050440 bytes 2349 134452465 bytes 866050440 bytes
subset_13 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 859275177 bytes 2341 133374434 bytes 859275177 bytes
subset_14 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 854462165 bytes 2338 132567966 bytes 854462165 bytes
subset_15 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 846548429 bytes 2363 131471430 bytes 846548429 bytes
subset_16 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 849723661 bytes 2348 131823703 bytes 849723661 bytes
subset_17 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 847314332 bytes 2328 131528731 bytes 847314332 bytes
subset_18 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 856549312 bytes 2350 132883574 bytes 856549312 bytes
subset_19 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 849231130 bytes 2348 131806664 bytes 849231130 bytes
subset_2 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 885549898 bytes 2364 137223195 bytes 885549898 bytes
subset_20 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 829390963 bytes 2346 128708237 bytes 829390963 bytes
subset_21 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 841492981 bytes 2335 130599466 bytes 841492981 bytes
subset_22 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 839566978 bytes 2331 130433057 bytes 839566978 bytes
subset_23 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 831309666 bytes 2337 129065953 bytes 831309666 bytes
subset_24 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 843207728 bytes 2333 130820123 bytes 843207728 bytes
subset_25 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 826543551 bytes 2319 128228152 bytes 826543551 bytes
subset_26 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 856166635 bytes 2338 132709958 bytes 856166635 bytes
subset_27 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 835216842 bytes 2319 129640044 bytes 835216842 bytes
subset_28 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 839904123 bytes 2334 130295941 bytes 839904123 bytes
subset_29 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 787229439 bytes 2182 122258606 bytes 787229439 bytes
subset_3 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 877322523 bytes 2354 136064499 bytes 877322523 bytes
subset_30 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 731200910 bytes 2049 113489889 bytes 731200910 bytes
subset_300 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 615595453 bytes 2112 95508534 bytes 615595453 bytes
subset_301 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 618759301 bytes 2080 95997104 bytes 618759301 bytes
subset_302 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 632889940 bytes 2108 98160959 bytes 632889940 bytes
subset_303 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 609269662 bytes 2115 94590996 bytes 609269662 bytes
subset_304 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64, string, float64, int64 621920484 bytes 2109 96549400 bytes 621920484 bytes
subset_305 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, enA.audio.tokens, frA.audio.tokens int64, string, float64, int64 621509233 bytes 2108 96426327 bytes 621509233 bytes
subset_306 7 line_no, enA.id, enA.laser_score, frA.id, frA.laser_score, frA.audio.tokens, enA.audio.tokens int64
5,000+
优质数据集
54 个
任务类型
进入经典数据集
二维码
社区交流群

面向社区/商业的数据集话题

二维码
科研交流群

面向高校/科研机构的开源数据集话题

数据驱动未来

携手共赢发展

商业合作