BabyLM-community/BabyLM-BLIMP-Filtered
收藏Hugging Face2026-04-24 更新2026-04-26 收录
下载链接:
https://hf-mirror.com/datasets/BabyLM-community/BabyLM-BLIMP-Filtered
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: adjunct_island
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 153698
num_examples: 928
download_size: 57004
dataset_size: 153698
- config_name: anaphor_gender_agreement
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 127229
num_examples: 971
download_size: 37603
dataset_size: 127229
- config_name: anaphor_number_agreement
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 129912
num_examples: 931
download_size: 38407
dataset_size: 129912
- config_name: animate_subject_passive
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 128954
num_examples: 895
download_size: 42350
dataset_size: 128954
- config_name: animate_subject_trans
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 117854
num_examples: 923
download_size: 45628
dataset_size: 117854
- config_name: causative
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 99693
num_examples: 818
download_size: 40609
dataset_size: 99693
- config_name: complex_NP_island
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 167132
num_examples: 846
download_size: 64073
dataset_size: 167132
- config_name: coordinate_structure_constraint_complex_left_branch
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 190854
num_examples: 906
download_size: 61086
dataset_size: 190854
- config_name: coordinate_structure_constraint_object_extraction
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 162753
num_examples: 949
download_size: 48286
dataset_size: 162753
- config_name: determiner_noun_agreement_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 144954
num_examples: 929
download_size: 46195
dataset_size: 144954
- config_name: determiner_noun_agreement_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 145195
num_examples: 931
download_size: 45907
dataset_size: 145195
- config_name: determiner_noun_agreement_irregular_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 110806
num_examples: 681
download_size: 33860
dataset_size: 110806
- config_name: determiner_noun_agreement_irregular_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 131902
num_examples: 820
download_size: 39364
dataset_size: 131902
- config_name: determiner_noun_agreement_with_adj_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 169138
num_examples: 941
download_size: 52521
dataset_size: 169138
- config_name: determiner_noun_agreement_with_adj_irregular_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 132767
num_examples: 718
download_size: 40590
dataset_size: 132767
- config_name: determiner_noun_agreement_with_adj_irregular_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 154800
num_examples: 840
download_size: 45927
dataset_size: 154800
- config_name: determiner_noun_agreement_with_adjective_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 172813
num_examples: 933
download_size: 51450
dataset_size: 172813
- config_name: distractor_agreement_relational_noun
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 150207
num_examples: 788
download_size: 47872
dataset_size: 150207
- config_name: distractor_agreement_relative_clause
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 188221
num_examples: 871
download_size: 66880
dataset_size: 188221
- config_name: drop_argument
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 100710
num_examples: 920
download_size: 36628
dataset_size: 100710
- config_name: ellipsis_n_bar_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 173046
num_examples: 802
download_size: 72720
dataset_size: 173046
- config_name: ellipsis_n_bar_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 192016
num_examples: 828
download_size: 81516
dataset_size: 192016
- config_name: existential_there_object_raising
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 182342
num_examples: 812
download_size: 61146
dataset_size: 182342
- config_name: existential_there_quantifiers_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 150915
num_examples: 930
download_size: 47299
dataset_size: 150915
- config_name: existential_there_quantifiers_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 149381
num_examples: 911
download_size: 47310
dataset_size: 149381
- config_name: existential_there_subject_raising
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 184554
num_examples: 924
download_size: 54641
dataset_size: 184554
- config_name: expletive_it_object_raising
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 180549
num_examples: 759
download_size: 65351
dataset_size: 180549
- config_name: inchoative
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 88803
num_examples: 855
download_size: 34532
dataset_size: 88803
- config_name: intransitive
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 95798
num_examples: 868
download_size: 36946
dataset_size: 95798
- config_name: irregular_past_participle_adjectives
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 138819
num_examples: 961
download_size: 34942
dataset_size: 138819
- config_name: irregular_past_participle_verbs
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 117909
num_examples: 942
download_size: 34856
dataset_size: 117909
- config_name: irregular_plural_subject_verb_agreement_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 132855
num_examples: 804
download_size: 41311
dataset_size: 132855
- config_name: irregular_plural_subject_verb_agreement_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 137040
num_examples: 892
download_size: 37966
dataset_size: 137040
- config_name: left_branch_island_echo_question
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 139838
num_examples: 947
download_size: 47085
dataset_size: 139838
- config_name: left_branch_island_simple_question
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 142502
num_examples: 951
download_size: 47155
dataset_size: 142502
- config_name: matrix_question_npi_licensor_present
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 142314
num_examples: 929
download_size: 48161
dataset_size: 142314
- config_name: npi_present_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 125909
num_examples: 909
download_size: 47011
dataset_size: 125909
- config_name: npi_present_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 116306
num_examples: 914
download_size: 46949
dataset_size: 116306
- config_name: only_npi_licensor_present
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 130324
num_examples: 882
download_size: 45384
dataset_size: 130324
- config_name: only_npi_scope
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 174125
num_examples: 837
download_size: 68813
dataset_size: 174125
- config_name: passive_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 121341
num_examples: 840
download_size: 45358
dataset_size: 121341
- config_name: passive_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 102210
num_examples: 903
download_size: 36517
dataset_size: 102210
- config_name: principle_A_c_command
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 178521
num_examples: 946
download_size: 63621
dataset_size: 178521
- config_name: principle_A_case_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 154989
num_examples: 912
download_size: 55439
dataset_size: 154989
- config_name: principle_A_case_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 155551
num_examples: 915
download_size: 51465
dataset_size: 155551
- config_name: principle_A_domain_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 156196
num_examples: 914
download_size: 53801
dataset_size: 156196
- config_name: principle_A_domain_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 151020
num_examples: 915
download_size: 53161
dataset_size: 151020
- config_name: principle_A_domain_3
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 149541
num_examples: 941
download_size: 49207
dataset_size: 149541
- config_name: principle_A_reconstruction
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 147141
num_examples: 967
download_size: 42520
dataset_size: 147141
- config_name: regular_plural_subject_verb_agreement_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 141476
num_examples: 890
download_size: 44004
dataset_size: 141476
- config_name: regular_plural_subject_verb_agreement_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 145414
num_examples: 945
download_size: 40594
dataset_size: 145414
- config_name: sentential_negation_npi_licensor_present
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 157376
num_examples: 919
download_size: 49911
dataset_size: 157376
- config_name: sentential_negation_npi_scope
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 201082
num_examples: 871
download_size: 76126
dataset_size: 201082
- config_name: sentential_subject_island
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 165635
num_examples: 961
download_size: 53621
dataset_size: 165635
- config_name: superlative_quantifiers_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 155940
num_examples: 979
download_size: 46999
dataset_size: 155940
- config_name: superlative_quantifiers_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 157100
num_examples: 986
download_size: 49326
dataset_size: 157100
- config_name: tough_vs_raising_1
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 140278
num_examples: 948
download_size: 41906
dataset_size: 140278
- config_name: tough_vs_raising_2
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 155322
num_examples: 920
download_size: 55945
dataset_size: 155322
- config_name: transitive
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 114545
num_examples: 868
download_size: 47671
dataset_size: 114545
- config_name: wh_island
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 136580
num_examples: 960
download_size: 50258
dataset_size: 136580
- config_name: wh_questions_object_gap
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 165310
num_examples: 859
download_size: 60272
dataset_size: 165310
- config_name: wh_questions_subject_gap
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 175751
num_examples: 898
download_size: 63982
dataset_size: 175751
- config_name: wh_questions_subject_gap_long_distance
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 229930
num_examples: 857
download_size: 84767
dataset_size: 229930
- config_name: wh_vs_that_no_gap
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 162460
num_examples: 861
download_size: 61955
dataset_size: 162460
- config_name: wh_vs_that_no_gap_long_distance
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 216004
num_examples: 875
download_size: 82841
dataset_size: 216004
- config_name: wh_vs_that_with_gap
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 159580
num_examples: 919
download_size: 55203
dataset_size: 159580
- config_name: wh_vs_that_with_gap_long_distance
features:
- name: sentence_good
dtype: string
- name: sentence_bad
dtype: string
- name: field
dtype: string
- name: linguistics_term
dtype: string
- name: UID
dtype: string
- name: simple_LM_method
dtype: bool
- name: one_prefix_method
dtype: bool
- name: two_prefix_method
dtype: bool
- name: lexically_identical
dtype: bool
- name: pair_id
dtype: int32
splits:
- name: train
num_bytes: 210791
num_examples: 910
download_size: 75708
dataset_size: 210791
configs:
- config_name: adjunct_island
data_files:
- split: train
path: adjunct_island/train-*
- config_name: anaphor_gender_agreement
data_files:
- split: train
path: anaphor_gender_agreement/train-*
- config_name: anaphor_number_agreement
data_files:
- split: train
path: anaphor_number_agreement/train-*
- config_name: animate_subject_passive
data_files:
- split: train
path: animate_subject_passive/train-*
- config_name: animate_subject_trans
data_files:
- split: train
path: animate_subject_trans/train-*
- config_name: causative
data_files:
- split: train
path: causative/train-*
- config_name: complex_NP_island
data_files:
- split: train
path: complex_NP_island/train-*
- config_name: coordinate_structure_constraint_complex_left_branch
data_files:
- split: train
path: coordinate_structure_constraint_complex_left_branch/train-*
- config_name: coordinate_structure_constraint_object_extraction
data_files:
- split: train
path: coordinate_structure_constraint_object_extraction/train-*
- config_name: determiner_noun_agreement_1
data_files:
- split: train
path: determiner_noun_agreement_1/train-*
- config_name: determiner_noun_agreement_2
data_files:
- split: train
path: determiner_noun_agreement_2/train-*
- config_name: determiner_noun_agreement_irregular_1
data_files:
- split: train
path: determiner_noun_agreement_irregular_1/train-*
- config_name: determiner_noun_agreement_irregular_2
data_files:
- split: train
path: determiner_noun_agreement_irregular_2/train-*
- config_name: determiner_noun_agreement_with_adj_2
data_files:
- split: train
path: determiner_noun_agreement_with_adj_2/train-*
- config_name: determiner_noun_agreement_with_adj_irregular_1
data_files:
- split: train
path: determiner_noun_agreement_with_adj_irregular_1/train-*
- config_name: determiner_noun_agreement_with_adj_irregular_2
data_files:
- split: train
path: determiner_noun_agreement_with_adj_irregular_2/train-*
- config_name: determiner_noun_agreement_with_adjective_1
data_files:
- split: train
path: determiner_noun_agreement_with_adjective_1/train-*
- config_name: distractor_agreement_relational_noun
data_files:
- split: train
path: distractor_agreement_relational_noun/train-*
- config_name: distractor_agreement_relative_clause
data_files:
- split: train
path: distractor_agreement_relative_clause/train-*
- config_name: drop_argument
data_files:
- split: train
path: drop_argument/train-*
- config_name: ellipsis_n_bar_1
data_files:
- split: train
path: ellipsis_n_bar_1/train-*
- config_name: ellipsis_n_bar_2
data_files:
- split: train
path: ellipsis_n_bar_2/train-*
- config_name: existential_there_object_raising
data_files:
- split: train
path: existential_there_object_raising/train-*
- config_name: existential_there_quantifiers_1
data_files:
- split: train
path: existential_there_quantifiers_1/train-*
- config_name: existential_there_quantifiers_2
data_files:
- split: train
path: existential_there_quantifiers_2/train-*
- config_name: existential_there_subject_raising
data_files:
- split: train
path: existential_there_subject_raising/train-*
- config_name: expletive_it_object_raising
data_files:
- split: train
path: expletive_it_object_raising/train-*
- config_name: inchoative
data_files:
- split: train
path: inchoative/train-*
- config_name: intransitive
data_files:
- split: train
path: intransitive/train-*
- config_name: irregular_past_participle_adjectives
data_files:
- split: train
path: irregular_past_participle_adjectives/train-*
- config_name: irregular_past_participle_verbs
data_files:
- split: train
path: irregular_past_participle_verbs/train-*
- config_name: irregular_plural_subject_verb_agreement_1
data_files:
- split: train
path: irregular_plural_subject_verb_agreement_1/train-*
- config_name: irregular_plural_subject_verb_agreement_2
data_files:
- split: train
path: irregular_plural_subject_verb_agreement_2/train-*
- config_name: left_branch_island_echo_question
data_files:
- split: train
path: left_branch_island_echo_question/train-*
- config_name: left_branch_island_simple_question
data_files:
- split: train
path: left_branch_island_simple_question/train-*
- config_name: matrix_question_npi_licensor_present
data_files:
- split: train
path: matrix_question_npi_licensor_present/train-*
- config_name: npi_present_1
data_files:
- split: train
path: npi_present_1/train-*
- config_name: npi_present_2
data_files:
- split: train
path: npi_present_2/train-*
- config_name: only_npi_licensor_present
data_files:
- split: train
path: only_npi_licensor_present/train-*
- config_name: only_npi_scope
data_files:
- split: train
path: only_npi_scope/train-*
- config_name: passive_1
data_files:
- split: train
path: passive_1/train-*
- config_name: passive_2
data_files:
- split: train
path: passive_2/train-*
- config_name: principle_A_c_command
data_files:
- split: train
path: principle_A_c_command/train-*
- config_name: principle_A_case_1
data_files:
- split: train
path: principle_A_case_1/train-*
- config_name: principle_A_case_2
data_files:
- split: train
path: principle_A_case_2/train-*
- config_name: principle_A_domain_1
data_files:
- split: train
path: principle_A_domain_1/train-*
- config_name: principle_A_domain_2
data_files:
- split: train
path: principle_A_domain_2/train-*
- config_name: principle_A_domain_3
data_files:
- split: train
path: principle_A_domain_3/train-*
- config_name: principle_A_reconstruction
data_files:
- split: train
path: principle_A_reconstruction/train-*
- config_name: regular_plural_subject_verb_agreement_1
data_files:
- split: train
path: regular_plural_subject_verb_agreement_1/train-*
- config_name: regular_plural_subject_verb_agreement_2
data_files:
- split: train
path: regular_plural_subject_verb_agreement_2/train-*
- config_name: sentential_negation_npi_licensor_present
data_files:
- split: train
path: sentential_negation_npi_licensor_present/train-*
- config_name: sentential_negation_npi_scope
data_files:
- split: train
path: sentential_negation_npi_scope/train-*
- config_name: sentential_subject_island
data_files:
- split: train
path: sentential_subject_island/train-*
- config_name: superlative_quantifiers_1
data_files:
- split: train
path: superlative_quantifiers_1/train-*
- config_name: superlative_quantifiers_2
data_files:
- split: train
path: superlative_quantifiers_2/train-*
- config_name: tough_vs_raising_1
data_files:
- split: train
path: tough_vs_raising_1/train-*
- config_name: tough_vs_raising_2
data_files:
- split: train
path: tough_vs_raising_2/train-*
- config_name: transitive
data_files:
- split: train
path: transitive/train-*
- config_name: wh_island
data_files:
- split: train
path: wh_island/train-*
- config_name: wh_questions_object_gap
data_files:
- split: train
path: wh_questions_object_gap/train-*
- config_name: wh_questions_subject_gap
data_files:
- split: train
path: wh_questions_subject_gap/train-*
- config_name: wh_questions_subject_gap_long_distance
data_files:
- split: train
path: wh_questions_subject_gap_long_distance/train-*
- config_name: wh_vs_that_no_gap
data_files:
- split: train
path: wh_vs_that_no_gap/train-*
- config_name: wh_vs_that_no_gap_long_distance
data_files:
- split: train
path: wh_vs_that_no_gap_long_distance/train-*
- config_name: wh_vs_that_with_gap
data_files:
- split: train
path: wh_vs_that_with_gap/train-*
- config_name: wh_vs_that_with_gap_long_distance
data_files:
- split: train
path: wh_vs_that_with_gap_long_distance/train-*
---
提供机构:
BabyLM-community



