mashriram/AI4Bharat-Indic-Languages-and-Cultures
收藏Hugging Face2026-02-22 更新2026-03-29 收录
下载链接:
https://hf-mirror.com/datasets/mashriram/AI4Bharat-Indic-Languages-and-Cultures
下载链接
链接失效反馈官方服务:
资源简介:
---
dataset_info:
- config_name: Andhra_Pradesh
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 362277136
num_examples: 70510
- name: conv
num_bytes: 29743586
num_examples: 28910
- name: cult
num_bytes: 621448196
num_examples: 87854
download_size: 333223795
dataset_size: 1013468918
- config_name: Arunachal_Pradesh
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 196876736
num_examples: 83124
- name: conv
num_bytes: 40283797
num_examples: 51760
download_size: 114269572
dataset_size: 237160533
- config_name: Assam
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 350598773
num_examples: 70515
- name: conv
num_bytes: 78409870
num_examples: 43394
- name: cult
num_bytes: 76252723
num_examples: 12338
download_size: 198949011
dataset_size: 505261366
- config_name: Bihar
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 19313079
num_examples: 14714
download_size: 173725028
dataset_size: 449862462
- config_name: Chhattisgarh
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 649362
num_examples: 232
download_size: 168471868
dataset_size: 431198745
- config_name: Delhi
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 1668102
num_examples: 648
download_size: 168800246
dataset_size: 432217485
- config_name: Goa
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 196876736
num_examples: 83124
- name: conv
num_bytes: 77024534
num_examples: 43428
- name: cult
num_bytes: 23819801
num_examples: 4259
download_size: 128897383
dataset_size: 297721071
- config_name: Gujarat
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 228148497
num_examples: 35308
- name: conv
num_bytes: 75805151
num_examples: 43054
- name: cult
num_bytes: 75239503
num_examples: 30445
download_size: 133872237
dataset_size: 379193151
- config_name: Haryana
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 461829
num_examples: 185
download_size: 168389573
dataset_size: 431011212
- config_name: Himachal_Pradesh
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 516038
num_examples: 193
download_size: 168404054
dataset_size: 431065421
- config_name: Jammu_and_Kashmir
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 281896541
num_examples: 70529
- name: conv
num_bytes: 53173698
num_examples: 43429
- name: cult
num_bytes: 2584236
num_examples: 4307
download_size: 152716247
dataset_size: 337654475
- config_name: Jharkhand
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 43357819
num_examples: 9767
download_size: 182962049
dataset_size: 473907202
- config_name: Karnataka
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 377851071
num_examples: 70516
- name: conv
num_bytes: 85911364
num_examples: 43388
- name: cult
num_bytes: 216580254
num_examples: 31437
download_size: 252956706
dataset_size: 680342689
- config_name: Kerala
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 404377779
num_examples: 70518
- name: conv
num_bytes: 93451675
num_examples: 43233
- name: cult
num_bytes: 394393427
num_examples: 85791
download_size: 330135131
dataset_size: 892222881
- config_name: Madhya_Pradesh
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 879092
num_examples: 259
download_size: 168555704
dataset_size: 431428475
- config_name: Maharashtra
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 346620213
num_examples: 70518
- name: conv
num_bytes: 77538929
num_examples: 43424
- name: cult
num_bytes: 223089903
num_examples: 94133
download_size: 238283703
dataset_size: 647249045
- config_name: Manipur
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 196876736
num_examples: 83124
- name: conv
num_bytes: 40283797
num_examples: 51760
- name: cult
num_bytes: 8326654
num_examples: 10894
download_size: 116042593
dataset_size: 245487187
- config_name: Meghalaya
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 196876736
num_examples: 83124
- name: conv
num_bytes: 40283797
num_examples: 51760
download_size: 114269572
dataset_size: 237160533
- config_name: Mizoram
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 196876736
num_examples: 83124
- name: conv
num_bytes: 40283797
num_examples: 51760
download_size: 114269572
dataset_size: 237160533
- config_name: Nagaland
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 196876736
num_examples: 83124
- name: conv
num_bytes: 40283797
num_examples: 51760
- name: cult
num_bytes: 355
num_examples: 1
download_size: 114272858
dataset_size: 237160888
- config_name: Odisha
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 375238964
num_examples: 70513
- name: conv
num_bytes: 91865964
num_examples: 49384
- name: cult
num_bytes: 63445880
num_examples: 17375
download_size: 198473575
dataset_size: 530550808
- config_name: Punjab
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 355575909
num_examples: 70511
- name: conv
num_bytes: 77143243
num_examples: 43344
- name: cult
num_bytes: 182750995
num_examples: 51423
download_size: 239307067
dataset_size: 615470147
- config_name: Rajasthan
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 1892084
num_examples: 703
download_size: 168899118
dataset_size: 432441467
- config_name: Sikkim
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 196876736
num_examples: 83124
- name: conv
num_bytes: 29743602
num_examples: 28910
- name: cult
num_bytes: 88357123
num_examples: 32885
download_size: 137796789
dataset_size: 314977461
- config_name: Tamil_Nadu
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 416617224
num_examples: 70514
- name: conv
num_bytes: 93823229
num_examples: 43142
- name: cult
num_bytes: 656595101
num_examples: 160651
download_size: 397127678
dataset_size: 1167035554
- config_name: Telangana
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 362277136
num_examples: 70510
- name: conv
num_bytes: 82708038
num_examples: 43189
- name: cult
num_bytes: 2720971
num_examples: 420
download_size: 174051680
dataset_size: 447706145
- config_name: Tripura
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 349784158
num_examples: 70517
- name: conv
num_bytes: 79894327
num_examples: 43324
- name: cult
num_bytes: 526752
num_examples: 106
download_size: 167084845
dataset_size: 430205237
- config_name: Uttar_Pradesh
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 1850573
num_examples: 1308
download_size: 168794347
dataset_size: 432399956
- config_name: Uttarakhand
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 352573688
num_examples: 70517
- name: conv
num_bytes: 77975695
num_examples: 43303
- name: cult
num_bytes: 46569502
num_examples: 12156
download_size: 183618206
dataset_size: 477118885
- config_name: West_Bengal
features:
- name: instruction
dtype: string
- name: input
dtype: string
- name: response
dtype: string
splits:
- name: indic
num_bytes: 349784158
num_examples: 70517
- name: conv
num_bytes: 79894327
num_examples: 43324
- name: cult
num_bytes: 749711780
num_examples: 143069
download_size: 432018510
dataset_size: 1179390265
configs:
- config_name: Andhra_Pradesh
data_files:
- split: indic
path: Andhra_Pradesh/indic-*
- split: conv
path: Andhra_Pradesh/conv-*
- split: cult
path: Andhra_Pradesh/cult-*
- config_name: Arunachal_Pradesh
data_files:
- split: indic
path: Arunachal_Pradesh/indic-*
- split: conv
path: Arunachal_Pradesh/conv-*
- config_name: Assam
data_files:
- split: indic
path: Assam/indic-*
- split: conv
path: Assam/conv-*
- split: cult
path: Assam/cult-*
- config_name: Bihar
data_files:
- split: indic
path: Bihar/indic-*
- split: conv
path: Bihar/conv-*
- split: cult
path: Bihar/cult-*
- config_name: Chhattisgarh
data_files:
- split: indic
path: Chhattisgarh/indic-*
- split: conv
path: Chhattisgarh/conv-*
- split: cult
path: Chhattisgarh/cult-*
- config_name: Delhi
data_files:
- split: indic
path: Delhi/indic-*
- split: conv
path: Delhi/conv-*
- split: cult
path: Delhi/cult-*
- config_name: Goa
data_files:
- split: indic
path: Goa/indic-*
- split: conv
path: Goa/conv-*
- split: cult
path: Goa/cult-*
- config_name: Gujarat
data_files:
- split: indic
path: Gujarat/indic-*
- split: conv
path: Gujarat/conv-*
- split: cult
path: Gujarat/cult-*
- config_name: Haryana
data_files:
- split: indic
path: Haryana/indic-*
- split: conv
path: Haryana/conv-*
- split: cult
path: Haryana/cult-*
- config_name: Himachal_Pradesh
data_files:
- split: indic
path: Himachal_Pradesh/indic-*
- split: conv
path: Himachal_Pradesh/conv-*
- split: cult
path: Himachal_Pradesh/cult-*
- config_name: Jammu_and_Kashmir
data_files:
- split: indic
path: Jammu_and_Kashmir/indic-*
- split: conv
path: Jammu_and_Kashmir/conv-*
- split: cult
path: Jammu_and_Kashmir/cult-*
- config_name: Jharkhand
data_files:
- split: indic
path: Jharkhand/indic-*
- split: conv
path: Jharkhand/conv-*
- split: cult
path: Jharkhand/cult-*
- config_name: Karnataka
data_files:
- split: indic
path: Karnataka/indic-*
- split: conv
path: Karnataka/conv-*
- split: cult
path: Karnataka/cult-*
- config_name: Kerala
data_files:
- split: indic
path: Kerala/indic-*
- split: conv
path: Kerala/conv-*
- split: cult
path: Kerala/cult-*
- config_name: Madhya_Pradesh
data_files:
- split: indic
path: Madhya_Pradesh/indic-*
- split: conv
path: Madhya_Pradesh/conv-*
- split: cult
path: Madhya_Pradesh/cult-*
- config_name: Maharashtra
data_files:
- split: indic
path: Maharashtra/indic-*
- split: conv
path: Maharashtra/conv-*
- split: cult
path: Maharashtra/cult-*
- config_name: Manipur
data_files:
- split: indic
path: Manipur/indic-*
- split: conv
path: Manipur/conv-*
- split: cult
path: Manipur/cult-*
- config_name: Meghalaya
data_files:
- split: indic
path: Meghalaya/indic-*
- split: conv
path: Meghalaya/conv-*
- config_name: Mizoram
data_files:
- split: indic
path: Mizoram/indic-*
- split: conv
path: Mizoram/conv-*
- config_name: Nagaland
data_files:
- split: indic
path: Nagaland/indic-*
- split: conv
path: Nagaland/conv-*
- split: cult
path: Nagaland/cult-*
- config_name: Odisha
data_files:
- split: indic
path: Odisha/indic-*
- split: conv
path: Odisha/conv-*
- split: cult
path: Odisha/cult-*
- config_name: Punjab
data_files:
- split: indic
path: Punjab/indic-*
- split: conv
path: Punjab/conv-*
- split: cult
path: Punjab/cult-*
- config_name: Rajasthan
data_files:
- split: indic
path: Rajasthan/indic-*
- split: conv
path: Rajasthan/conv-*
- split: cult
path: Rajasthan/cult-*
- config_name: Sikkim
data_files:
- split: indic
path: Sikkim/indic-*
- split: conv
path: Sikkim/conv-*
- split: cult
path: Sikkim/cult-*
- config_name: Tamil_Nadu
data_files:
- split: indic
path: Tamil_Nadu/indic-*
- split: conv
path: Tamil_Nadu/conv-*
- split: cult
path: Tamil_Nadu/cult-*
- config_name: Telangana
data_files:
- split: indic
path: Telangana/indic-*
- split: conv
path: Telangana/conv-*
- split: cult
path: Telangana/cult-*
- config_name: Tripura
data_files:
- split: indic
path: Tripura/indic-*
- split: conv
path: Tripura/conv-*
- split: cult
path: Tripura/cult-*
- config_name: Uttar_Pradesh
data_files:
- split: indic
path: Uttar_Pradesh/indic-*
- split: conv
path: Uttar_Pradesh/conv-*
- split: cult
path: Uttar_Pradesh/cult-*
- config_name: Uttarakhand
data_files:
- split: indic
path: Uttarakhand/indic-*
- split: conv
path: Uttarakhand/conv-*
- split: cult
path: Uttarakhand/cult-*
- config_name: West_Bengal
data_files:
- split: indic
path: West_Bengal/indic-*
- split: conv
path: West_Bengal/conv-*
- split: cult
path: West_Bengal/cult-*
---
提供机构:
mashriram



