mychen76/wildreceipts_ocr_train
"wildreceipts_ocr_train" 数据集卡片
数据集概述
该数据集包含源自 Wildreceipt、带有增强文本信息的收据图像,以及额外精心筛选的收据图像。每张图像都附带 OCR 信息,包括单词、边界框、标签和关键信息提取数据,格式为 JSON 和 XML。
特征和数据结构
视觉数据
- 收据图像展示了复杂的布局效果。
文本数据
- ocr_json:以 JSON 格式表示的收据关键信息数据。
- ocr_boxes:最新的 OCR 扫描结果,用作真实标注数据。
- ocr_words:从收据图像中检测和识别出的单词。
- ocr_labels:原始标签类别与文本位置的映射(可能与实际 OCR 扫描结果有所偏差)。
- ocr_xml:以 XML 格式表示的关键信息。
- ocr_kie:从收据图像中提取的关键信息。
语言
数据集主要使用英语。
数据实例
数据集中的每个实例代表经过增强的收据集合中的一个条目。
数据样本
图像
file_name:receipt_0.jpeg
样本:ocr_words
plaintext [CHO EUN, KOREAN RESTAURANT, 2621 ORANGETHORPE AVE,FULLERTON., 714879-3574, THANKYOU!!, DATE12/30/2016 FRI, TIME19:19, BIBIM.OCTOPU T1, $13.99, S-FOODP.CAKT1, $14.99, PORK DUMPLIN T1, $8.99, LA BEEF RIB T1, $17.99, 4.00xITEMS, SUBTOTAL, $55.96, TAX1, $4.48, TOTAL, $60.44, $60AA]
样本:ocr_json
json {"store_name": "CHOEUN KOREANRESTAURANT", "store_addr": "2621ORANGETHORPEAVE,FULLERTON.", "telephone": "(714)879-3574", "date": "12/30/2016FRI", "time": "19:19", "subtotal": "$55.96", "tax": "$4.48", "total": "$60.44", "ignore": " ", "tips": "", "line_items": [{"item_key": "", "item_name": "BIBIM.OCTOPUT1", "item_value": "$13.99", "item_quantity": "1"}, {"item_key": "", "item_name": "S-FOODP.CAKT1", "item_value": "$14.99", "item_quantity": "1"}, {"item_key": "", "item_name": "PORKDUMPLINT1", "item_value": "$8.99", "item_quantity": "1"}, {"item_key": "", "item_name": "LABEEFRIBT1", "item_value": "¥17.99", "item_quantity": "1"}, {"item_key": "4.00xITEMS", "item_name": "", "item_value": "", "item_quantity": ""}]}
样本:ocr_xml
xml <s_receipt><s_total>$60.44</s_total><s_tips></s_tips><s_time>19:19</s_time><s_telephone>(714)879-3574</s_telephone><s_tax>$4.48</s_tax><s_subtotal>$55.96</s_subtotal><s_store_name>CHOEUN KOREANRESTAURANT</s_store_name><s_store_addr>2621ORANGETHORPEAVE,FULLERTON.</s_store_addr><s_line_items><s_item_value>$13.99</s_item_value><s_item_quantity>1</s_item_quantity><s_item_name>BIBIM.OCTOPUT1</s_item_name><s_item_key></s_item_key><sep/><s_item_value>$14.99</s_item_value><s_item_quantity>1</s_item_quantity><s_item_name>S-FOODP.CAKT1</s_item_name><s_item_key></s_item_key><sep/><s_item_value>$8.99</s_item_value><s_item_quantity>1</s_item_quantity><s_item_name>PORKDUMPLINT1</s_item_name><s_item_key></s_item_key><sep/><s_item_value>¥17.99</s_item_value><s_item_quantity>1</s_item_quantity><s_item_name>LABEEFRIBT1</s_item_name><s_item_key></s_item_key><sep/><s_item_value></s_item_value><s_item_quantity></s_item_quantity><s_item_name></s_item_name><s_item_key>4.00xITEMS</s_item_key></s_line_items><s_ignore> </s_ignore><s_date>12/30/2016FRI</s_date></s_receipt>
样本:ocr_kie
plaintext [{label: Store_name_value, transcription: CHOEUN}, {label: Store_name_value, transcription: KOREANRESTAURANT}, {label: Store_addr_value, transcription: 2621ORANGETHORPEAVE,FULLERTON.}, {label: Tel_value, transcription: (714)879-3574}, {label: Others, transcription: THANKYOU!!}, {label: Date_key, transcription: DATE}, {label: Date_value, transcription: 12/30/2016FRI}, {label: Time_value, transcription: 19:19}, {label: Prod_item_value, transcription: BIBIM.OCTOPUT1}, {label: Prod_item_value, transcription: S-FOODP.CAKT1}, {label: Prod_item_value, transcription: PORKDUMPLINT1}, {label: Prod_item_value, transcription: LABEEFRIBT1}, {label: Prod_price_value, transcription: $13.99}, {label: Prod_price_value, transcription: $14.99}, {label: Prod_price_value, transcription: $8.99}, {label: Prod_price_value, transcription: ¥17.99}, {label: Prod_item_key, transcription: 4.00xITEMS}, {label: Subtotal_key, transcription: SUBTOTAL}, {label: Tax_key, transcription: TAX1}, {label: Total_key, transcription: TOTAL}, {label: Subtotal_value, transcription: $55.96}, {label: Tax_value, transcription: $4.48}, {label: Total_value, transcription: $60.44}, {label: Ignore, transcription: }, {label: Ignore, transcription: }, {label: Time_key, transcription: TIME}]
样本:ocr_labels
plaintext [{label: Store_name_value, transcription: CHOEUN, points: [[114.0, 19.0], [230.0, 19.0], [230.0, 1.0], [114.0, 1.0]]}, {label: Store_name_value, transcription: KOREANRESTAURANT, points: [[97.0, 35.0], [236.0, 35.0], [236.0, 19.0], [97.0, 19.0]]}, {label: Store_addr_value, transcription: 2621ORANGETHORPEAVE,FULLERTON., points: [[29.0, 56.0], [295.0, 56.0], [295.0, 34.0], [29.0, 34.0]]}, {label: Tel_value, transcription: (714)879-3574, points: [[48.0, 73.0], [280.0, 73.0], [280.0, 54.0], [48.0, 54.0]]}, {label: Others, transcription: THANKYOU!!, points: [[79.0, 92.0], [259.0, 92.0], [259.0, 74.0], [79.0, 74.0]]}, {label: Date_key, transcription: DATE, points: [[22.0, 130.0], [61.0, 130.0], [61.0, 112.0], [22.0, 112.0]]}, {label: Date_value, transcription: 12/30/2016FRI, points: [[70.0, 131.0], [192.0, 131.0], [192.0, 112.0], [70.0, 112.0]]}, {label: Time_value, transcription: 19:19, points: [[263.0, 128.0], [307.0, 128.0], [307.0, 111.0], [263.0, 111.0]]}, {label: Prod_item_value, transcription: BIBIM.OCTOPUT1, points: [[19.0, 168.0], [157.0, 168.0], [157.0, 149.0], [19.0, 149.0]]}, {label: Prod_item_value, transcription: S-FOODP.CAKT1, points: [[17.0, 190.0], [158.0, 190.0], [158.0, 171.0], [17.0, 171.0]]}, {label: Prod_item_value, transcription: PORKDUMPLINT1, points: [[14.0, 214.0], [158.0, 214.0], [158.0, 192.0], [14.0, 192.0]]}, {label: Prod_item_value, transcription: LABEEFRIBT1, points: [[14.0, 236.0], [151.0, 236.0], [151.0, 215.0], [14.0, 215.0]]}, {transcription: $13.99, points: [[254.0, 168.0], [312.0, 168.0], [312.0, 149.0], [254.0, 149.0]]}, {transcription: $14.99, points: [[257.0, 189.0], [314.0, 189.0], [314.0, 170.0], [257.0, 170.0]]}, {transcription: $8.99, points: [[268.0, 212.0], [316.0, 212.0], [316.0, 191.0], [268.0, 191.0]]}, {transcription: ¥17.99, points: [[261.0, 234.0], [318.0, 234.0], [318.0, 213.0], [261.0, 213.0]]}, {label: Prod_item_key, transcription: 4.00xITEMS, points: [[118.0, 260.0], [217.0, 260.0], [217.0, 239.0], [118.0, 239.0]]}, {label: Subtotal_key, transcription: SUBTOTAL, points: [[8.0, 285.0], [91.0, 285.0], [91.0, 264.0], [8.0, 264.0]]}, {label: Tax_key, transcription: TAX1, points: [[8.0, 312.0], [49.0, 312.0], [49.0, 291.0], [8.0, 291.0]]}, {label: Total_key, transcription: TOTAL, points: [[8.0, 336.0], [61.0, 336.0], [61.0, 316.0], [8.0, 316.0]]}, {label: Subtotal_value, transcription: $55.96, points: [[263.0, 283.0], [325.0, 283.0], [325.0, 260.0], [263.0, 260.0]]}, {label: Tax_value, transcription: $4.48, points: [[274.0, 308.0], [326.0, 308.0], [326.0, 286.0], [274.0, 286.0]]}, {label: Total_value, transcription: $60.44, points: [[267.0, 334.0], [328.0, 334.0], [328.0, 310.0], [267.0, 310.0]]}, {label: Ignore, transcription: , points: [[269.0, 347.0], [328.0, 347.0], [328.0, 336.0], [269.0, 336.0]]}, {label: Ignore, transcription: , points: [[11.0, 347.0], [50.0, 347.0], [50.0, 342.0], [11.0, 342.0]]}, {label: Time_key, transcription: TIME, points: [[215.0, 128.0], [253.0, 128.0], [253.0, 112.0], [215.0, 112.0]]}]
样本:ocr_boxes
plaintext [[[[113.0, 0.0], [228.0, 3.0], [227.0, 20.0], [113.0, 17.0]], (CHO EUN, 0.9466678500175476)], [[[96.0, 17.0], [236.0, 21.0], [236.0, 38.0], [96.0, 33.0]], (KOREAN RESTAURANT, 0.9685913324356079)], [[[28.0, 32.0], [293.0, 37.0], [292.0, 56.0], [28.




