llm-jp/jawildtext
收藏 | Hugging Face | 2026-04-13 更新 | 2026-04-05 收录
下载链接:
https://hf-mirror.com/datasets/llm-jp/jawildtext
下载链接
链接失效反馈 | 官方服务:
资源简介:
---
dataset_info:
# Schema and size metadata for the `board_vqa` config (one `train` split,
# 1025 examples).
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. The extent of `polygons`, `fields` and
# `line_items` is assumed -- confirm against the upstream dataset card.
- config_name: board_vqa
  features:
  - name: subset
    dtype: string
  - name: image_id
    dtype: string
  - name: filename
    dtype: string
  - name: polygons
    list:
    - name: polygon_id
      dtype: string
    - name: polygon
      # Nested list of float64 values.
      list:
        list: float64
    - name: text
      dtype: string
    - name: direction
      dtype: string
  - name: question
    dtype: string
  - name: answer
    dtype: string
  - name: evidence
    list: string
  - name: tool
    dtype: string
  - name: writer_id
    dtype: int64
  - name: fields
    # Each entry pairs a string `value` with a list of `polygon_ids`.
    struct:
    - name: store_name
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: store_address
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: receipt_id
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: date
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: time
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: total_amount
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: tax_amount
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: line_items
      list:
      - name: item_name
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
      - name: item_price
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
      - name: item_quantity
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
  - name: image
    dtype: image
  splits:
  - name: train
    num_bytes: 2629852826
    num_examples: 1025
  download_size: 2577051472
  dataset_size: 2629852826
# Schema and size metadata for the `default` config (one `train` split,
# 3241 examples).
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. The extent of `polygons`, `fields` and
# `line_items` is assumed -- confirm against the upstream dataset card.
- config_name: default
  features:
  - name: subset
    dtype: string
  - name: image_id
    dtype: string
  - name: filename
    dtype: string
  - name: polygons
    list:
    - name: polygon_id
      dtype: string
    - name: polygon
      # Nested list of float64 values.
      list:
        list: float64
    - name: text
      dtype: string
    - name: direction
      dtype: string
  - name: question
    dtype: string
  - name: answer
    dtype: string
  - name: evidence
    list: string
  - name: tool
    dtype: string
  - name: writer_id
    dtype: int64
  - name: fields
    # Each entry pairs a string `value` with a list of `polygon_ids`.
    struct:
    - name: store_name
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: store_address
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: receipt_id
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: date
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: time
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: total_amount
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: tax_amount
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: line_items
      list:
      - name: item_name
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
      - name: item_price
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
      - name: item_quantity
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
  - name: image
    dtype: image
  splits:
  - name: train
    num_bytes: 8309148507
    num_examples: 3241
  download_size: 8855834580
  dataset_size: 8309148507
# Schema and size metadata for the `handwriting_ocr` config (one `train`
# split, 1065 examples).
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. The extent of `polygons`, `fields` and
# `line_items` is assumed -- confirm against the upstream dataset card.
- config_name: handwriting_ocr
  features:
  - name: subset
    dtype: string
  - name: image_id
    dtype: string
  - name: filename
    dtype: string
  - name: polygons
    list:
    - name: polygon_id
      dtype: string
    - name: polygon
      # Nested list of float64 values.
      list:
        list: float64
    - name: text
      dtype: string
    - name: direction
      dtype: string
  - name: question
    dtype: string
  - name: answer
    dtype: string
  - name: evidence
    list: string
  - name: tool
    dtype: string
  - name: writer_id
    dtype: int64
  - name: fields
    # Each entry pairs a string `value` with a list of `polygon_ids`.
    struct:
    - name: store_name
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: store_address
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: receipt_id
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: date
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: time
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: total_amount
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: tax_amount
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: line_items
      list:
      - name: item_name
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
      - name: item_price
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
      - name: item_quantity
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
  - name: image
    dtype: image
  splits:
  - name: train
    num_bytes: 2547248088
    num_examples: 1065
  download_size: 2558935770
  dataset_size: 2547248088
# Schema and size metadata for the `receipt_kie` config (one `train` split,
# 1151 examples).
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. The extent of `polygons`, `fields` and
# `line_items` is assumed -- confirm against the upstream dataset card.
- config_name: receipt_kie
  features:
  - name: subset
    dtype: string
  - name: image_id
    dtype: string
  - name: filename
    dtype: string
  - name: polygons
    list:
    - name: polygon_id
      dtype: string
    - name: polygon
      # Nested list of float64 values.
      list:
        list: float64
    - name: text
      dtype: string
    - name: direction
      dtype: string
  - name: question
    dtype: string
  - name: answer
    dtype: string
  - name: evidence
    list: string
  - name: tool
    dtype: string
  - name: writer_id
    dtype: int64
  - name: fields
    # Each entry pairs a string `value` with a list of `polygon_ids`.
    struct:
    - name: store_name
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: store_address
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: receipt_id
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: date
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: time
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: total_amount
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: tax_amount
      struct:
      - name: value
        dtype: string
      - name: polygon_ids
        list: string
    - name: line_items
      list:
      - name: item_name
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
      - name: item_price
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
      - name: item_quantity
        struct:
        - name: value
          dtype: string
        - name: polygon_ids
          list: string
  - name: image
    dtype: image
  splits:
  - name: train
    num_bytes: 3842464017
    num_examples: 1151
  download_size: 3722180644
  dataset_size: 3842464017
# Maps each config name to the file glob holding its `train` split.
# Nesting restored: `data_files` belongs to its config entry, and
# `split`/`path` form one item of the `data_files` list.
configs:
- config_name: board_vqa
  data_files:
  - split: train
    path: board_vqa/train-*
- config_name: default
  data_files:
  - split: train
    path: data/train-*
- config_name: handwriting_ocr
  data_files:
  - split: train
    path: handwriting_ocr/train-*
- config_name: receipt_kie
  data_files:
  - split: train
    path: receipt_kie/train-*
# Card-level metadata: license identifier, language codes, size bucket.
license: apache-2.0
# One-element lists written in flow form; parsed values are unchanged.
language: [ja]
size_categories: [1K<n<10K]
---
提供机构:
llm-jp



