vietnamese_handwritten
数据集概述
数据下载与解压
数据集文件可通过以下Python代码下载并解压:
import os
import zipfile

import requests

# Download the dataset archive from the Hugging Face Hub and unpack it.
url = "https://huggingface.co/datasets/Daominhwysi/VNonDB/resolve/main/vn_handwritten_images.zip?download=true"
file_name = "vn_handwritten_images.zip"

# Stream the body so the archive is never held in memory at once. The timeout
# keeps a stalled connection from hanging the notebook forever, and the
# context manager releases the connection once the download is finished.
with requests.get(url, stream=True, timeout=60) as response:
    if response.status_code == 200:
        with open(file_name, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
        print(f"下载成功: {file_name}")
    else:
        print(f"无法下载文件。错误代码: {response.status_code}")

# NOTE(review): the archive is written to the current working directory but
# read back from /content; this presumably assumes the notebook runs with
# /content as its working directory (e.g. Google Colab) -- confirm.
zip_file_path = "/content/vn_handwritten_images.zip"
extract_to_dir = "/content/datasets"

# exist_ok avoids the separate existence check (and the race between the
# check and the creation).
os.makedirs(extract_to_dir, exist_ok=True)

with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
    zip_ref.extractall(extract_to_dir)

print(f"解压成功到目录: {extract_to_dir}")
标签数据读取
从 labels.json 读取标签数据 (CinamonAI)
import json
import logging

import pandas as pd

# Read the CinamonAI label file: a JSON object whose items are turned into
# (file_name, label) rows below.
try:
    # Open with an explicit UTF-8 encoding instead of the platform default so
    # non-ASCII label text decodes the same way on every system.
    with open("/content/datasets/cinamonai/labels.json", "r", encoding="utf-8") as file:
        labels = json.load(file)
except (OSError, ValueError) as e:
    # OSError covers a missing/unreadable file; ValueError covers malformed
    # JSON and decoding errors. Log, then re-raise: continuing would only fail
    # later with a NameError on df_1.
    logging.error(f"加载标签时出错: {e}")
    raise

df_1 = pd.DataFrame(list(labels.items()), columns=["file_name", "label"])

# Expand each label key into the path of its image file.
df_1["file_name"] = df_1["file_name"].apply(
    lambda x: f"/content/datasets/cinamonai/data/{x}.jpeg"
)

df_1.head()
从 labels.json 读取标签数据 (VNonDB)
import json
import logging

import pandas as pd

# Read the VNonDB label file: a JSON object whose items are turned into
# (file_name, label) rows below.
try:
    # Open with an explicit UTF-8 encoding instead of the platform default so
    # non-ASCII label text decodes the same way on every system.
    with open("/content/datasets/vnondb/labels.json", "r", encoding="utf-8") as file:
        labels = json.load(file)
except (OSError, ValueError) as e:
    # OSError covers a missing/unreadable file; ValueError covers malformed
    # JSON and decoding errors. Log, then re-raise: continuing would only fail
    # later with a NameError on df_2.
    logging.error(f"加载标签时出错: {e}")
    raise

df_2 = pd.DataFrame(list(labels.items()), columns=["file_name", "label"])

# Expand each label key into the path of its image file.
df_2["file_name"] = df_2["file_name"].apply(
    lambda x: f"/content/datasets/vnondb/outputs_image/{x}.jpeg"
)

df_2.head()
显示图像和标签
从 df_1 显示图像
import random

import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

# Pick one random row of df_1 and display its image together with its label.
random_row = df_1.sample(n=1).iloc[0]
file_path = random_row["file_name"]

# Open inside a context manager so the file handle is closed once the pixel
# data has been copied by convert().
with Image.open(file_path) as source_image:
    image = source_image.convert("RGB")

plt.figure(figsize=(20, 10))
plt.imshow(image)
plt.axis("off")
plt.show()

print(f"标签为: {random_row['label']}")
从 df_2 显示图像
import random

import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

# Pick one random row of df_2 and display its image together with its label.
random_row = df_2.sample(n=1).iloc[0]
file_path = random_row["file_name"]

# Open inside a context manager so the file handle is closed once the pixel
# data has been copied by convert().
with Image.open(file_path) as source_image:
    image = source_image.convert("RGB")

plt.figure(figsize=(20, 10))
plt.imshow(image)
plt.axis("off")
plt.show()

print(f"标签为: {random_row['label']}")
合并两个数据集
import pandas as pd

# Stack the two label frames into one; ignore_index=True renumbers the rows
# 0..n-1 instead of keeping each frame's own index.
df = pd.concat([df_1, df_2], ignore_index=True)
df.head()




