Qwen-Image-Self-Generated-Dataset
收藏魔搭社区2026-05-23 更新2025-08-16 收录
下载链接:
https://modelscope.cn/datasets/DiffSynth-Studio/Qwen-Image-Self-Generated-Dataset
下载链接
链接失效反馈官方服务:
资源简介:
# Generated Image Dataset from Qwen-Image
## Introduction
This is an image dataset generated using the Qwen-Image model, with a total of 160,000 `1024 x 1024` images, including the general data subset (diffusion_db subset), the English text rendering data subset (en_text_anywordlaion subset), and the Chinese text rendering data subset (zhtext_anywordlaion subset).
The input prompts come from DiffusionDB and AnyWord.
||||
|-|-|-|
||||
We provide caption, entity, and control-image annotations for each image. The annotation model we used is `Qwen2.5-VL-72B-Instruct`.
### Caption Annotations
We provide caption annotations for each image in `image_captions.jsonl`.
Example:
```
image_path: diffusion_db/000469.png
chinese_short_description: 戴着墨镜的皮卡丘。
chinese_long_description: 这是一张展示了一只可爱的皮卡丘的图片,皮卡丘是黄色的,有着标志性的红色脸颊和闪电形状的尾巴。它戴着一副黑色的墨镜,显得非常酷炫和时尚。皮卡丘的表情平静,微微微笑,整体氛围轻松愉快。背景是深色的,突出了皮卡丘的形象,使其更加引人注目。
english_short_description: Pikachu wearing sunglasses.
english_long_description: This image features a charming Pikachu, the iconic yellow Pokémon with distinctive red cheeks and a lightning bolt-shaped tail. Pikachu is adorned with a pair of black sunglasses, giving it a cool and stylish appearance. Its expression is calm and slightly smiling, creating a relaxed and cheerful atmosphere. The background is dark, which effectively highlights Pikachu's figure, making it stand out prominently.
```
### Entity Annotations
We provide entity-level annotations for each image in `image_entities.jsonl`.
Example:
```
image_path: diffusion_db/000038.png
caption: A young woman in a witch hat and orange dress stands with hands clasped, surrounded by vibrant autumn flowers at the hem of her dress.
entities: [{"entity": "Young woman in witch attire", "bboxes": [[0.36486486486486486, 0.0472972972972973, 0.6032818532818532, 0.8204633204633205]]}, {"entity": "Vibrant autumn flowers", "bboxes": [[0.24034749034749034, 0.48359073359073357, 0.6959459459459459, 0.9536679536679536]]}]
```
### Control Image Annotations
We provide control images for each image in `control_images.jsonl`.
Example:
```
image_path: diffusion_db/053769.png
control_images:
canny: control_images_new/canny/diffusion_db/053769.png
depth: control_images_new/depth/diffusion_db/053769.png
softedge: control_images_new/softedge/diffusion_db/053769.png
lineart: control_images_new/lineart/diffusion_db/053769.png
lineart_anime: control_images_new/lineart_anime/diffusion_db/053769.png
openpose: control_images_new/openpose/diffusion_db/053769.png
normal: control_images_new/normal/diffusion_db/053769.png
```
### Text Quality Assessment
For the Chinese and English text-rendering image subsets, we used a VLM to perform text quality assessment. Images containing **text that is too small, incorrect text, or overlapping text elements** are considered low-quality. The quality assessment results can be found in the `low_quality_text` field of `image_text_quality.jsonl`.
Example:
```
image_path: en_text_anywordlaion/0036144.png
low_quality_text: false
```
## Download
```
pip install modelscope
modelscope download DiffSynth-Studio/Qwen-Image-Self-Generated-Dataset --repo-type dataset --local_dir qwen_image_data
```
## How To Use
For annotations, see above.
Image files are stored in tar archives. We provide a script to unpack the images from the tar files into subfolders in `./images`:
```python
import os
import subprocess
from tqdm import tqdm
import multiprocessing
import glob
# Directory that is scanned below for .tar archives and .tar.part_* pieces.
source_tar_dir = "tars"
# Directory passed to tar as the extraction target; created if it is missing.
target_image_dir = "images"
os.makedirs(target_image_dir, exist_ok=True)
def unpack_tar(tar_path, dest_dir):
    """Extract a single tar archive into ``dest_dir`` with the system ``tar``.

    Args:
        tar_path: Path to the ``.tar`` archive to extract.
        dest_dir: Existing directory handed to ``tar -C`` as the extraction
            target.

    Returns:
        ``dest_dir`` joined with the archive's file name minus its trailing
        ``.tar`` extension (presumably the folder the archive unpacks to;
        the archive layout is not checked here).

    Raises:
        subprocess.CalledProcessError: If ``tar`` exits with a non-zero status.
    """
    cmd = ["tar", "xf", tar_path, "-C", dest_dir]
    # tar's own output is discarded; failures surface through check=True.
    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    archive_name = os.path.basename(tar_path)
    # Strip only the trailing extension: str.replace('.tar', '') would also
    # delete a ".tar" that happens to occur in the middle of the name.
    if archive_name.endswith('.tar'):
        archive_name = archive_name[:-len('.tar')]
    return os.path.join(dest_dir, archive_name)
def unpack_split_tar(split_prefix, dest_dir):
    """Reassemble a split tar archive and extract it into ``dest_dir``.

    All files whose path starts with ``split_prefix`` are concatenated with
    ``cat`` (in the shell's glob order) and piped into ``tar``.

    Args:
        split_prefix: Common path prefix of the parts, e.g.
            ``tars/name.tar.part_`` for ``name.tar.part_aa``, ``..._ab``, ...
        dest_dir: Existing directory handed to ``tar -C`` as the extraction
            target.

    Returns:
        ``dest_dir`` joined with the prefix's file name minus its trailing
        ``.tar.part_`` suffix, matching what ``unpack_tar`` returns for an
        unsplit archive of the same name.

    Raises:
        subprocess.CalledProcessError: If the pipeline (i.e. ``tar``, its
            last command) exits with a non-zero status.
    """
    import shlex  # local import: the file's import block does not provide shlex

    # Quote both paths so spaces or shell metacharacters in them are taken
    # literally. The trailing * stays outside the quotes so the shell still
    # expands it to every part file.
    cmd = f"cat {shlex.quote(split_prefix)}* | tar xf - -C {shlex.quote(dest_dir)}"
    subprocess.run(cmd, shell=True, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    archive_name = os.path.basename(split_prefix)
    part_suffix = '.tar.part_'
    # Drop the split marker so the result names the archive, not the prefix.
    if archive_name.endswith(part_suffix):
        archive_name = archive_name[:-len(part_suffix)]
    return os.path.join(dest_dir, archive_name)
def process_tar(args):
    """Unpack one work item, choosing the helper by archive kind.

    Args:
        args: A ``(tar_path, dest_dir)`` pair, packed into one argument so
            the function can be mapped over a list of work items.

    Returns:
        Whatever the selected helper returns: ``unpack_split_tar`` when the
        path contains ``.tar.part_``, otherwise ``unpack_tar``.
    """
    tar_path, dest_dir = args
    marker = '.tar.part_'
    if marker not in tar_path:
        return unpack_tar(tar_path, dest_dir)

    # Rebuild the shared prefix from the file name so that either a bare
    # prefix or the path of one specific part is accepted.
    folder, file_name = os.path.split(tar_path)
    stem = file_name.split(marker)[0]
    return unpack_split_tar(os.path.join(folder, f"{stem}{marker}"), dest_dir)
# Build the work list: one (path, destination) entry per plain .tar file and
# one per group of split parts.
all_tar_files = []
files = os.listdir(source_tar_dir)
processed_files = set()
for name in files:
    if name.endswith('.tar.part_aa'):
        # The first part ("aa") stands in for the whole split archive; the
        # entry stores the shared prefix rather than any single part.
        stem = name.split('.tar.part_')[0]
        split_prefix = os.path.join(source_tar_dir, f"{stem}.tar.part_")
        all_tar_files.append((split_prefix, target_image_dir))
        # Remember every part of this group by file name.
        processed_files.update(
            os.path.basename(part) for part in glob.glob(f"{split_prefix}*")
        )
        continue
    is_plain_tar = name.endswith('.tar') and '.tar.part_' not in name
    if is_plain_tar and name not in processed_files:
        all_tar_files.append((os.path.join(source_tar_dir, name), target_image_dir))
# Guard the pool start-up: with the "spawn" start method (the default on
# Windows and macOS) each worker re-imports this module, and an unguarded
# Pool() at import time would try to start pools recursively.
if __name__ == "__main__":
    # Cap the number of workers at 50, or at the CPU count if that is lower.
    num_processes = min(50, multiprocessing.cpu_count())
    results = []
    print(f"Total TAR files to unpack: {len(all_tar_files)}")
    # Pass the cap to the pool; a bare Pool() ignores num_processes and
    # starts one worker per CPU.
    with multiprocessing.Pool(num_processes) as pool:
        with tqdm(total=len(all_tar_files), desc="Unpacking TAR files") as pbar:
            # imap_unordered yields each result as soon as its archive is
            # done, so the progress bar advances per finished archive.
            for res in pool.imap_unordered(process_tar, all_tar_files):
                pbar.update(1)
                results.append(res)
    print(f"\nAll TAR files unpacked to: {target_image_dir}")
    print(f"Total unpacked folders: {len(results)}")
```
Furthermore, you can verify file correctness using the following code, which joins your absolute image root path (`root_path`) with the `image_path` of each annotation:
```python
import os
import random
import json
from tqdm import tqdm
def read_jsonl(file_path, num_samples=None):
    """Read a JSON Lines file into a list of decoded records.

    Args:
        file_path: Path to a UTF-8 encoded file with one JSON value per line.
        num_samples: Maximum number of records to read; ``None`` reads the
            whole file, and ``0`` (or a negative value) reads nothing.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    print(f"reading from {file_path}")
    data_list = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in tqdm(file):
            # Test the limit before parsing so that num_samples=0 yields an
            # empty list instead of one record.
            if num_samples is not None and len(data_list) >= num_samples:
                break
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at end of file) are
                # not records; json.loads("") would raise on them.
                continue
            data_list.append(json.loads(line))
    print(f"read {len(data_list)} samples")
    return data_list
# Spot-check that a random sample of up to 1000 annotated images exists
# under root_path.
root_path = 'path_to_images'  # placeholder value; each image_path is joined onto it
datas = read_jsonl('image_captions.jsonl')
random.shuffle(datas)
for record in datas[:1000]:
    full_path = os.path.join(root_path, record['image_path'])
    assert os.path.exists(full_path), f"Image does not exist: {full_path}"
print(f"All images exists")
```
# 基于Qwen-Image生成的图像数据集
## 数据集简介
本数据集由Qwen-Image模型生成,总计包含160000张分辨率为`1024×1024`的图像,涵盖三大子集:通用数据子集(diffusion_db子集)、英文文本渲染数据子集(en_text_anywordlaion子集)以及中文文本渲染数据子集(zhtext_anywordlaion子集)。输入提示词源自DiffusionDB与AnyWord。
以下为各子集的示例图示:
||||
我们为每张图像提供了标题标注、实体标注与可控图像标注,本次标注所使用的模型为`Qwen2.5-VL-72B-Instruct`。
### 标题标注
我们在`image_captions.jsonl`文件中为每张图像提供了标题标注。示例如下:
```
image_path: diffusion_db/000469.png
chinese_short_description: 戴着墨镜的皮卡丘。
chinese_long_description: 这是一张展示了一只可爱的皮卡丘的图片,皮卡丘是黄色的,有着标志性的红色脸颊和闪电形状的尾巴。它戴着一副黑色的墨镜,显得非常酷炫和时尚。皮卡丘的表情平静,微微微笑,整体氛围轻松愉快。背景是深色的,突出了皮卡丘的形象,使其更加引人注目。
english_short_description: Pikachu wearing sunglasses.
english_long_description: This image features a charming Pikachu, the iconic yellow Pokémon with distinctive red cheeks and a lightning bolt-shaped tail. Pikachu is adorned with a pair of black sunglasses, giving it a cool and stylish appearance. Its expression is calm and slightly smiling, creating a relaxed and cheerful atmosphere. The background is dark, which effectively highlights Pikachu's figure, making it stand out prominently.
```
### 实体标注
我们在`image_entities.jsonl`文件中为每张图像提供了实体级标注。示例如下:
```
image_path: diffusion_db/000038.png
caption: A young woman in a witch hat and orange dress stands with hands clasped, surrounded by vibrant autumn flowers at the hem of her dress.
entities: [{"entity": "Young woman in witch attire", "bboxes": [[0.36486486486486486, 0.0472972972972973, 0.6032818532818532, 0.8204633204633205]]}, {"entity": "Vibrant autumn flowers", "bboxes": [[0.24034749034749034, 0.48359073359073357, 0.6959459459459459, 0.9536679536679536]]}]
```
### 可控图像标注
我们在`control_images.jsonl`文件中为每张图像提供了可控图像标注。示例如下:
```
image_path: diffusion_db/053769.png
control_images:
canny: control_images_new/canny/diffusion_db/053769.png
depth: control_images_new/depth/diffusion_db/053769.png
softedge: control_images_new/softedge/diffusion_db/053769.png
lineart: control_images_new/lineart/diffusion_db/053769.png
lineart_anime: control_images_new/lineart_anime/diffusion_db/053769.png
openpose: control_images_new/openpose/diffusion_db/053769.png
normal: control_images_new/normal/diffusion_db/053769.png
```
### 文本质量评估
针对中英文文本渲染图像子集,我们使用视觉语言模型(Vision Language Model,VLM)开展了文本质量评估。若图像存在**文本过小、文本错误或文本元素重叠**的问题,则被判定为低质量图像。质量评估结果存储于`image_text_quality.jsonl`的`low_quality_text`字段中。示例如下:
```
image_path: en_text_anywordlaion/0036144.png
low_quality_text: false
```
## 数据集下载
我们提供了如下下载命令:
```bash
pip install modelscope
modelscope download DiffSynth-Studio/Qwen-Image-Self-Generated-Dataset --repo-type dataset --local_dir qwen_image_data
```
## 使用方法
标注格式详见前文说明。
图像文件存储于压缩包中,我们提供了用于将tar包解压至`./images`子文件夹的脚本:
python
import os
import subprocess
from tqdm import tqdm
import multiprocessing
import glob
# Directory that is scanned below for .tar archives and .tar.part_* pieces.
source_tar_dir = "tars"
# Directory passed to tar as the extraction target; created if it is missing.
target_image_dir = "images"
os.makedirs(target_image_dir, exist_ok=True)
def unpack_tar(tar_path, dest_dir):
    """Extract a single tar archive into ``dest_dir`` with the system ``tar``.

    Args:
        tar_path: Path to the ``.tar`` archive to extract.
        dest_dir: Existing directory handed to ``tar -C`` as the extraction
            target.

    Returns:
        ``dest_dir`` joined with the archive's file name minus its trailing
        ``.tar`` extension (presumably the folder the archive unpacks to;
        the archive layout is not checked here).

    Raises:
        subprocess.CalledProcessError: If ``tar`` exits with a non-zero status.
    """
    cmd = ["tar", "xf", tar_path, "-C", dest_dir]
    # tar's own output is discarded; failures surface through check=True.
    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    archive_name = os.path.basename(tar_path)
    # Strip only the trailing extension: str.replace('.tar', '') would also
    # delete a ".tar" that happens to occur in the middle of the name.
    if archive_name.endswith('.tar'):
        archive_name = archive_name[:-len('.tar')]
    return os.path.join(dest_dir, archive_name)
def unpack_split_tar(split_prefix, dest_dir):
    """Reassemble a split tar archive and extract it into ``dest_dir``.

    All files whose path starts with ``split_prefix`` are concatenated with
    ``cat`` (in the shell's glob order) and piped into ``tar``.

    Args:
        split_prefix: Common path prefix of the parts, e.g.
            ``tars/name.tar.part_`` for ``name.tar.part_aa``, ``..._ab``, ...
        dest_dir: Existing directory handed to ``tar -C`` as the extraction
            target.

    Returns:
        ``dest_dir`` joined with the prefix's file name minus its trailing
        ``.tar.part_`` suffix, matching what ``unpack_tar`` returns for an
        unsplit archive of the same name.

    Raises:
        subprocess.CalledProcessError: If the pipeline (i.e. ``tar``, its
            last command) exits with a non-zero status.
    """
    import shlex  # local import: the file's import block does not provide shlex

    # Quote both paths so spaces or shell metacharacters in them are taken
    # literally. The trailing * stays outside the quotes so the shell still
    # expands it to every part file.
    cmd = f"cat {shlex.quote(split_prefix)}* | tar xf - -C {shlex.quote(dest_dir)}"
    subprocess.run(cmd, shell=True, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    archive_name = os.path.basename(split_prefix)
    part_suffix = '.tar.part_'
    # Drop the split marker so the result names the archive, not the prefix.
    if archive_name.endswith(part_suffix):
        archive_name = archive_name[:-len(part_suffix)]
    return os.path.join(dest_dir, archive_name)
def process_tar(args):
    """Unpack one work item, choosing the helper by archive kind.

    Args:
        args: A ``(tar_path, dest_dir)`` pair, packed into one argument so
            the function can be mapped over a list of work items.

    Returns:
        Whatever the selected helper returns: ``unpack_split_tar`` when the
        path contains ``.tar.part_``, otherwise ``unpack_tar``.
    """
    tar_path, dest_dir = args
    marker = '.tar.part_'
    if marker not in tar_path:
        return unpack_tar(tar_path, dest_dir)

    # Rebuild the shared prefix from the file name so that either a bare
    # prefix or the path of one specific part is accepted.
    folder, file_name = os.path.split(tar_path)
    stem = file_name.split(marker)[0]
    return unpack_split_tar(os.path.join(folder, f"{stem}{marker}"), dest_dir)
# Build the work list: one (path, destination) entry per plain .tar file and
# one per group of split parts.
all_tar_files = []
files = os.listdir(source_tar_dir)
processed_files = set()
for name in files:
    if name.endswith('.tar.part_aa'):
        # The first part ("aa") stands in for the whole split archive; the
        # entry stores the shared prefix rather than any single part.
        stem = name.split('.tar.part_')[0]
        split_prefix = os.path.join(source_tar_dir, f"{stem}.tar.part_")
        all_tar_files.append((split_prefix, target_image_dir))
        # Remember every part of this group by file name.
        processed_files.update(
            os.path.basename(part) for part in glob.glob(f"{split_prefix}*")
        )
        continue
    is_plain_tar = name.endswith('.tar') and '.tar.part_' not in name
    if is_plain_tar and name not in processed_files:
        all_tar_files.append((os.path.join(source_tar_dir, name), target_image_dir))
# Guard the pool start-up: with the "spawn" start method (the default on
# Windows and macOS) each worker re-imports this module, and an unguarded
# Pool() at import time would try to start pools recursively.
if __name__ == "__main__":
    # Cap the number of workers at 50, or at the CPU count if that is lower.
    num_processes = min(50, multiprocessing.cpu_count())
    results = []
    print(f"Total TAR files to unpack: {len(all_tar_files)}")
    # Pass the cap to the pool; a bare Pool() ignores num_processes and
    # starts one worker per CPU.
    with multiprocessing.Pool(num_processes) as pool:
        with tqdm(total=len(all_tar_files), desc="Unpacking TAR files") as pbar:
            # imap_unordered yields each result as soon as its archive is
            # done, so the progress bar advances per finished archive.
            for res in pool.imap_unordered(process_tar, all_tar_files):
                pbar.update(1)
                results.append(res)
    # The leading newline is written as an escape; a literal line break
    # inside a single-quoted f-string is a syntax error.
    print(f"\nAll TAR files unpacked to: {target_image_dir}")
    print(f"Total unpacked folders: {len(results)}")
此外,您可通过如下代码验证文件完整性,并将绝对路径注入标注文件中:
python
import os
import random
import json
from tqdm import tqdm
def read_jsonl(file_path, num_samples=None):
    """Read a JSON Lines file into a list of decoded records.

    Args:
        file_path: Path to a UTF-8 encoded file with one JSON value per line.
        num_samples: Maximum number of records to read; ``None`` reads the
            whole file, and ``0`` (or a negative value) reads nothing.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    print(f"reading from {file_path}")
    data_list = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in tqdm(file):
            # Test the limit before parsing so that num_samples=0 yields an
            # empty list instead of one record.
            if num_samples is not None and len(data_list) >= num_samples:
                break
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at end of file) are
                # not records; json.loads("") would raise on them.
                continue
            data_list.append(json.loads(line))
    print(f"read {len(data_list)} samples")
    return data_list
# Spot-check that a random sample of up to 1000 annotated images exists
# under root_path.
root_path = 'path_to_images'  # placeholder value; each image_path is joined onto it
datas = read_jsonl('image_captions.jsonl')
random.shuffle(datas)
for record in datas[:1000]:
    full_path = os.path.join(root_path, record['image_path'])
    assert os.path.exists(full_path), f"Image does not exist: {full_path}"
print(f"All images exists")
提供机构:
maas
创建时间:
2025-08-14



