# Labelme JSON file conversion script (annotated code)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import argparse
import base64
import json
import os
import os.path as osp
import warnings
import PIL.Image
import yaml
from labelme import utils
def main():
    """Convert one labelme JSON annotation file into a small image dataset.

    Command-line arguments:
        json_file: path of the JSON file to convert.
        -o / --out: output directory. When omitted, a directory named after
            the JSON file (every '.' replaced by '_') is used, located next
            to the JSON file.

    Files written into the output directory:
        img.png          the image decoded by ``utils.img_b64_to_arr``
        label.png        the label array from ``utils.shapes_to_label``,
                         saved through ``utils.lblsave``
        label_viz.png    the array returned by ``utils.draw_label``
        label_names.txt  one label name per line, ordered by label value
        info.yaml        the same label names (kept although a warning
                         announces its replacement by label_names.txt)

    Raises:
        KeyError: if the JSON lacks 'shapes', or lacks 'imagePath' when no
            embedded 'imageData' is available.
    """
    warnings.warn("This script is aimed to demonstrate how to convert the\n"
                  "JSON file to a single image dataset, and not to handle\n"
                  "multiple JSON files to generate a real-use dataset.")

    parser = argparse.ArgumentParser()
    parser.add_argument('json_file')
    parser.add_argument('-o', '--out', default=None)
    args = parser.parse_args()

    json_file = args.json_file

    if args.out is None:
        # Default: "<dir of json>/<json file name with '.' -> '_'>",
        # e.g. "a/b.json" -> "a/b_json".
        out_dir = osp.basename(json_file).replace('.', '_')
        out_dir = osp.join(osp.dirname(json_file), out_dir)
    else:
        out_dir = args.out
    if not osp.exists(out_dir):
        # makedirs (not mkdir) so a nested --out path with missing parent
        # directories is created instead of raising.
        os.makedirs(out_dir)

    # Use a context manager so the JSON file handle is closed promptly.
    with open(json_file) as f:
        data = json.load(f)

    # .get(): a JSON without the 'imageData' key is handled the same way as
    # one whose value is empty/null, i.e. by falling back to 'imagePath'.
    image_data = data.get('imageData')
    if not image_data:
        # 'imagePath' is resolved relative to the JSON file's directory.
        image_path = osp.join(osp.dirname(json_file), data['imagePath'])
        with open(image_path, 'rb') as f:
            # Base64-encode the raw bytes so both sources reach
            # img_b64_to_arr in the same form.
            image_data = base64.b64encode(f.read()).decode('utf-8')
    img = utils.img_b64_to_arr(image_data)

    # Assign consecutive integer values to label names in sorted-name order;
    # value 0 is reserved for '_background_'.
    label_name_to_value = {'_background_': 0}
    for shape in sorted(data['shapes'], key=lambda x: x['label']):
        label_name = shape['label']
        if label_name not in label_name_to_value:
            label_name_to_value[label_name] = len(label_name_to_value)
    lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)

    # Invert the mapping into a list indexed by label value.
    label_names = [None] * (max(label_name_to_value.values()) + 1)
    for name, value in label_name_to_value.items():
        label_names[value] = name
    lbl_viz = utils.draw_label(lbl, img, label_names)

    PIL.Image.fromarray(img).save(osp.join(out_dir, 'img.png'))
    utils.lblsave(osp.join(out_dir, 'label.png'), lbl)
    PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, 'label_viz.png'))

    with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
        for lbl_name in label_names:
            f.write(lbl_name + '\n')

    warnings.warn('info.yaml is being replaced by label_names.txt')
    info = dict(label_names=label_names)
    with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
        yaml.safe_dump(info, f, default_flow_style=False)

    print('Saved to: %s' % out_dir)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()