"""
coco_to_labelme.py
Converts a COCO dataset to labelme format (one .json per image file).
If you want to convert YOLO-formatted data to labelme format, use yolo_to_coco, then
coco_to_labelme.
"""
#%% Imports and constants
import os
import json
import sys
import argparse
from tqdm import tqdm
from collections import defaultdict
from megadetector.visualization.visualization_utils import open_image
from megadetector.detection.run_detector import FAILURE_IMAGE_OPEN
from megadetector.utils import ct_utils
#%% Functions
[docs]
def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
"""
For the given image struct in COCO format and associated list of annotations, reformats the
detections into labelme format.
Args:
im (dict): image dict, as loaded from a COCO .json file; 'height' and 'width' are required
annotations (list): a list of annotations that refer to this image (this function errors if
that's not the case)
categories (list): a list of category in dicts in COCO format ({'id':x,'name':'s'})
info (dict, optional): a dict to store in a non-standard "custom_info" field in the output
Returns:
dict: a dict in labelme format, suitable for writing to a labelme .json file
"""
image_base_name = os.path.basename(im['file_name'])
output_dict = {}
if info is not None:
output_dict['custom_info'] = info
output_dict['version'] = '5.3.0a0'
output_dict['flags'] = {}
output_dict['shapes'] = []
output_dict['imagePath'] = image_base_name
output_dict['imageHeight'] = im['height']
output_dict['imageWidth'] = im['width']
output_dict['imageData'] = None
# Store COCO categories in case we want to reconstruct the original IDs later
output_dict['coco_categories'] = categories
category_id_to_name = {c['id']:c['name'] for c in categories}
if 'flags' in im:
output_dict['flags'] = im['flags']
# ann = annotations[0]
for ann in annotations:
assert ann['image_id'] == im['id'], 'Annotation {} does not refer to image {}'.format(
ann['id'],im['id'])
if 'bbox' not in ann:
continue
shape = {}
shape['label'] = category_id_to_name[ann['category_id']]
shape['shape_type'] = 'rectangle'
shape['description'] = ''
shape['group_id'] = None
# COCO boxes are [x_min, y_min, width_of_box, height_of_box] (absolute)
#
# labelme boxes are [[x0,y0],[x1,y1]] (absolute)
x0 = ann['bbox'][0]
y0 = ann['bbox'][1]
x1 = ann['bbox'][0] + ann['bbox'][2]
y1 = ann['bbox'][1] + ann['bbox'][3]
shape['points'] = [[x0,y0],[x1,y1]]
output_dict['shapes'].append(shape)
# ...for each detection
return output_dict
# ...def get_labelme_dict_for_image()
[docs]
def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
"""
For all the images in [coco_data] (a dict or a filename), write a .json file in
labelme format alongside the corresponding relative path within image_base.
Args:
coco_data (dict or str): path to a COCO-formatted .json file, or an already-loaded
COCO-formatted dict
image_base (str): path where images live (filenames in [coco_data] should be relative to
[image_base]); this is also where labelme files will be written
overwrite (bool, optional): overwrite existing .json files
bypass_image_size_check (bool, optional): if you're sure that the COCO data already has
correct 'width' and 'height' fields, this bypasses the somewhat-slow loading of
each image to fetch image sizes
verbose (bool, optional): enable additional debug output
"""
# Load COCO data if necessary
if isinstance(coco_data,str):
with open(coco_data,'r') as f:
coco_data = json.load(f)
assert isinstance(coco_data,dict)
## Read image sizes if necessary
if bypass_image_size_check:
print('Bypassing size check')
else:
# TODO: parallelize this loop
print('Reading/validating image sizes...')
# im = coco_data['images'][0]
for im in tqdm(coco_data['images']):
# Make sure this file exists
im_full_path = os.path.join(image_base,im['file_name'])
assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
# Load w/h information if necessary
if 'height' not in im or 'width' not in im:
try:
pil_im = open_image(im_full_path)
im['width'] = pil_im.width
im['height'] = pil_im.height
except Exception:
print('Warning: cannot open image {}'.format(im_full_path))
if 'failure' not in im:
im['failure'] = FAILURE_IMAGE_OPEN
# ...if we need to read w/h information
# ...for each image
# ...if we need to load image sizes
## Generate labelme files
print('Generating .json files...')
image_id_to_annotations = defaultdict(list)
for ann in coco_data['annotations']:
image_id_to_annotations[ann['image_id']].append(ann)
n_json_files_written = 0
n_json_files_error = 0
n_json_files_exist = 0
# Write output
for im in tqdm(coco_data['images']):
# Skip this image if it failed to load in whatever system generated this COCO file
skip_image = False
# Errors are represented differently depending on the source
for error_string in ('failure','error'):
if (error_string in im) and (im[error_string] is not None):
if verbose:
print('Warning: skipping labelme file generation for failed image {}'.format(
im['file_name']))
skip_image = True
n_json_files_error += 1
break
if skip_image:
continue
im_full_path = os.path.join(image_base,im['file_name'])
json_path = os.path.splitext(im_full_path)[0] + '.json'
if (not overwrite) and (os.path.isfile(json_path)):
if verbose:
print('Skipping existing file {}'.format(json_path))
n_json_files_exist += 1
continue
annotations_this_image = image_id_to_annotations[im['id']]
output_dict = get_labelme_dict_for_image_from_coco_record(im,
annotations_this_image,
coco_data['categories'],
info=None)
n_json_files_written += 1
ct_utils.write_json(json_path, output_dict)
# ...for each image
print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
n_json_files_written,n_json_files_error,n_json_files_exist))
# ...def coco_to_labelme()
#%% Interactive driver
if False:
pass
#%% Configure options
coco_file = \
r'C:\\temp\\snapshot-exploration\\images\\training-images-good\\training-images-good_from_yolo.json'
image_folder = os.path.dirname(coco_file)
overwrite = True
#%% Programmatic execution
coco_to_labelme(coco_data=coco_file,image_base=image_folder,overwrite=overwrite)
#%% Command-line execution
s = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder)
if overwrite:
s += ' --overwrite'
print(s)
import clipboard; clipboard.copy(s)
#%% Opening labelme
s = 'python labelme {}'.format(image_folder)
print(s)
import clipboard; clipboard.copy(s)
#%% Command-line driver
def main(): # noqa
parser = argparse.ArgumentParser(
description='Convert a COCO database to labelme annotation format')
parser.add_argument(
'coco_file',
type=str,
help='Path to COCO data file (.json)')
parser.add_argument(
'image_base',
type=str,
help='Path to images (also the output folder)')
parser.add_argument(
'--overwrite',
action='store_true',
help='Overwrite existing labelme .json files')
if len(sys.argv[1:]) == 0:
parser.print_help()
parser.exit()
args = parser.parse_args()
coco_to_labelme(coco_data=args.coco_file,image_base=args.image_base,overwrite=args.overwrite)
if __name__ == '__main__':
main()