# Source code for megadetector.data_management.coco_to_labelme

"""

coco_to_labelme.py

Converts a COCO dataset to labelme format (one .json per image file).

If you want to convert YOLO-formatted data to labelme format, use yolo_to_coco, then
coco_to_labelme.

"""

#%% Imports and constants

import os
import json
import sys
import argparse

from tqdm import tqdm
from collections import defaultdict

from megadetector.visualization.visualization_utils import open_image
from megadetector.detection.run_detector import FAILURE_IMAGE_OPEN
from megadetector.utils import ct_utils


#%% Functions

def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
    """
    For the given image struct in COCO format and associated list of annotations, reformats the
    detections into labelme format.

    Annotations without a 'bbox' field are ignored; every other annotation becomes one
    labelme 'rectangle' shape.

    Args:
        im (dict): image dict, as loaded from a COCO .json file; 'file_name', 'height', and
            'width' are required, and 'id' is required whenever [annotations] is non-empty.
            If a 'flags' field is present, it is copied to the output.
        annotations (list): a list of annotations that refer to this image (this function errors
            if that's not the case)
        categories (list): a list of category dicts in COCO format ({'id':x,'name':'s'})
        info (dict, optional): a dict to store in a non-standard "custom_info" field in the output

    Returns:
        dict: a dict in labelme format, suitable for writing to a labelme .json file

    Raises:
        AssertionError: if an annotation's 'image_id' does not match im['id']
        KeyError: if an annotation with a box refers to a category ID that is not in [categories]
    """

    image_base_name = os.path.basename(im['file_name'])

    output_dict = {}
    if info is not None:
        output_dict['custom_info'] = info
    output_dict['version'] = '5.3.0a0'
    output_dict['flags'] = {}
    output_dict['shapes'] = []
    output_dict['imagePath'] = image_base_name
    output_dict['imageHeight'] = im['height']
    output_dict['imageWidth'] = im['width']
    output_dict['imageData'] = None

    # Store COCO categories in case we want to reconstruct the original IDs later
    output_dict['coco_categories'] = categories

    category_id_to_name = {c['id']:c['name'] for c in categories}

    # Image-level flags are passed through unchanged when the COCO record has them
    if 'flags' in im:
        output_dict['flags'] = im['flags']

    # ann = annotations[0]
    for ann in annotations:

        # Raised explicitly (rather than via an assert statement) so the check still runs
        # under "python -O"; the exception type is kept for backward compatibility.  The
        # annotation ID is optional here, so a missing ID can't mask the real problem.
        if ann['image_id'] != im['id']:
            raise AssertionError('Annotation {} does not refer to image {}'.format(
                ann.get('id'),im['id']))

        if 'bbox' not in ann:
            continue

        category_id = ann['category_id']
        if category_id not in category_id_to_name:
            raise KeyError('Annotation {} refers to unknown category ID {}'.format(
                ann.get('id'),category_id))

        shape = {}
        shape['label'] = category_id_to_name[category_id]
        shape['shape_type'] = 'rectangle'
        shape['description'] = ''
        shape['group_id'] = None

        # COCO boxes are [x_min, y_min, width_of_box, height_of_box] (absolute)
        #
        # labelme boxes are [[x0,y0],[x1,y1]] (absolute)
        x_min, y_min, box_width, box_height = ann['bbox'][:4]
        shape['points'] = [[x_min,y_min],[x_min + box_width,y_min + box_height]]
        output_dict['shapes'].append(shape)

    # ...for each detection

    return output_dict
# ...def get_labelme_dict_for_image()
def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
    """
    For all the images in [coco_data] (a dict or a filename), write a .json file in
    labelme format alongside the corresponding relative path within image_base.

    Images that carry a non-None 'failure' or 'error' field are skipped.  When [coco_data] is
    an already-loaded dict, its image records are modified in place: missing 'width'/'height'
    values are filled in from the image files, and images that can't be opened are given a
    'failure' field.

    Args:
        coco_data (dict or str): path to a COCO-formatted .json file, or an already-loaded
            COCO-formatted dict
        image_base (str): path where images live (filenames in [coco_data] should be relative to
            [image_base]); this is also where labelme files will be written
        overwrite (bool, optional): overwrite existing .json files
        bypass_image_size_check (bool, optional): if you're sure that the COCO data already has
            correct 'width' and 'height' fields, this bypasses the somewhat-slow loading of
            each image to fetch image sizes
        verbose (bool, optional): enable additional debug output

    Raises:
        AssertionError: if [coco_data] is not a dict after loading, or if an image file does
            not exist (only checked when [bypass_image_size_check] is False)
    """

    # Load COCO data if necessary
    if isinstance(coco_data,str):
        # JSON is UTF-8 by specification; don't depend on the platform's default encoding
        with open(coco_data,'r',encoding='utf-8') as f:
            coco_data = json.load(f)

    # Validation errors are raised explicitly (rather than via assert statements) so they
    # still fire under "python -O"; the exception type is kept for backward compatibility.
    if not isinstance(coco_data,dict):
        raise AssertionError('coco_data should be a dict or the path to a COCO .json file')


    ## Read image sizes if necessary

    if bypass_image_size_check:

        print('Bypassing size check')

    else:

        # TODO: parallelize this loop

        print('Reading/validating image sizes...')

        # im = coco_data['images'][0]
        for im in tqdm(coco_data['images']):

            # Make sure this file exists
            im_full_path = os.path.join(image_base,im['file_name'])
            if not os.path.isfile(im_full_path):
                raise AssertionError('Image file {} does not exist'.format(im_full_path))

            # Load w/h information if necessary
            if 'height' not in im or 'width' not in im:

                try:
                    pil_im = open_image(im_full_path)
                    im['width'] = pil_im.width
                    im['height'] = pil_im.height
                except Exception:
                    print('Warning: cannot open image {}'.format(im_full_path))
                    # A 'failure' field that is present but None would not cause this image
                    # to be skipped below, so treat it the same as a missing field.
                    if im.get('failure') is None:
                        im['failure'] = FAILURE_IMAGE_OPEN

            # ...if we need to read w/h information

        # ...for each image

    # ...if we need to load image sizes


    ## Generate labelme files

    print('Generating .json files...')

    image_id_to_annotations = defaultdict(list)
    for ann in coco_data['annotations']:
        image_id_to_annotations[ann['image_id']].append(ann)

    n_json_files_written = 0
    n_json_files_error = 0
    n_json_files_exist = 0

    # Write output
    for im in tqdm(coco_data['images']):

        # Skip this image if it failed to load in whatever system generated this COCO file.
        #
        # Errors are represented differently depending on the source.
        image_failed = any(im.get(error_string) is not None
                           for error_string in ('failure','error'))
        if image_failed:
            if verbose:
                print('Warning: skipping labelme file generation for failed image {}'.format(
                    im['file_name']))
            n_json_files_error += 1
            continue

        im_full_path = os.path.join(image_base,im['file_name'])
        json_path = os.path.splitext(im_full_path)[0] + '.json'

        if (not overwrite) and (os.path.isfile(json_path)):
            if verbose:
                print('Skipping existing file {}'.format(json_path))
            n_json_files_exist += 1
            continue

        annotations_this_image = image_id_to_annotations[im['id']]
        output_dict = get_labelme_dict_for_image_from_coco_record(im,
                                                                  annotations_this_image,
                                                                  coco_data['categories'],
                                                                  info=None)

        # Only count the file once it has actually been written
        ct_utils.write_json(json_path, output_dict)
        n_json_files_written += 1

    # ...for each image

    print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
        n_json_files_written,n_json_files_error,n_json_files_exist))
# ...def coco_to_labelme()


#%% Interactive driver

# This block never runs on import; the "#%%" markers delimit cells that are intended to be
# executed manually from an interactive editor.
if False:

    pass

    #%% Configure options

    coco_file = \
        r'C:\\temp\\snapshot-exploration\\images\\training-images-good\\training-images-good_from_yolo.json'
    image_folder = os.path.dirname(coco_file)
    overwrite = True


    #%% Programmatic execution

    coco_to_labelme(coco_data=coco_file,image_base=image_folder,overwrite=overwrite)


    #%% Command-line execution

    s = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder)
    if overwrite:
        s += ' --overwrite'

    print(s)
    import clipboard; clipboard.copy(s)


    #%% Opening labelme

    s = 'python labelme {}'.format(image_folder)
    print(s)
    import clipboard; clipboard.copy(s)


#%% Command-line driver

def main(): # noqa
    """
    Command-line entry point: parses arguments and calls coco_to_labelme().

    Prints usage and exits if no arguments are supplied.
    """

    parser = argparse.ArgumentParser(
        description='Convert a COCO database to labelme annotation format')

    parser.add_argument(
        'coco_file',
        type=str,
        help='Path to COCO data file (.json)')

    parser.add_argument(
        'image_base',
        type=str,
        help='Path to images (also the output folder)')

    parser.add_argument(
        '--overwrite',
        action='store_true',
        help='Overwrite existing labelme .json files')

    # The two options below expose coco_to_labelme() parameters that were previously only
    # reachable programmatically; both default to the previous behavior.
    parser.add_argument(
        '--bypass_image_size_check',
        action='store_true',
        help='Trust the width/height fields in the COCO file; skip opening each image')

    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Enable additional debug output')

    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    coco_to_labelme(coco_data=args.coco_file,
                    image_base=args.image_base,
                    overwrite=args.overwrite,
                    bypass_image_size_check=args.bypass_image_size_check,
                    verbose=args.verbose)


if __name__ == '__main__':
    main()