Source code for megadetector.data_management.labelme_to_yolo

"""

labelme_to_yolo.py

Create YOLO .txt files in a folder containing labelme .json files.

"""

#%% Imports

import os
import json
import argparse

from multiprocessing.pool import Pool, ThreadPool
from functools import partial
from tqdm import tqdm

from megadetector.utils.path_utils import recursive_file_list
from megadetector.utils.ct_utils import write_json


#%% Main function

[docs] def labelme_file_to_yolo_file(labelme_file, category_name_to_category_id, yolo_file=None, required_token=None, overwrite_behavior='overwrite'): """ Convert the single .json file labelme_file to yolo format, writing the results to the text file yolo_file (defaults to s/json/txt). If required_token is not None and the dict in labelme_file does not contain the key [required_token], this function no-ops (i.e., does not generate a YOLO file). overwrite_behavior should be 'skip' or 'overwrite' (default). Args: labelme_file (str): .json file to convert category_name_to_category_id (dict): category name --> ID mapping yolo_file (str, optional): output .txt file defaults to s/json/txt required_token (str, optional): only process filenames containing this token overwrite_behavior (str, optional): "skip" or "overwrite" """ result = {} result['labelme_file'] = labelme_file result['status'] = 'unknown' assert os.path.isfile(labelme_file), 'Could not find labelme .json file {}'.format(labelme_file) assert labelme_file.endswith('.json'), 'Illegal labelme .json file {}'.format(labelme_file) if yolo_file is None: yolo_file = os.path.splitext(labelme_file)[0] + '.txt' if os.path.isfile(yolo_file): if overwrite_behavior == 'skip': result['status'] = 'skip-exists' return result else: assert overwrite_behavior == 'overwrite', \ 'Unrecognized overwrite behavior {}'.format(overwrite_behavior) with open(labelme_file,'r') as f: labelme_data = json.load(f) if required_token is not None and required_token not in labelme_data: result['status'] = 'skip-no-required-token' return result im_height = labelme_data['imageHeight'] im_width = labelme_data['imageWidth'] yolo_lines = [] for shape in labelme_data['shapes']: assert shape['shape_type'] == 'rectangle', \ 'I only know how to convert rectangles to YOLO format' assert shape['label'] in category_name_to_category_id, \ 'Category {} not in category mapping'.format(shape['label']) assert len(shape['points']) == 2, 'Illegal rectangle' category_id = category_name_to_category_id[shape['label']] p0 = shape['points'][0] p1 = shape['points'][1] # Labelme: [[x0,y0],[x1,y1]] (arbitrarily sorted) (absolute coordinates) # # YOLO: [class, x_center, y_center, width, height] (normalized coordinates) minx_abs = min(p0[0],p1[0]) maxx_abs = max(p0[0],p1[0]) miny_abs = min(p0[1],p1[1]) maxy_abs = max(p0[1],p1[1]) if (minx_abs >= (im_width-1)) or (maxx_abs <= 0) or \ (miny_abs >= (im_height-1)) or (maxy_abs <= 0): print('Skipping invalid shape in {}'.format(labelme_file)) continue # Clip to [0,1]... it's not obvious that the YOLO format doesn't allow bounding # boxes to extend outside the image, but YOLOv5 and YOLOv8 get sad about boxes # that extend outside the image. maxx_abs = min(maxx_abs,im_width-1) maxy_abs = min(maxy_abs,im_height-1) minx_abs = max(minx_abs,0.0) miny_abs = max(miny_abs,0.0) # Handle degenerate cases where image is one pixel wide if im_width == 1: minx_rel = 0.0 maxx_rel = 0.0 else: minx_rel = minx_abs / (im_width-1) maxx_rel = maxx_abs / (im_width-1) # Handle degenerate cases where image is one pixel tall if im_height == 1: miny_rel = 0.0 maxy_rel = 0.0 else: miny_rel = miny_abs / (im_height-1) maxy_rel = maxy_abs / (im_height-1) assert maxx_rel >= minx_rel assert maxy_rel >= miny_rel xcenter_rel = (maxx_rel + minx_rel) / 2.0 ycenter_rel = (maxy_rel + miny_rel) / 2.0 w_rel = maxx_rel - minx_rel h_rel = maxy_rel - miny_rel yolo_line = '{} {:.3f} {:.3f} {:.3f} {:.3f}'.format(category_id, xcenter_rel, ycenter_rel, w_rel, h_rel) yolo_lines.append(yolo_line) # ...for each shape with open(yolo_file,'w') as f: for s in yolo_lines: f.write(s + '\n') result['status'] = 'converted' return result
[docs] def labelme_folder_to_yolo(labelme_folder, category_name_to_category_id=None, required_token=None, overwrite_behavior='overwrite', relative_filenames_to_convert=None, n_workers=1, use_threads=True): """ Given a folder with images and labelme .json files, convert the .json files to YOLO .txt format. If category_name_to_category_id is None, first reads all the labels in the folder to build a zero-indexed name --> ID mapping. If required_token is not None and a labelme_file does not contain the key [required_token], it won't be converted. Typically used to specify a field that indicates which files have been reviewed. If relative_filenames_to_convert is not None, this should be a list of .json (not image) files that should get converted, relative to the base folder. overwrite_behavior should be 'skip' or 'overwrite' (default). returns a dict with: 'category_name_to_category_id', whether it was passed in or constructed 'image_results': a list of results for each image (converted, skipped, error) Args: labelme_folder (str): folder of .json files to convert category_name_to_category_id (dict): category name --> ID mapping required_token (str, optional): only process filenames containing this token overwrite_behavior (str, optional): "skip" or "overwrite" relative_filenames_to_convert (list of str, optional): only process filenames on this list n_workers (int, optional): parallelism level use_threads (bool, optional): whether to use threads (True) or processes (False) for parallelism """ if relative_filenames_to_convert is not None: labelme_files_relative = relative_filenames_to_convert assert all([fn.endswith('.json') for fn in labelme_files_relative]), \ 'relative_filenames_to_convert contains non-json files' else: labelme_files_relative = recursive_file_list(labelme_folder,return_relative_paths=True) labelme_files_relative = [fn for fn in labelme_files_relative if fn.endswith('.json')] if required_token is None: valid_labelme_files_relative = labelme_files_relative else: valid_labelme_files_relative = [] # fn_relative = labelme_files_relative[-1] for fn_relative in labelme_files_relative: fn_abs = os.path.join(labelme_folder,fn_relative) with open(fn_abs,'r') as f: labelme_data = json.load(f) if required_token not in labelme_data: continue valid_labelme_files_relative.append(fn_relative) print('{} of {} files are valid'.format(len(valid_labelme_files_relative), len(labelme_files_relative))) del labelme_files_relative if category_name_to_category_id is None: category_name_to_category_id = {} for fn_relative in valid_labelme_files_relative: fn_abs = os.path.join(labelme_folder,fn_relative) with open(fn_abs,'r') as f: labelme_data = json.load(f) for shape in labelme_data['shapes']: label = shape['label'] if label not in category_name_to_category_id: category_name_to_category_id[label] = len(category_name_to_category_id) # ...for each file # ...if we need to build a category mapping image_results = [] n_workers = min(n_workers,len(valid_labelme_files_relative)) if n_workers <= 1: for fn_relative in tqdm(valid_labelme_files_relative): fn_abs = os.path.join(labelme_folder,fn_relative) image_result = labelme_file_to_yolo_file(fn_abs, category_name_to_category_id, yolo_file=None, required_token=required_token, overwrite_behavior=overwrite_behavior) image_results.append(image_result) # ...for each file else: pool = None try: if use_threads: pool = ThreadPool(n_workers) else: pool = Pool(n_workers) valid_labelme_files_abs = [os.path.join(labelme_folder,fn_relative) for \ fn_relative in valid_labelme_files_relative] image_results = list(tqdm(pool.imap( partial(labelme_file_to_yolo_file, category_name_to_category_id=category_name_to_category_id, yolo_file=None, required_token=required_token, overwrite_behavior=overwrite_behavior), valid_labelme_files_abs), total=len(valid_labelme_files_abs))) finally: if pool is not None: pool.close() pool.join() print('Pool closed and joined for labelme conversion to YOLO') assert len(valid_labelme_files_relative) == len(image_results) print('Converted {} labelme .json files to YOLO'.format( len(valid_labelme_files_relative))) labelme_to_yolo_results = {} labelme_to_yolo_results['category_name_to_category_id'] = category_name_to_category_id labelme_to_yolo_results['image_results'] = image_results return labelme_to_yolo_results
# ...def labelme_folder_to_yolo(...) #%% Command-line driver def main(): """ Command-line interface to convert Labelme JSON files to YOLO format """ parser = argparse.ArgumentParser( description='Convert a folder of Labelme .json files to YOLO .txt format' ) parser.add_argument( 'labelme_folder', type=str, help='Folder of Labelme .json files to convert' ) parser.add_argument( '--output_category_file', type=str, default=None, help='Path to save the generated category mapping (.json)' ) parser.add_argument( '--required_token', type=str, default=None, help='Only process files containing this token as a key in the Labelme JSON dict' ) parser.add_argument( '--overwrite_behavior', type=str, default='overwrite', choices=['skip', 'overwrite'], help="Behavior if YOLO .txt files exist (default: 'overwrite')" ) parser.add_argument( '--n_workers', type=int, default=1, help='Number of workers for parallel processing (default: 1)' ) parser.add_argument( '--use_processes', action='store_true', help='Use processes instead of threads for parallelization (defaults to threads)' ) args = parser.parse_args() results = labelme_folder_to_yolo( labelme_folder=args.labelme_folder, category_name_to_category_id=None, required_token=args.required_token, overwrite_behavior=args.overwrite_behavior, relative_filenames_to_convert=None, n_workers=args.n_workers, use_threads=(not args.use_processes) ) if args.output_category_file: category_map = results['category_name_to_category_id'] write_json(args.output_category_file,category_map) print(f'Saved category mapping to {args.output_category_file}') if __name__ == '__main__': main()