Source code for megadetector.data_management.yolo_to_coco

"""

yolo_to_coco.py

Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.

"""

#%% Imports and constants

import json
import os
import argparse
import sys

from multiprocessing.pool import ThreadPool
from multiprocessing.pool import Pool
from functools import partial

from tqdm import tqdm

from megadetector.utils.path_utils import find_images
from megadetector.utils.path_utils import recursive_file_list
from megadetector.utils.path_utils import find_image_strings
from megadetector.utils.ct_utils import round_float
from megadetector.utils.ct_utils import round_float_array
from megadetector.utils.ct_utils import invert_dictionary
from megadetector.utils.ct_utils import write_json
from megadetector.visualization.visualization_utils import open_image
from megadetector.data_management.yolo_output_to_md_output import \
    read_classes_from_yolo_dataset_file


#%% Support functions

def _filename_to_image_id(fn):
    """
    Image IDs can't have spaces in them, replace spaces with underscores
    """

    return fn.replace(' ','_').replace('\\','/')


def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
    """
    Internal support function for processing one image's labels.
    """

    # Create the image object for this image
    #
    # Always use forward slashes in image filenames and IDs
    image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
    image_id = _filename_to_image_id(image_fn_relative)

    # This is done in a separate loop now
    #
    # assert image_id not in image_ids, \
    #    'Oops, you have hit a very esoteric case where you have the same filename ' + \
    #    'with both spaces and underscores, this is not currently handled.'
    # image_ids.add(image_id)

    im = {}
    im['file_name'] = image_fn_relative
    im['id'] = image_id

    annotations_this_image = []

    try:
        pil_im = open_image(fn_abs)
        im_width, im_height = pil_im.size
        im['width'] = im_width
        im['height'] = im_height
        im['error'] = None
    except Exception as e:
        print('Warning: error reading {}:\n{}'.format(image_fn_relative,str(e)))
        im['width'] = -1
        im['height'] = -1
        im['error'] = str(e)
        return (im,annotations_this_image)

    # Is there an annotation file for this image?
    if label_folder is not None:
        assert input_folder in fn_abs, \
            'Annotation file {} is not inside folder {}'.format(
                fn_abs,input_folder)
        label_file_abs_base = fn_abs.replace(input_folder,label_folder)
    else:
        label_file_abs_base = fn_abs

    annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
    if not os.path.isfile(annotation_file):
        annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'

    if os.path.isfile(annotation_file):

        with open(annotation_file,'r') as f:
            lines = f.readlines()
        lines = [s.strip() for s in lines]

        # s = lines[0]
        annotation_number = 0

        for s in lines:

            if len(s.strip()) == 0:
                continue

            tokens = s.split()
            assert len(tokens) == 5, \
                'Illegal line in annotation file {}:\n{}'.format(
                    annotation_file,s)
            category_id = int(tokens[0])
            assert category_id in category_id_to_name, \
                'Unrecognized category ID {} in annotation file {}'.format(
                    category_id,annotation_file)
            ann = {}
            ann['id'] = im['id'] + '_' + str(annotation_number)
            ann['image_id'] = im['id']
            ann['category_id'] = category_id
            ann['sequence_level_annotation'] = False

            # COCO: [x_min, y_min, width, height] in absolute coordinates
            # YOLO: [class, x_center, y_center, width, height] in normalized coordinates

            yolo_bbox = [float(x) for x in tokens[1:]]

            normalized_x_center = yolo_bbox[0]
            normalized_y_center = yolo_bbox[1]
            normalized_width = yolo_bbox[2]
            normalized_height = yolo_bbox[3]

            absolute_x_center = normalized_x_center * im_width
            absolute_y_center = normalized_y_center * im_height
            absolute_width = normalized_width * im_width
            absolute_height = normalized_height * im_height
            absolute_x_min = absolute_x_center - absolute_width / 2
            absolute_y_min = absolute_y_center - absolute_height / 2

            coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]

            ann['bbox'] = coco_bbox
            annotation_number += 1

            annotations_this_image.append(ann)

        # ...for each annotation

    # ...if this image has annotations

    return (im,annotations_this_image)

# ...def _process_image(...)


[docs] def load_yolo_class_list(class_name_file): """ Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file [class_name_file]. Args: class_name_file (str or list): this can be: - a .yaml or .yaml file in YOLO's dataset.yaml format - a .txt or .data file containing a flat list of class names - a list of class names Returns: dict: A dict mapping zero-indexed integer IDs to class names """ # class_name_file can also be a list of class names if isinstance(class_name_file,list): category_id_to_name = {} for i_name,name in enumerate(class_name_file): category_id_to_name[i_name] = name return category_id_to_name ext = os.path.splitext(class_name_file)[1][1:] assert ext in ('yml','txt','yaml','data'), \ 'Unrecognized class name file type {}'.format( class_name_file) if ext in ('txt','data'): with open(class_name_file,'r') as f: lines = f.readlines() lines = [s.strip() for s in lines] assert len(lines) > 0, \ 'Empty class name file {}'.format(class_name_file) assert len(lines[0]) > 0, \ 'Empty class name file {} (empty first line)'.format(class_name_file) # Blank lines should only appear at the end b_found_blank = False for s in lines: if len(s) == 0: b_found_blank = True elif b_found_blank: raise ValueError('Invalid class name file {}, non-blank line after the last blank line'.format( class_name_file)) category_id_to_name = {} for i_category_id,category_name in enumerate(lines): assert len(category_name) > 0, \ 'Empty category name in file {}'.format(class_name_file) category_id_to_name[i_category_id] = category_name else: assert ext in ('yml','yaml'), \ 'Illegal class name file extension for {}'.format(class_name_file) category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file) return category_id_to_name
# ...load_yolo_class_list(...)
[docs] def validate_label_file(label_file,category_id_to_name=None,verbose=False): """" Verifies that [label_file] is a valid YOLO label file. Does not check the extension. Args: label_file (str): the .txt file to validate category_id_to_name (dict, optional): a dict mapping integer category IDs to names; if this is not None, this function errors if the file uses a category that's not in this dict verbose (bool, optional): enable additional debug console output Returns: dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of errors (if any) that we found in this file) """ label_result = {} label_result['file'] = label_file label_result['errors'] = [] try: with open(label_file,'r') as f: lines = f.readlines() except Exception as e: label_result['errors'].append('Read error: {}'.format(str(e))) return label_result # i_line 0; line = lines[i_line] for i_line,line in enumerate(lines): s = line.strip() if len(s) == 0 or s[0] == '#': continue try: tokens = s.split() assert len(tokens) == 5, \ 'YOLO label lines should have five tokens, found {} on line {} of file {}'.format( len(tokens),i_line,label_file) if category_id_to_name is not None: category_id = int(tokens[0]) assert category_id in category_id_to_name, \ 'Unrecognized category ID {}'.format(category_id) yolo_bbox = [float(x) for x in tokens[1:]] except Exception as e: label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e))) continue normalized_x_center = yolo_bbox[0] normalized_y_center = yolo_bbox[1] normalized_width = yolo_bbox[2] normalized_height = yolo_bbox[3] normalized_x_min = normalized_x_center - normalized_width / 2.0 normalized_x_max = normalized_x_center + normalized_width / 2.0 normalized_y_min = normalized_y_center - normalized_height / 2.0 normalized_y_max = normalized_y_center + normalized_height / 2.0 if normalized_x_min < 0 or normalized_y_min < 0 or \ normalized_x_max > 1 or normalized_y_max > 1: label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format( normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max)) # ...for each line if verbose: if len(label_result['errors']) > 0: print('Errors for {}:'.format(label_file)) for error in label_result['errors']: print(error) return label_result
# ...def validate_label_file(...)
[docs] def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False): """ Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the labels and images are in different folders (yolo_to_coco() supports this). Looks for: * Image files without label files * Text files without image files * Illegal classes in label files * Invalid boxes in label files Args: input_folder (str): the YOLO dataset folder to validate class_name_file (str or list): a list of classes, a flat text file, or a yolo dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to input_folder as the base folder, though this is not explicitly checked. n_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelization pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization; not used if [n_workers] <= 1 verbose (bool, optional): enable additional debug console output Returns: dict: validation results, as a dict with fields: - image_files_without_label_files (list) - label_files_without_image_files (list) - label_results (list of dicts with field 'filename', 'errors') (list) """ # Validate arguments assert os.path.isdir(input_folder), \ 'Could not find input folder {}'.format(input_folder) if n_workers > 1: assert pool_type in ('thread','process'), \ 'Illegal pool type {}'.format(pool_type) category_id_to_name = load_yolo_class_list(class_name_file) print('Enumerating files in {}'.format(input_folder)) all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False, convert_slashes=True) label_files = [fn for fn in all_files if fn.endswith('.txt')] image_files = find_image_strings(all_files) print('Found {} images files and {} label files in {}'.format( len(image_files),len(label_files),input_folder)) label_files_set = set(label_files) image_files_without_extension = set() for fn in image_files: image_file_without_extension = os.path.splitext(fn)[0] assert image_file_without_extension not in image_files_without_extension, \ 'Duplicate image file, likely with different extensions: {}'.format(fn) image_files_without_extension.add(image_file_without_extension) print('Looking for missing image/label files') image_files_without_label_files = [] label_files_without_images = [] for image_file in tqdm(image_files): expected_label_file = os.path.splitext(image_file)[0] + '.txt' if expected_label_file not in label_files_set: image_files_without_label_files.append(image_file) for label_file in tqdm(label_files): expected_image_file_without_extension = os.path.splitext(label_file)[0] if expected_image_file_without_extension not in image_files_without_extension: label_files_without_images.append(label_file) print('Found {} image files without labels, {} labels without images'.format( len(image_files_without_label_files),len(label_files_without_images))) print('Validating label files') if n_workers <= 1: label_results = [] for fn_abs in tqdm(label_files): label_results.append(validate_label_file(fn_abs, category_id_to_name=category_id_to_name, verbose=verbose)) else: assert pool_type in ('process','thread'), \ 'Illegal pool type {}'.format(pool_type) pool = None try: if pool_type == 'thread': pool = ThreadPool(n_workers) else: pool = Pool(n_workers) print('Starting a {} pool of {} workers'.format(pool_type,n_workers)) p = partial(validate_label_file, category_id_to_name=category_id_to_name, verbose=verbose) label_results = list(tqdm(pool.imap(p, label_files), total=len(label_files))) finally: if pool is not None: pool.close() pool.join() print('Pool closed and joined for label file validation') assert len(label_results) == len(label_files), \ 'Mismatch: {} results for {} files'.format( len(label_results),len(label_files)) validation_results = {} validation_results['image_files_without_label_files'] = image_files_without_label_files validation_results['label_files_without_images'] = label_files_without_images validation_results['label_results'] = label_results return validation_results
# ...validate_yolo_dataset(...) #%% Main conversion function
[docs] def yolo_to_coco(input_folder, class_name_file, output_file=None, empty_image_handling='no_annotations', empty_image_category_name='empty', error_image_handling='no_annotations', allow_images_without_label_files=True, n_workers=1, pool_type='thread', recursive=True, exclude_string=None, include_string=None, overwrite_handling='overwrite', label_folder=None, supercategory=None, force_integer_ids=False, include_area=False, include_crowd=False, invalid_annotation_handling='error', precision=3): """ Converts a YOLO-formatted dataset to a COCO-formatted dataset. All images will be assigned an "error" value, usually None. Args: input_folder (str): the YOLO dataset folder to convert. If the image and label folders are different, this is the image folder, and [label_folder] is the label folder. class_name_file (str or list): a list of classes, a flat text file, or a yolo dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to input_folder as the base folder, though this is not explicitly checked. output_file (str, optional): .json file to which we should write COCO .json data empty_image_handling (str, optional): how to handle images with no boxes; whether this includes images with no .txt files depends on the value of [allow_images_without_label_files]. Can be: - 'no_annotations': include the image in the image list, with no annotations - 'empty_annotations': include the image in the image list, and add an annotation without any bounding boxes, using a category called [empty_image_category_name]. - 'skip': don't include the image in the image list - 'error': there shouldn't be any empty images empty_image_category_name (str, optional): if we're going to be inserting annotations for images with no boxes, what category name should we use? error_image_handling (str, optional): how to handle images that don't load properly; can be: - 'skip': don't include the image at all - 'no_annotations': include with no annotations allow_images_without_label_files (bool, optional): whether to silently allow images with no label files (True) or raise errors for images with no label files (False) n_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelization pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization; not used if [n_workers] <= 1 recursive (bool, optional): whether to recurse into [input_folder] exclude_string (str, optional): exclude any images whose filename contains a string include_string (str, optional): include only images whose filename contains a string overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or 'error') label_folder (str, optional): label folder, if different from the image folder supercategory (str, optional): populate the 'supercategory' field, currently only supports None (don't populate) or a single supercategory for the whole dataset. This is mostly only here because RF-DETR requires something to be populated in this field. force_integer_ids (bool, optional): force image and annotation IDs to be integers include_area (bool, optional): add the "area" field for boxes include_crowd (bool, optional): include the "iscrowd" field (always 0) for annotations invalid_annotation_handling (str, optional): how to handle invalid annotations, e.g. negative-height bounding boxes. Can be 'error', 'warn', or 'exclude'. 'exclude' implies 'warn'. precision (int, optional): round box coordinates to this many decimal places, or None to bypass rounding. Returns: dict: COCO-formatted data, the same as what's written to [output_file] """ ## Validate input input_folder = input_folder.replace('\\','/') assert os.path.isdir(input_folder), \ 'Input folder {} does not exist or is not a folder'.format(input_folder) if isinstance(class_name_file,str): assert os.path.isfile(class_name_file), \ 'Class name file {} does not exist or is not a file'.format(class_name_file) assert empty_image_handling in \ ('no_annotations','empty_annotations','skip','error'), \ 'Unrecognized empty image handling spec: {}'.format(empty_image_handling) assert invalid_annotation_handling in ('error','warn','exclude') if (output_file is not None) and os.path.isfile(output_file): if overwrite_handling == 'overwrite': print('Warning: output file {} exists, over-writing'.format(output_file)) elif overwrite_handling == 'load': print('Output file {} exists, loading and returning'.format(output_file)) with open(output_file,'r') as f: d = json.load(f) return d elif overwrite_handling == 'error': raise ValueError('Output file {} exists'.format(output_file)) else: raise ValueError('Unrecognized overwrite_handling value: {}'.format(overwrite_handling)) ## Read class names category_id_to_name = load_yolo_class_list(class_name_file) # Find or create the empty image category, if necessary empty_category_id = None if empty_image_handling == 'empty_annotations': category_name_to_id = invert_dictionary(category_id_to_name) if empty_image_category_name in category_name_to_id: empty_category_id = category_name_to_id[empty_image_category_name] print('Using existing empty image category with name {}, ID {}'.format( empty_image_category_name,empty_category_id)) else: empty_category_id = len(category_id_to_name) print('Adding an empty category with name {}, ID {}'.format( empty_image_category_name,empty_category_id)) category_id_to_name[empty_category_id] = empty_image_category_name ## Enumerate images print('Enumerating images...') image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True) n_files_original = len(image_files_abs) # Optionally include/exclude images matching specific strings if exclude_string is not None: image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn] if include_string is not None: image_files_abs = [fn for fn in image_files_abs if include_string in fn] if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None: n_excluded = n_files_original - len(image_files_abs) print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original)) categories = [] for category_id in category_id_to_name: categories.append({'id':category_id,'name':category_id_to_name[category_id]}) if supercategory is not None: for cat in categories: cat['supercategory'] = supercategory info = {} info['version'] = '1.0' info['description'] = 'Converted from YOLO format' image_ids = set() ## If we're expected to have labels for every image, check before we process all the images if not allow_images_without_label_files: print('Verifying that label files exist') # image_file_abs = image_files_abs[0] for image_file_abs in tqdm(image_files_abs): if label_folder is not None: assert input_folder in image_file_abs, \ 'File {} is not in folder {}'.format(image_file_abs,input_folder) label_file_abs_base = image_file_abs.replace(input_folder,label_folder) else: label_file_abs_base = image_file_abs label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt' assert os.path.isfile(label_file_abs), \ 'No annotation file for {}'.format(image_file_abs) ## Initial loop to make sure image IDs will be unique print('Validating image IDs...') for fn_abs in tqdm(image_files_abs): fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/') image_id = _filename_to_image_id(fn_relative) assert image_id not in image_ids, \ 'Oops, you have hit a very esoteric case where you have the same filename ' + \ 'with both spaces and underscores, this is not currently handled.' image_ids.add(image_id) ## Main loop to process labels print('Processing labels...') if n_workers <= 1: image_results = [] # fn_abs = image_files_abs[0] for fn_abs in tqdm(image_files_abs): image_results.append(_process_image(fn_abs, input_folder, category_id_to_name, label_folder)) else: assert pool_type in ('process','thread'), \ 'Illegal pool type {}'.format(pool_type) pool = None try: if pool_type == 'thread': pool = ThreadPool(n_workers) else: pool = Pool(n_workers) print('Starting a {} pool of {} workers'.format(pool_type,n_workers)) p = partial(_process_image, input_folder=input_folder, category_id_to_name=category_id_to_name, label_folder=label_folder) image_results = list(tqdm(pool.imap(p, image_files_abs), total=len(image_files_abs))) finally: if pool is not None: pool.close() pool.join() print('Pool closed and joined for YOLO to COCO conversion') assert len(image_results) == len(image_files_abs), \ 'Result count mismatch: {} results for {} image files'.format( len(image_results),len(image_files_abs)) ## Re-assembly of results into a COCO dict print('Assembling labels...') images = [] annotations = [] input_id_to_output_id = None if force_integer_ids: input_id_to_output_id = {} for image_result in tqdm(image_results): im = image_result[0] annotations_this_image = image_result[1] # This will be set to True if (a) the image has invalid annotations and # (b) we are excluding invalid annotations, but not erroring skip_image = False # Validate annotations for ann in annotations_this_image: if 'bbox' not in ann: continue # coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height] box_is_valid = True if len(ann['bbox']) != 4: box_is_valid = False elif ann['bbox'][2] < 0: box_is_valid = False elif ann['bbox'][3] < 0: box_is_valid = False if not box_is_valid: s = 'Illegal bounding box {} for image {}'.format( str(ann['bbox']),im['file_name']) if invalid_annotation_handling == 'error': raise ValueError(s) if invalid_annotation_handling in ('warn','exclude'): print('Warning: {}'.format(s)) if invalid_annotation_handling == 'exclude': skip_image = True break if precision is not None: ann['bbox'] = round_float_array(ann['bbox'],precision=precision) if include_area: ann['area'] = ann['bbox'][2] * ann['bbox'][3] if precision is not None: ann['area'] = round_float(ann['area'],precision=precision) # ...for each annotation if skip_image: continue # If we need to constrain image IDs to be integers if force_integer_ids: input_id = im['id'] output_id = len(input_id_to_output_id) input_id_to_output_id[input_id] = output_id im['id'] = output_id for ann in annotations_this_image: ann['image_id'] = im['id'] # If we have annotations for this image if len(annotations_this_image) > 0: assert im['error'] is None, \ "We shouldn't have errors for images that have annotations" images.append(im) for ann in annotations_this_image: if include_crowd: ann['iscrowd'] = 0 annotations.append(ann) # If this image failed to read elif im['error'] is not None: if error_image_handling == 'skip': pass elif error_image_handling == 'no_annotations': images.append(im) # If this image read successfully, but there are no annotations else: if empty_image_handling == 'skip': pass elif empty_image_handling == 'no_annotations': images.append(im) elif empty_image_handling == 'empty_annotations': assert empty_category_id is not None, \ 'An empty category ID must be supplied if we are including empty annotations' ann = {} if include_crowd: ann['iscrowd'] = 0 ann['id'] = im['id'] + '_0' ann['image_id'] = im['id'] ann['category_id'] = empty_category_id ann['sequence_level_annotation'] = False # This would also be a reasonable thing to do, but it's not the convention # we're adopting, i.e. we are not including fake boxes for annotations # on empty images. # ann['bbox'] = [0,0,0,0] annotations.append(ann) images.append(im) # ...if we do/don't have annotations for this image # ...for each image result # Create integer IDs for annotations if necessary # # Annotation IDs don't really mean anything, so just assign incrementing # integers. if force_integer_ids: for i_ann,ann in enumerate(annotations): ann['id'] = i_ann # Clean up unnecessary error fields for im in images: if 'error' in im and im['error'] is None: del im['error'] print('Read {} annotations for {} images'.format(len(annotations), len(images))) d = {} d['images'] = images d['annotations'] = annotations d['categories'] = categories d['info'] = info if output_file is not None: print('Writing to {}'.format(output_file)) write_json(output_file,d) return d
# ...def yolo_to_coco() #%% Interactive driver if False: pass #%% Convert YOLO folders to COCO preview_folder = '/home/user/data/noaa-fish/val-coco-conversion-preview' input_folder = '/home/user/data/noaa-fish/val' output_file = '/home/user/data/noaa-fish/val.json' class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt' d = yolo_to_coco(input_folder,class_name_file,output_file) input_folder = '/home/user/data/noaa-fish/train' output_file = '/home/user/data/noaa-fish/train.json' class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt' d = yolo_to_coco(input_folder,class_name_file,output_file) #%% Check DB integrity from megadetector.data_management.databases import integrity_check_json_db options = integrity_check_json_db.IntegrityCheckOptions() options.baseDir = input_folder options.bCheckImageSizes = False options.bCheckImageExistence = True options.bFindUnusedImages = True _, _, _ = integrity_check_json_db.integrity_check_json_db(output_file, options) #%% Preview some images from megadetector.visualization import visualize_db viz_options = visualize_db.DbVizOptions() viz_options.num_to_visualize = None viz_options.trim_to_images_with_bboxes = False viz_options.add_search_links = False viz_options.sort_by_filename = False viz_options.parallelize_rendering = True viz_options.include_filename_links = True html_output_file, _ = visualize_db.visualize_db(db_path=output_file, output_dir=preview_folder, image_base_dir=input_folder, options=viz_options) from megadetector.utils.path_utils import open_file open_file(html_output_file) #%% Command-line driver def main(): """ Command-line driver for YOLO to COCO conversion. """ parser = argparse.ArgumentParser( description='Convert a YOLO-formatted dataset to COCO format' ) parser.add_argument( 'input_folder', type=str, help='Path to the YOLO dataset folder (image folder)' ) parser.add_argument( 'class_name_file', type=str, help='Path to the file containing class names (e.g., classes.txt or dataset.yaml)' ) parser.add_argument( 'output_file', type=str, help='Path to the output COCO .json file.' ) parser.add_argument( '--label_folder', type=str, default=None, help='Label folder, if different from the image folder. Default: None (labels are in the image folder)' ) parser.add_argument( '--empty_image_handling', type=str, default='no_annotations', choices=['no_annotations', 'empty_annotations', 'skip', 'error'], help='How to handle images with no bounding boxes.' ) parser.add_argument( '--empty_image_category_name', type=str, default='empty', help='Category name for empty images if empty_image_handling is "empty_annotations"' ) parser.add_argument( '--error_image_handling', type=str, default='no_annotations', choices=['skip', 'no_annotations'], help='How to handle images that fail to load' ) parser.add_argument( '--allow_images_without_label_files', type=str, default='true', choices=['true', 'false'], help='Whether to allow images that do not have corresponding label files (true/false)' ) parser.add_argument( '--n_workers', type=int, default=1, help='Number of workers for parallel processing. <=1 for sequential' ) parser.add_argument( '--pool_type', type=str, default='thread', choices=['thread', 'process'], help='Type of multiprocessing pool if n_workers > 1' ) parser.add_argument( '--recursive', type=str, default='true', choices=['true', 'false'], help='Whether to search for images recursively in the input folder (true/false)' ) parser.add_argument( '--exclude_string', type=str, default=None, help='Exclude images whose filename contains this string' ) parser.add_argument( '--include_string', type=str, default=None, help='Include images only if filename contains this string' ) parser.add_argument( '--overwrite_handling', type=str, default='overwrite', choices=['load', 'overwrite', 'error'], help='Behavior if output_file exists.' ) if len(sys.argv[1:]) == 0: parser.print_help() parser.exit() args = parser.parse_args() parsed_allow_images = args.allow_images_without_label_files.lower() == 'true' parsed_recursive = args.recursive.lower() == 'true' yolo_to_coco( args.input_folder, args.class_name_file, output_file=args.output_file, label_folder=args.label_folder, empty_image_handling=args.empty_image_handling, empty_image_category_name=args.empty_image_category_name, error_image_handling=args.error_image_handling, allow_images_without_label_files=parsed_allow_images, n_workers=args.n_workers, pool_type=args.pool_type, recursive=parsed_recursive, exclude_string=args.exclude_string, include_string=args.include_string, overwrite_handling=args.overwrite_handling ) print(f"Dataset conversion complete, output written to {args.output_file}") if __name__ == '__main__': main()