Source code for megadetector.detection.run_inference_with_yolov5_val

"""

run_inference_with_yolov5_val.py

Runs a folder of images through MegaDetector (or another YOLOv5/YOLOv8 model) with YOLO's
val.py, converting the output to the standard MD format.  The reasons this script exists,
as an alternative to the standard run_detector_batch.py are:

* This script provides access to YOLO's test-time augmentation tools.
* This script serves as a reference implementation: by any reasonable definition, YOLOv5's
  val.py produces the "correct" result for any image, since it matches what was used in
  training.
* This script works for any Ultralytics detection model, including YOLOv8 models

YOLOv5's val.py uses each file's base name as a unique identifier, which doesn't work
when you have typical camera trap images like:

* a/b/c/RECONYX0001.JPG
* d/e/f/RECONYX0001.JPG

...both of which would just be "RECONYX0001.JPG".  So this script jumps through a bunch of
hoops to put symlinks in a flat folder, run YOLOv5 on that folder, and map the results back
to the real files.

If you are running a YOLOv5 model, this script currently requires the caller to supply the path
where a working YOLOv5 install lives, and assumes that the current conda environment is all set up for
YOLOv5.  If you are running a YOLOv8 model, the folder doesn't matter, but it assumes that ultralytics
tools are available in the current environment.

By default, this script uses symlinks to format the input images in a way that YOLO's
val.py likes, as per above.  This requires admin privileges on Windows... actually technically this
only requires permissions to create symbolic links, but I've never seen a case where someone has
that permission and *doesn't* have admin privileges.  If you are running this script on
Windows and you don't have admin privileges, use --no_use_symlinks, which will make copies of images,
rather than using symlinks.

"""

#%% Imports

import os
import sys
import uuid
import glob
import tempfile
import shutil
import json
import copy
import argparse

from tqdm import tqdm

from megadetector.utils import path_utils
from megadetector.utils import process_utils
from megadetector.utils import string_utils
from megadetector.utils.ct_utils import args_to_object

from megadetector.utils.ct_utils import is_iterable, split_list_into_fixed_size_chunks
from megadetector.utils import ct_utils
from megadetector.utils.path_utils import path_is_abs
from megadetector.data_management import yolo_output_to_md_output
from megadetector.detection.run_detector import try_download_known_detector
from megadetector.postprocessing.combine_batch_outputs import combine_batch_output_files

# Default image size used when options.image_size is None and test-time augmentation
# is enabled: 1280 scaled up by 1.3 and truncated to an int (1664).
default_image_size_with_augmentation = int(1280 * 1.3)

# Default image size used when options.image_size is None and augmentation is disabled.
default_image_size_with_no_augmentation = 1280


#%% Options class


[docs]
class YoloInferenceOptions:
    """
    Parameters that control the behavior of run_inference_with_yolo_val(), including
    the input/output filenames.

    All attributes are plain instance attributes populated with defaults in __init__();
    callers are expected to overwrite the ones they care about before running inference.
    """

    def __init__(self):

        ## Required-ish ##

        #: Folder of images to process (can be None if image_filename_list contains absolute paths)
        self.input_folder = None

        #: If this is None, [input_folder] can't be None, and we'll process all images in
        #: [input_folder].
        #:
        #: If this is not None, and [input_folder] is not None, this should be a list of relative image
        #: paths within [input_folder] to process, or a .txt or .json file containing a list of
        #: relative image paths.
        #:
        #: If this is not None, and [input_folder] is None, this should be a list of absolute image
        #: paths, or a .txt or .json file containing a list of absolute image paths.
        self.image_filename_list = None

        #: Model filename (ending in .pt), or a well-known model name (e.g. "MDV5A")
        self.model_filename = None

        #: .json output file, in MD results format
        self.output_file = None


        ## Optional ##

        #: Required for older YOLOv5 inference, not for newer ultralytics/YOLOv8 inference
        self.yolo_working_folder = None

        #: Currently 'yolov5' and 'ultralytics' are supported, and really these are proxies for
        #: "the yolov5 repo" and "the ultralytics repo".
        #:
        #: 'yolov8' is also accepted, but is converted to 'ultralytics' with a warning.
        self.model_type = 'yolov5'

        #: Image size to use; this is a single int, which in ultralytics's terminology means
        #: "scale the long side of the image to this size, and preserve aspect ratio".
        #:
        #: If None, will choose based on whether augmentation is enabled.
        self.image_size = None

        #: Detections below this threshold will not be included in the output file
        #:
        #: Note that the default is stored as a string; the value is formatted as-is into the
        #: YOLOv5 val.py command line.
        self.conf_thres = '0.001'

        #: Batch size... has no impact on results, but may create memory issues if you set
        #: this to large values
        self.batch_size = 1

        #: Device string: typically '0' for GPU 0, '1' for GPU 1, etc., or 'cpu'
        self.device_string = '0'

        #: Should we enable test-time augmentation?
        self.augment = False

        #: Should we enable half-precision inference?
        #:
        #: If not None, this must be 0 or 1.  If None, no half-precision argument is added
        #: to the YOLO command.
        self.half_precision_enabled = None

        #: Where should we stash the temporary symlinks (or copies) used to give unique identifiers to image
        #: files?
        #:
        #: If this is None, we'll create a folder in system temp space.
        self.symlink_folder = None

        #: Should we use symlinks to give unique identifiers to image files (vs. copies)?
        self.use_symlinks = True

        #: How should we guarantee that YOLO IDs (base filenames) are unique?  Choices are:
        #:
        #: * 'verify': assume image IDs are unique, but verify and error if they're not
        #: * 'links': create symlinks (or copies, depending on use_symlinks) to enforce uniqueness
        #: * 'auto': check whether IDs are unique, create links if necessary
        self.unique_id_strategy = 'links'

        #: Temporary folder to stash intermediate YOLO results.
        #:
        #: If this is None, we'll create a folder in system temp space.
        self.yolo_results_folder = None

        #: Should we remove the symlink folder when we're done?
        self.remove_symlink_folder = True

        #: Should we remove the intermediate results folder when we're done?
        self.remove_yolo_results_folder = True

        #: These are deliberately offset from the standard MD categories; YOLOv5
        #: needs category IDs to start at 0.
        #:
        #: This can also be a string that points to any class mapping file supported
        #: by read_classes_from_yolo_dataset_file(): a YOLO dataset.yaml file, a text
        #: file with a list of classes, or a .json file with an ID --> name dict
        self.yolo_category_id_to_name = {0:'animal',1:'person',2:'vehicle'}

        #: What should we do if the output file already exists?
        #:
        #: Can be 'error', 'skip', or 'overwrite'.
        self.overwrite_handling = 'skip'

        #: If True, we'll do a dry run that lets you preview the YOLO val command, without
        #: actually running it.
        self.preview_yolo_command_only = False

        #: By default, if any errors occur while we're copying images or creating symlinks, it's
        #: game over.  If this is True, those errors become warnings, and we plow ahead.
        self.treat_copy_failures_as_warnings = False

        #: Save YOLO console output
        self.save_yolo_debug_output = False

        #: Whether to search for images recursively within [input_folder]
        #:
        #: Ignored if a list of files is provided.
        self.recursive = True

        #: Maximum number of images to run in a single chunk
        #:
        #: If this is None or not greater than zero, images are not split into chunks.
        self.checkpoint_frequency = None

        #: By default, if we're creating symlinks to images, we append a unique job ID to the
        #: symlink folder.  If the caller is 100% sure that the symlink folder can be re-used
        #: across calls, this can be set to False.
        self.append_job_id_to_symlink_folder = True

        #: By default, we turn category ID 0 coming out of the YOLO .json file
        #: into category 1 in the MD-formatted .json file.
        self.offset_yolo_category_ids = True


    # ...def __init__()

# ...YoloInferenceOptions()


#%% Support functions

def _clean_up_temporary_folders(options,
                                symlink_folder,yolo_results_folder,
                                symlink_folder_is_temp_folder,yolo_folder_is_temp_folder):
    """
    Delete the scratch folders used by an inference job, honoring the caller's
    cleanup preferences.

    Args:
        options (YoloInferenceOptions): only remove_symlink_folder and
            remove_yolo_results_folder are read
        symlink_folder (str): folder holding the image symlinks/copies
        yolo_results_folder (str): folder holding intermediate YOLO results
        symlink_folder_is_temp_folder (bool): whether [symlink_folder] was created
            in temp space by this job, rather than supplied by the caller
        yolo_folder_is_temp_folder (bool): whether [yolo_results_folder] was created
            in temp space by this job, rather than supplied by the caller
    """

    def _remove_or_warn(removal_requested,folder,folder_is_temporary,warning_template):
        # Either delete the folder outright, or - if we created it ourselves and were
        # asked to keep it - let the user know that it's being left behind.
        if removal_requested:
            shutil.rmtree(folder)
            return
        if folder_is_temporary:
            print(warning_template.format(folder))

    # The symlink folder is always handled first; an error removing it propagates before
    # the results folder is touched.
    _remove_or_warn(options.remove_symlink_folder,
                    symlink_folder,
                    symlink_folder_is_temp_folder,
                    'Warning: using temporary symlink folder {}, but not removing it')

    _remove_or_warn(options.remove_yolo_results_folder,
                    yolo_results_folder,
                    yolo_folder_is_temp_folder,
                    'Warning: using temporary YOLO results folder {}, but not removing it')



[docs]
def get_stats_for_category(filename,category='all'):
    """
    Retrieve statistics for a category from the YOLO console output
    stored in [filename].

    Scans the file line by line and returns values from the first row that has exactly
    seven whitespace-delimited tokens and whose first token is exactly [category].  Rows
    for categories that merely start with [category] (e.g. "cattle" when looking for
    "cat") are skipped.

    Args:
        filename (str): a text file containing console output from a YOLO val run
        category (str, optional): a category name, matched against the first token
            of each row

    Returns:
        dict: a dict with fields category, n_images, n_labels, P, R, mAP50, and mAP50-95,
        or None if no matching row was found

    Raises:
        AssertionError: if a matching row is found before header text mentioning both
            the mAP50 and mAP50-95 columns has been seen
    """

    with open(filename,'r',encoding='utf-8') as f:
        lines = f.readlines()

    # This is just a hedge to make sure there isn't some YOLO version floating
    # around that used different IoU thresholds in the console output.
    found_map50 = False
    found_map5095 = False

    for line in lines:

        s = line.strip()
        s_lower = s.lower()

        if ' map50 ' in s_lower or ' map@.5 ' in s_lower:
            found_map50 = True
        if 'map50-95' in s_lower or 'map@.5:.95' in s_lower:
            found_map5095 = True

        # Split on any run of whitespace
        tokens = s.split()

        # Require an exact match on the category token; a prefix match is not enough,
        # since one category name may be a prefix of another.
        if len(tokens) != 7 or tokens[0] != category:
            continue

        assert found_map50 and found_map5095, \
            'Parsing error in YOLO console output file {}'.format(filename)

        return {
            'category': category,
            'n_images': int(tokens[1]),
            'n_labels': int(tokens[2]),
            'P': float(tokens[3]),
            'R': float(tokens[4]),
            'mAP50': float(tokens[5]),
            'mAP50-95': float(tokens[6]),
        }

    # ...for each line

    return None



#%% Main function


[docs]
def run_inference_with_yolo_val(options):
    """
    Runs a folder of images through MegaDetector (or another YOLOv5/YOLOv8 model) with YOLO's
    val.py, converting the output to the standard MD format.

    Args:
        options (YoloInferenceOptions): all the parameters used to control this process,
            including filenames; see YoloInferenceOptions for details
    """

    ##%% Input and path handling

    default_options = YoloInferenceOptions()

    for k in options.__dict__.keys():
        if k not in default_options.__dict__:
            # Print warnings about unexpected variables, except for things like
            # "no_append_job_id_to_symlink_folder", which just negate existing objects
            if not k.startswith('no_'):
                print('Warning: unexpected variable {} in options object'.format(k))

    if options.model_type == 'yolov8':

        print('Warning: model type "yolov8" supplied, "ultralytics" is the preferred model ' + \
              'type string for YOLOv8 models')
        options.model_type = 'ultralytics'

    if (options.model_type == 'yolov5') and ('yolov8' in options.model_filename.lower()):
        print('\n\n*** Warning: model type set as "yolov5", but your model filename contains "yolov8"... ' + \
              'did you mean to use --model_type yolov8?" ***\n\n')

    if options.yolo_working_folder is None:
        assert options.model_type == 'ultralytics', \
            'A working folder is required to run YOLOv5 val.py'
    else:
        assert os.path.isdir(options.yolo_working_folder), \
            'Could not find working folder {}'.format(options.yolo_working_folder)

    if options.half_precision_enabled is not None:
        assert options.half_precision_enabled in (0,1), \
            'Invalid value {} for --half_precision_enabled (should be 0 or 1)'.format(
                options.half_precision_enabled)

    # If the model filename is a known model string (e.g. "MDv5A"), download the model if necessary
    model_filename = try_download_known_detector(options.model_filename)

    assert os.path.isfile(model_filename), \
        'Could not find model file {}'.format(model_filename)

    assert (options.input_folder is not None) or (options.image_filename_list is not None), \
        'You must specify a folder and/or a file list'

    if options.input_folder is not None:
        assert os.path.isdir(options.input_folder), 'Could not find input folder {}'.format(
            options.input_folder)

    if os.path.exists(options.output_file):
        if options.overwrite_handling == 'skip':
            print('Warning: output file {} exists, skipping'.format(options.output_file))
            return
        elif options.overwrite_handling == 'overwrite':
            print('Warning: output file {} exists, overwriting'.format(options.output_file))
        elif options.overwrite_handling == 'error':
            raise ValueError('Output file {} exists'.format(options.output_file))
        else:
            raise ValueError('Unknown output handling method {}'.format(options.overwrite_handling))

    output_dir = os.path.dirname(options.output_file)
    if len(output_dir) > 0:
        os.makedirs(output_dir, exist_ok=True)

    if options.input_folder is not None:
        options.input_folder = options.input_folder.replace('\\','/')


    ##%% Other input handling

    if isinstance(options.yolo_category_id_to_name,str):

        assert os.path.isfile(options.yolo_category_id_to_name)
        yolo_dataset_file = options.yolo_category_id_to_name
        options.yolo_category_id_to_name = \
            yolo_output_to_md_output.read_classes_from_yolo_dataset_file(yolo_dataset_file)
        print('Loaded {} category mappings from {}'.format(
            len(options.yolo_category_id_to_name),yolo_dataset_file))

    temporary_folder = None
    symlink_folder_is_temp_folder = False
    yolo_folder_is_temp_folder = False

    job_id = str(uuid.uuid1())

    def get_job_temporary_folder(tf):
        if tf is not None:
            return tf
        tempdir_base = tempfile.gettempdir()
        tf = os.path.join(tempdir_base,'md_to_yolo','md_to_yolo_' + job_id)
        os.makedirs(tf,exist_ok=True)
        return tf

    symlink_folder = options.symlink_folder
    yolo_results_folder = options.yolo_results_folder

    if symlink_folder is None:
        temporary_folder = get_job_temporary_folder(temporary_folder)
        symlink_folder = os.path.join(temporary_folder,'symlinks')
        symlink_folder_is_temp_folder = True

    if yolo_results_folder is None:
        temporary_folder = get_job_temporary_folder(temporary_folder)
        yolo_results_folder = os.path.join(temporary_folder,'yolo_results')
        yolo_folder_is_temp_folder = True

    if options.append_job_id_to_symlink_folder:
        # Attach a GUID to the symlink folder, regardless of whether we created it
        symlink_folder_inner = os.path.join(symlink_folder,job_id)
    else:
        print('Re-using existing symlink folder {}'.format(symlink_folder))
        symlink_folder_inner = symlink_folder

    os.makedirs(symlink_folder_inner,exist_ok=True)
    os.makedirs(yolo_results_folder,exist_ok=True)


    ##%% Enumerate images

    image_files_relative = None
    image_files_absolute = None

    # If the caller just provided a folder, not a list of files...
    if options.image_filename_list is None:

        assert options.input_folder is not None and os.path.isdir(options.input_folder), \
            'Could not find input folder {}'.format(options.input_folder)
        image_files_relative = path_utils.find_images(options.input_folder,
                                                      recursive=options.recursive,
                                                      return_relative_paths=True,
                                                      convert_slashes=True)
        image_files_absolute = [os.path.join(options.input_folder,fn) for \
                                fn in image_files_relative]

    else:

        # If the caller provided a list of image files (rather than a filename pointing
        # to a list of image files)...
        if is_iterable(options.image_filename_list) and not isinstance(options.image_filename_list,str):

            image_files_relative = options.image_filename_list

        # If the caller provided a filename pointing to a list of image files...
        else:

            assert isinstance(options.image_filename_list,str), \
                'Unrecognized image filename list object type: {}'.format(options.image_filename_list)
            assert os.path.isfile(options.image_filename_list), \
                'Could not find image filename list file: {}'.format(options.image_filename_list)
            ext = os.path.splitext(options.image_filename_list)[-1].lower()
            assert ext in ('.json','.txt'), \
                'Unrecognized image filename list file extension: {}'.format(options.image_filename_list)
            if ext == '.json':
                with open(options.image_filename_list,'r') as f:
                    image_files_relative = json.load(f)
                    assert is_iterable(image_files_relative)
            else:
                assert ext == '.txt'
                with open(options.image_filename_list,'r') as f:
                    image_files_relative = f.readlines()
                    image_files_relative = [s.strip() for s in image_files_relative]

        # ...whether the image filename list was supplied as list vs. a filename

        if options.input_folder is None:

            image_files_absolute = image_files_relative

        else:

            # The list should be relative filenames
            for fn in image_files_relative:
                assert not path_is_abs(fn), \
                    'When providing a folder and a list, paths in the list should be relative'

            image_files_absolute = \
                [os.path.join(options.input_folder,fn) for fn in image_files_relative]

        for fn in image_files_absolute:
            assert os.path.isfile(fn), 'Could not find image file {}'.format(fn)

    # ...whether the caller supplied a list of filenames

    image_files_absolute = [fn.replace('\\','/') for fn in image_files_absolute]

    del image_files_relative


    ##%% Recurse if necessary to handle checkpoints

    if options.checkpoint_frequency is not None and options.checkpoint_frequency > 0:

        chunks = split_list_into_fixed_size_chunks(image_files_absolute,options.checkpoint_frequency)

        chunk_output_files = []

        # i_chunk = 0; chunk_files_abs = chunks[i_chunk]
        for i_chunk,chunk_files_abs in enumerate(chunks):

            print('Processing {} images from chunk {} of {}'.format(
                len(chunk_files_abs),i_chunk,len(chunks)))

            chunk_options = copy.deepcopy(options)

            # Run each chunk without checkpointing
            chunk_options.checkpoint_frequency = None

            if options.input_folder is not None:
                chunk_files_relative = \
                    [os.path.relpath(fn,options.input_folder) for fn in chunk_files_abs]
                chunk_options.image_filename_list = chunk_files_relative
            else:
                chunk_options.image_filename_list = chunk_files_abs

            chunk_options.image_filename_list = \
                [fn.replace('\\','/') for fn in chunk_options.image_filename_list]

            chunk_string = 'chunk_{}'.format(str(i_chunk).zfill(5))
            chunk_options.yolo_results_folder = yolo_results_folder + '_' + chunk_string
            chunk_options.symlink_folder = symlink_folder + '_' + chunk_string

            # Put the output file in the parent job's scratch folder
            chunk_output_file = os.path.join(yolo_results_folder,chunk_string + '_results_md_format.json')
            chunk_output_files.append(chunk_output_file)
            chunk_options.output_file = chunk_output_file

            if os.path.isfile(chunk_output_file):

                print('Chunk output file {} exists, checking completeness'.format(chunk_output_file))

                with open(chunk_output_file,'r') as f:
                    chunk_results = json.load(f)
                images_in_this_chunk_results_file = [im['file'] for im in chunk_results['images']]
                assert len(images_in_this_chunk_results_file) == len(chunk_options.image_filename_list), \
                    f'Expected {len(chunk_options.image_filename_list)} images in ' + \
                    f'chunk results file {chunk_output_file}, found {len(images_in_this_chunk_results_file)}, ' + \
                     'possibly this is left over from a previous job?'
                for fn in images_in_this_chunk_results_file:
                    assert fn in chunk_options.image_filename_list, \
                        f'Unexpected image {fn} in chunk results file {chunk_output_file}, ' + \
                         'possibly this is left over from a previous job?'

                print('Chunk output file {} exists and is complete, skipping this chunk'.format(
                    chunk_output_file))

            # ...if the output file exists

            else:

                run_inference_with_yolo_val(chunk_options)

            # ...if we do/don't have to run this chunk

            assert os.path.isfile(chunk_options.output_file)

        # ...for each chunk

        # Merge
        _ = combine_batch_output_files(input_files=chunk_output_files,
                                 output_file=options.output_file,
                                 require_uniqueness=True,
                                 verbose=True)

        # Validate
        with open(options.output_file,'r') as f:
            combined_results = json.load(f)
        assert len(combined_results['images']) == len(image_files_absolute), \
            'Expected {} images in merged output file, found {}'.format(
                len(image_files_absolute),len(combined_results['images']))

        # Clean up
        _clean_up_temporary_folders(options,
                                    symlink_folder,yolo_results_folder,
                                    symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)

        return

    # ...if we need to make recursive calls for file chunks


    ##%% Create symlinks (or copy images) to give a unique ID to each image

    # Maps YOLO image IDs (base filename without extension as it will appear in YOLO .json output)
    # to the *original full path* for each image (not the symlink path).
    image_id_to_file = {}

    # Maps YOLO image IDs (base filename without extension as it will appear in YOLO .json output)
    # to errors, including errors that happen before we run the model at all (e.g. file access errors).
    image_id_to_error = {}

    create_links = True

    if options.unique_id_strategy == 'links':

        create_links = True

    else:

        assert options.unique_id_strategy in ('auto','verify'), \
            'Unknown unique ID strategy {}'.format(options.unique_id_strategy)

        image_ids_are_unique = True

        for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):

            image_id = os.path.splitext(os.path.basename(image_fn))[0]

            # Is this image ID unique?
            if image_id in image_id_to_file:
                if options.unique_id_strategy == 'verify':
                    raise ValueError('"verify" specified for image uniqueness, but ' +
                                     'image ID {} occurs more than once:\n\n{}\n\n{}'.format(
                                         image_id,image_fn,image_id_to_file[image_id]))
                else:
                    assert options.unique_id_strategy == 'auto'
                    image_ids_are_unique = False
                    image_id_to_file = {}
                    break

            image_id_to_file[image_id] = image_fn

        # ...for each image

        if image_ids_are_unique:

            print('"{}" specified for image uniqueness and images are unique, skipping links'.format(
                options.unique_id_strategy))
            assert len(image_id_to_file) == len(image_files_absolute)
            create_links = False

        else:

            assert options.unique_id_strategy == 'auto'
            create_links = True
            link_type = 'copies'
            if options.use_symlinks:
                link_type = 'links'
            print('"auto" specified for image uniqueness and images are not unique, defaulting to {}'.format(
                link_type))

    # ...which unique ID strategy?

    if create_links:

        if options.use_symlinks:
            print('Creating {} symlinks in {}'.format(len(image_files_absolute),symlink_folder_inner))
        else:
            print('Symlinks disabled, copying {} images to {}'.format(len(image_files_absolute),symlink_folder_inner))

        link_full_paths = []

        # i_image = 0; image_fn = image_files_absolute[i_image]
        for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):

            ext = os.path.splitext(image_fn)[1]
            image_fn_without_extension = os.path.splitext(image_fn)[0]

            # YOLO .json output identifies images by the base filename without the extension
            image_id = str(i_image).zfill(10)
            image_id_to_file[image_id] = image_fn
            symlink_name = image_id + ext
            symlink_full_path = os.path.join(symlink_folder_inner,symlink_name)
            link_full_paths.append(symlink_full_path)

            # If annotation files exist, link those too; only useful if we're reading the computed
            # mAP value, but it doesn't hurt.
            annotation_fn = image_fn_without_extension + '.txt'
            annotation_file_exists = False
            if os.path.isfile(annotation_fn):
                annotation_file_exists = True
                annotation_symlink_name = image_id + '.txt'
                annotation_symlink_full_path = os.path.join(symlink_folder_inner,annotation_symlink_name)

            try:

                if options.use_symlinks:
                    path_utils.safe_create_link(image_fn,symlink_full_path)
                    if annotation_file_exists:
                        path_utils.safe_create_link(annotation_fn,annotation_symlink_full_path)
                else:
                    shutil.copyfile(image_fn,symlink_full_path)
                    if annotation_file_exists:
                        shutil.copyfile(annotation_fn,annotation_symlink_full_path)

            except Exception as e:

                error_string = str(e)
                image_id_to_error[image_id] = error_string

                # Always break if the user is trying to create symlinks on Windows without
                # permission, 100% of images will always fail in this case.
                if ('a required privilege is not held by the client' in error_string.lower()) or \
                   (not options.treat_copy_failures_as_warnings):

                       print('\nError copying/creating link for input file {}: {}'.format(
                           image_fn,error_string))

                       raise

                else:

                    print('Warning: error copying/creating link for input file {}: {}'.format(
                        image_fn,error_string))
                    continue

            # ...except

        # ...for each image

    # ...if we need to create links/copies


    ##%% Create the dataset file if necessary

    # This may have been passed in as a string, but at this point, we should have
    # loaded the dataset file.
    assert isinstance(options.yolo_category_id_to_name,dict)

    # Category IDs need to be continuous integers starting at 0
    category_ids = sorted(list(options.yolo_category_id_to_name.keys()))
    assert category_ids[0] == 0
    assert len(category_ids) == 1 + category_ids[-1]

    yolo_dataset_file = os.path.join(yolo_results_folder,'dataset.yaml')
    yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')

    with open(yolo_image_list_file,'w') as f:

        if create_links:
            image_files_to_write = link_full_paths
        else:
            image_files_to_write = image_files_absolute

        for fn_abs in image_files_to_write:
            # At least in YOLOv5 val (need to verify for YOLOv8 val), filenames in this
            # text file are treated as relative to the text file itself if they start with
            # "./", otherwise they're treated as absolute paths.  Since we don't want to put this
            # text file in the image folder, we'll use absolute paths.
            # fn_relative = os.path.relpath(fn_abs,options.input_folder)
            # f.write(fn_relative + '\n')
            f.write(fn_abs + '\n')

    if create_links:
        inference_folder = symlink_folder_inner
    else:
        # This doesn't matter, but it has to be a valid path
        inference_folder = options.yolo_results_folder

    with open(yolo_dataset_file,'w') as f:

        f.write('path: {}\n'.format(inference_folder))
        # These need to be valid paths, even if you're not using them, and "." is always safe
        f.write('train: .\n')
        f.write('val: .\n')
        f.write('test: {}\n'.format(yolo_image_list_file))
        f.write('\n')
        f.write('nc: {}\n'.format(len(options.yolo_category_id_to_name)))
        f.write('\n')
        f.write('names:\n')
        for category_id in category_ids:
            assert isinstance(category_id,int)
            f.write('  {}: {}\n'.format(category_id,
                                        options.yolo_category_id_to_name[category_id]))


    ##%% Prepare Python command or YOLO CLI command

    if options.image_size is None:
        if options.augment:
            image_size = default_image_size_with_augmentation
        else:
            image_size = default_image_size_with_no_augmentation
    else:
        image_size = options.image_size

    image_size_string = str(round(image_size))

    if options.model_type == 'yolov5':

        cmd = 'python val.py --task test --data "{}"'.format(yolo_dataset_file)
        cmd += ' --weights "{}"'.format(model_filename)
        cmd += ' --batch-size {} --imgsz {} --conf-thres {}'.format(
            options.batch_size,image_size_string,options.conf_thres)
        cmd += ' --device "{}" --save-json'.format(options.device_string)
        cmd += ' --project "{}" --name "{}" --exist-ok'.format(yolo_results_folder,'yolo_results')

        # This is the NMS IoU threshold
        # cmd += ' --iou-thres 0.6'

        if options.augment:
            cmd += ' --augment'

        # --half is a store_true argument for YOLOv5's val.py
        if (options.half_precision_enabled is not None) and (options.half_precision_enabled == 1):
            cmd += ' --half'

        # Sometimes useful for debugging
        # cmd += ' --save_conf --save_txt'

    elif options.model_type == 'ultralytics':

        if options.augment:
            augment_string = 'augment'
        else:
            augment_string = ''

        cmd = 'yolo val {} model="{}" imgsz={} batch={} data="{}" project="{}" name="{}" device="{}"'.\
            format(augment_string,model_filename,image_size_string,options.batch_size,
                   yolo_dataset_file,yolo_results_folder,'yolo_results',options.device_string)
        cmd += ' save_json exist_ok'

        if (options.half_precision_enabled is not None):
            if options.half_precision_enabled == 1:
                cmd += ' --half=True'
            else:
                assert options.half_precision_enabled == 0
                cmd += ' --half=False'

        # Sometimes useful for debugging
        # cmd += ' save_conf save_txt'

    else:

        raise ValueError('Unrecognized model type {}'.format(options.model_type))

    # print(cmd); import clipboard; clipboard.copy(cmd)


    ##%% Run YOLO command

    if options.yolo_working_folder is not None:
        current_dir = os.getcwd()
        os.chdir(options.yolo_working_folder)

    print('Running YOLO inference command:\n{}\n'.format(cmd))

    if options.preview_yolo_command_only:

        if options.remove_symlink_folder:
            try:
                print('Removing YOLO symlink folder {}'.format(symlink_folder))
                shutil.rmtree(symlink_folder)
            except Exception:
                print('Warning: error removing symlink folder {}'.format(symlink_folder))
                pass
        if options.remove_yolo_results_folder:
            try:
                print('Removing YOLO results folder {}'.format(yolo_results_folder))
                shutil.rmtree(yolo_results_folder)
            except Exception:
                print('Warning: error removing YOLO results folder {}'.format(yolo_results_folder))
                pass

        # sys.exit()
        return

    execution_result = process_utils.execute_and_print(cmd,encoding='utf-8',verbose=True)
    assert execution_result['status'] == 0, 'Error running {}'.format(options.model_type)
    yolo_console_output = execution_result['output']

    if options.save_yolo_debug_output:

        with open(os.path.join(yolo_results_folder,'yolo_console_output.txt'),'w',encoding='utf-8') as f:
            for s in yolo_console_output:
                f.write(s + '\n')
        ct_utils.write_json(os.path.join(yolo_results_folder,'image_id_to_file.json'), image_id_to_file)
        ct_utils.write_json(os.path.join(yolo_results_folder,'image_id_to_error.json'), image_id_to_error)


    # YOLO console output contains lots of ANSI escape codes, remove them for easier parsing
    yolo_console_output = [string_utils.remove_ansi_codes(s) for s in yolo_console_output]

    # Find errors that occurred during the initial corruption check; these will not be included in the
    # output.  Errors that occur during inference will be handled separately.
    yolo_read_failures = []

    for line in yolo_console_output:

        #
        # Lines indicating read failures look like:
        #
        # For ultralytics val:
        #
        # line = "val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'"
        #
        # For yolov5 val.py:
        #
        # line = "test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'"
        #
        # In both cases, when we are using symlinks, the first filename is the symlink name, the
        # second filename is the target, e.g.:
        #
        # line = "test: WARNING: /tmp/md_to_yolo/md_to_yolo_xyz/symlinks/xyz/0000000004.jpg: ignoring corrupt image/label: cannot identify image file '/tmp/md-tests/md-test-images/corrupt-images/real-file.jpg'"
        #
        # Windows example:
        #
        # line = "test: WARNING: g:\\temp\\md-test-images\\corrupt-images\\irfanview-can-still-read-me-caltech_camera_traps_5a0e37cc-23d2-11e8-a6a3-ec086b02610b.jpg: ignoring corrupt image/label: cannot identify image file 'g:\\\\temp\\\\md-test-images\\\\corrupt-images\\\\irfanview-can-still-read-me-caltech_camera_traps_5a0e37cc-23d2-11e8-a6a3-ec086b02610b.jpg'"
        #

        line = line.replace('⚠️',':')
        if 'ignoring corrupt image/label' in line:

            line_tokens = line.split('ignoring corrupt image/label')
            assert len(line_tokens) == 2

            tokens = line_tokens[0].split(':') # ,maxsplit=3)
            tokens = [s.strip() for s in tokens]

            # ['test', ' WARNING', ' a/b/c/d.jpg', ' ']
            assert len(tokens[-1]) == 0
            tokens = tokens[:-1]
            assert 'warning' in tokens[1].lower()

            if len(tokens) == 3:
                image_name = tokens[2].strip()
            else:
                # Windows filenames have one extra colon
                assert len(tokens) == 4
                assert len(tokens[2]) == 1
                image_name = ':'.join(tokens[2:4])

            yolo_read_failures.append(image_name)

        # ...if this line indicated a corrupt image

    # ...for each line in the console output

    # image_file = yolo_read_failures[0]
    for image_file in yolo_read_failures:
        image_id = os.path.splitext(os.path.basename(image_file))[0]
        assert image_id in image_id_to_file, 'Unexpected image ID {}'.format(image_id)
        if image_id not in image_id_to_error:
            image_id_to_error[image_id] = 'YOLO read failure'

    if options.yolo_working_folder is not None:
        os.chdir(current_dir)


    ##%% Convert results to MD format

    json_files = glob.glob(yolo_results_folder + '/yolo_results/*.json')
    assert len(json_files) == 1
    yolo_json_file = json_files[0]

    # Map YOLO image IDs to paths
    image_id_to_relative_path = {}
    for image_id in image_id_to_file:
        fn = image_id_to_file[image_id].replace('\\','/')
        assert path_is_abs(fn)
        if options.input_folder is not None:
            assert os.path.isdir(options.input_folder)
            assert options.input_folder in fn, 'Internal error: base folder {} not in file {}'.format(
                options.input_folder,fn)
            relative_path = os.path.relpath(fn,options.input_folder)
        else:
            # We'll use the absolute path as a relative path, and pass '/'
            # as the base path in this case.
            relative_path = fn
        image_id_to_relative_path[image_id] = relative_path

    # Are we working with a base folder?
    if options.input_folder is not None:
        assert os.path.isdir(options.input_folder)
        image_base = options.input_folder
    else:
        image_base = '/'

    yolo_output_to_md_output.yolo_json_output_to_md_output(
        yolo_json_file=yolo_json_file,
        image_folder=image_base,
        output_file=options.output_file,
        yolo_category_id_to_name=options.yolo_category_id_to_name,
        detector_name=os.path.basename(model_filename),
        image_id_to_relative_path=image_id_to_relative_path,
        image_id_to_error=image_id_to_error,
        offset_yolo_class_ids=options.offset_yolo_category_ids)


    ##%% Clean up

    _clean_up_temporary_folders(options,
                                symlink_folder,yolo_results_folder,
                                symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)


# ...def run_inference_with_yolo_val()


#%% Command-line driver

def main(): # noqa
    """
    Command-line driver for run_inference_with_yolo_val().

    Parses command-line arguments, copies them onto a YoloInferenceOptions object,
    translates the negative CLI flags (--no_*, --nonrecursive) and --augment_enabled
    into the corresponding positive boolean options, and then runs inference.

    Prints the help text and exits if no arguments are supplied.  Invalid argument
    combinations are reported via parser.error(), i.e. a usage message followed by a
    non-zero exit, so validation still happens when Python runs with -O.
    """

    options = YoloInferenceOptions()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'model_filename',type=str,
        help='model file name')
    parser.add_argument(
        'input_folder',type=str,
        help='folder on which to recursively run the model, or a .json or .txt file ' + \
             'containing a list of absolute image paths')
    parser.add_argument(
        'output_file',type=str,
        help='.json file where output will be written')

    parser.add_argument(
        '--image_filename_list',type=str,default=None,
        help='.json or .txt file containing a list of relative image filenames within [input_folder]')
    parser.add_argument(
        '--yolo_working_folder',type=str,default=None,
        help='folder in which to execute val.py (not necessary for YOLOv8 inference)')
    parser.add_argument(
        '--image_size', default=None, type=int,
        help='image size for model execution (default {} when augmentation is enabled, else {})'.format(
            default_image_size_with_augmentation,default_image_size_with_no_augmentation))
    parser.add_argument(
        '--conf_thres', default=options.conf_thres, type=float,
        help='confidence threshold for including detections in the output file (default {})'.format(
            options.conf_thres))
    parser.add_argument(
        '--batch_size', default=options.batch_size, type=int,
        help='inference batch size (default {})'.format(options.batch_size))
    parser.add_argument(
        '--half_precision_enabled', default=None, type=int,
        help='use half-precision-inference (1 or 0) (default is the underlying model\'s default, ' + \
             'probably full for YOLOv8 and half for YOLOv5)')
    parser.add_argument(
        '--device_string', default=options.device_string, type=str,
        help='CUDA device specifier, typically "0" or "1" for CUDA devices, "mps" for ' + \
             'M1/M2 devices, or "cpu" (default {})'.format(
            options.device_string))
    parser.add_argument(
        '--overwrite_handling', default=options.overwrite_handling, type=str,
        help='action to take if the output file exists (skip, error, overwrite) (default {})'.format(
            options.overwrite_handling))
    parser.add_argument(
        '--yolo_dataset_file', default=None, type=str,
        help='YOLOv5 dataset.yaml file from which we should load category information ' + \
            '(otherwise defaults to MD categories)')
    parser.add_argument(
        '--model_type', default=options.model_type, type=str,
        help='model type ("yolov5" or "ultralytics" ("yolov8" behaves the same as "ultralytics")) (default {})'.format(
            options.model_type))

    parser.add_argument('--unique_id_strategy', default=options.unique_id_strategy, type=str,
        help='how should we ensure that unique filenames are passed to the YOLO val script, ' + \
             'can be "verify", "auto", or "links", see options class docs for details (default {})'.format(
                 options.unique_id_strategy))
    parser.add_argument(
        '--symlink_folder', default=None, type=str,
        help='temporary folder for symlinks (defaults to a folder in the system temp dir)')
    parser.add_argument(
        '--yolo_results_folder', default=None, type=str,
        help='temporary folder for YOLO intermediate output (defaults to a folder in the system temp dir)')
    parser.add_argument(
        '--no_use_symlinks', action='store_true',
        help='copy files instead of creating symlinks when preparing the yolo input folder')
    parser.add_argument(
        '--no_remove_symlink_folder', action='store_true',
        help='don\'t remove the temporary folder full of symlinks')
    parser.add_argument(
        '--no_remove_yolo_results_folder', action='store_true',
        help='don\'t remove the temporary folder full of YOLO intermediate files')
    parser.add_argument(
        '--save_yolo_debug_output', action='store_true',
        help='write yolo console output to a text file in the results folder, along with additional debug files')
    parser.add_argument(
        '--checkpoint_frequency', default=options.checkpoint_frequency, type=int,
        help='break the job into chunks with no more than this many images (default {})'.format(
            options.checkpoint_frequency))
    parser.add_argument(
        '--no_append_job_id_to_symlink_folder', action='store_true',
        help="don't append a unique job ID to the symlink folder name")
    parser.add_argument(
        '--nonrecursive', action='store_true',
        help='disable recursive folder processing')
    parser.add_argument(
        '--no_offset_class_ids', action='store_true',
        help='disable class ID offsetting')

    parser.add_argument(
        '--preview_yolo_command_only', action='store_true',
        help='don\'t run inference, just preview the YOLO inference command (still creates symlinks)')

    # The CLI exposes augmentation as an int (1/0), so convert the boolean default
    default_augment_enabled = 1 if options.augment else 0

    parser.add_argument(
        '--augment_enabled', default=default_augment_enabled, type=int,
        help='enable/disable augmentation (default {})'.format(default_augment_enabled))

    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # If the caller hasn't specified an image size, choose one based on whether augmentation
    # is enabled.
    if args.image_size is None:
        # Use parser.error() rather than assert: asserts are stripped under "python -O",
        # in which case an illegal value would silently select the no-augmentation image
        # size while still enabling augmentation below.
        if args.augment_enabled not in (0,1):
            parser.error('Illegal augment_enabled value {}'.format(args.augment_enabled))
        if args.augment_enabled == 1:
            args.image_size = default_image_size_with_augmentation
        else:
            args.image_size = default_image_size_with_no_augmentation
        augment_enabled_string = 'enabled' if args.augment_enabled else 'disabled'
        print('Augmentation is {}, using default image size {}'.format(
            augment_enabled_string,args.image_size))

    # Copy every parsed argument onto the options object; CLI-only fields are translated
    # and removed below.
    args_to_object(args, options)

    if args.yolo_dataset_file is not None:
        options.yolo_category_id_to_name = args.yolo_dataset_file

    # The function convention is that input_folder should be None when we want to use a list of
    # absolute paths, but the CLI convention is that the required argument is always valid, whether
    # it's a folder or a list of absolute paths.
    if os.path.isfile(options.input_folder):
        if options.image_filename_list is not None:
            parser.error('image_filename_list should not be specified when input_folder is a file')
        options.image_filename_list = options.input_folder
        options.input_folder = None

    # Translate the negative/integer CLI flags into the positive booleans used by the
    # options object
    options.recursive = (not options.nonrecursive)
    options.append_job_id_to_symlink_folder = (not options.no_append_job_id_to_symlink_folder)
    options.remove_symlink_folder = (not options.no_remove_symlink_folder)
    options.remove_yolo_results_folder = (not options.no_remove_yolo_results_folder)
    options.use_symlinks = (not options.no_use_symlinks)
    options.augment = (options.augment_enabled > 0)
    options.offset_yolo_category_ids = (not options.no_offset_class_ids)

    # Remove the CLI-only attributes that args_to_object() copied onto the options object
    del options.nonrecursive
    del options.no_append_job_id_to_symlink_folder
    del options.no_remove_symlink_folder
    del options.no_remove_yolo_results_folder
    del options.no_use_symlinks
    del options.augment_enabled
    del options.yolo_dataset_file
    del options.no_offset_class_ids

    print(options.__dict__)

    run_inference_with_yolo_val(options)

# Run the command-line driver only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()


#%% Interactive driver

# The body of this block never executes when the file is imported or run as a script
# ("if False"); the "#%%" cells below are presumably meant to be run by hand, one cell
# at a time, from an interactive session.
if False:


    #%% Debugging

    # Hard-coded local paths for a debugging run; edit these before running the cell
    input_folder = r'g:\temp\md-test-images'
    model_filename = 'MDV5A'
    output_folder = r'g:\temp\yolo-test-out'
    yolo_working_folder = r'c:\git\yolov5-md'
    dataset_file = r"g:\temp\md-test-images\dataset.yaml"
    job_name = 'yolo-debug'
    symlink_folder = os.path.join(output_folder,'symlinks')
    yolo_results_folder = os.path.join(output_folder,'yolo_results')
    model_name = os.path.splitext(os.path.basename(model_filename))[0]

    # job_name and model_name are only used to build the output file name
    output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
        job_name,model_name))

    options = YoloInferenceOptions()

    options.yolo_working_folder = yolo_working_folder
    options.input_folder = input_folder
    options.output_file = output_file

    # Category names are supplied as a dataset file path, the same way the CLI's
    # --yolo_dataset_file value is assigned in main()
    options.yolo_category_id_to_name = dataset_file
    options.augment = False
    # NOTE(review): conf_thres is a string here, whereas the CLI parses it as a float;
    # this cell only formats it into a command line, confirm other uses tolerate a string
    options.conf_thres = '0.001'
    options.batch_size = 1
    options.device_string = '0'
    options.unique_id_strategy = 'auto'
    options.overwrite_handling = 'overwrite'

    # Use a larger image size when augmentation is enabled
    if options.augment:
        options.image_size = round(1280 * 1.3)
    else:
        options.image_size = 1280

    options.model_filename = model_filename

    options.yolo_results_folder = yolo_results_folder # os.path.join(output_folder + 'yolo_results')
    options.symlink_folder = symlink_folder # os.path.join(output_folder,'symlinks')
    options.use_symlinks = False

    options.remove_symlink_folder = True
    options.remove_yolo_results_folder = True

    options.checkpoint_frequency = None

    # Build the command line for the CLI driver that corresponds to the options above.
    # Paths are not quoted, so this assumes they contain no spaces.
    cmd = f'python run_inference_with_yolov5_val.py {model_filename} {input_folder} ' + \
          f'{output_file} --yolo_working_folder {yolo_working_folder} ' + \
          f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
          f' --batch_size {options.batch_size} ' + \
          f' --symlink_folder {options.symlink_folder} --yolo_results_folder {options.yolo_results_folder} ' + \
          f' --yolo_dataset_file {options.yolo_category_id_to_name} ' + \
          f' --unique_id_strategy {options.unique_id_strategy} --overwrite_handling {options.overwrite_handling}'

    # Append the optional flags that differ from the CLI defaults
    if not options.remove_symlink_folder:
        cmd += ' --no_remove_symlink_folder'
    if not options.remove_yolo_results_folder:
        cmd += ' --no_remove_yolo_results_folder'
    if options.checkpoint_frequency is not None:
        cmd += f' --checkpoint_frequency {options.checkpoint_frequency}'
    if not options.use_symlinks:
        cmd += ' --no_use_symlinks'
    if not options.augment:
        cmd += ' --augment_enabled 0'

    print(cmd)
    # Either run inference in this process, or copy the command to the clipboard so it
    # can be pasted into a shell (requires the "clipboard" module)
    execute_in_python = False
    if execute_in_python:
        run_inference_with_yolo_val(options)
    else:
        import clipboard; clipboard.copy(cmd)



    #%% Run inference on a folder

    # Hard-coded local paths for a test run; edit these before running the cell
    input_folder = r'g:\temp\tegu-val-mini'.replace('\\','/')
    model_filename = r'g:\temp\usgs-tegus-yolov5x-231003-b8-img1280-e3002-best.pt'
    output_folder = r'g:\temp\tegu-scratch'
    yolo_working_folder = r'c:\git\yolov5-tegus'
    dataset_file = r'g:\temp\dataset.yaml'

    # This only impacts the output file name, it's not passed to the inference function
    job_name = 'yolo-inference-test'

    model_name = os.path.splitext(os.path.basename(model_filename))[0]

    symlink_folder = os.path.join(output_folder,'symlinks')
    yolo_results_folder = os.path.join(output_folder,'yolo_results')

    output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
        job_name,model_name))

    options = YoloInferenceOptions()

    options.yolo_working_folder = yolo_working_folder
    options.input_folder = input_folder
    options.output_file = output_file

    # Toggles for exercising the image_filename_list option: either no list, a list of
    # paths relative to input_folder, or a list of absolute paths
    pass_image_filename_list = False
    pass_relative_paths = True

    if pass_image_filename_list:
        if pass_relative_paths:
            options.image_filename_list =  [
                r"val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(05) 18AUG17 - 05SEP17 FTC AEG#MFDC1949_000065.JPG",
                r"val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(04) 27JUL17 - 18AUG17 FTC AEG#MFDC1902_000064.JPG"
            ]
        else:
            options.image_filename_list =  [
                r"g:/temp/tegu-val-mini/val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(05) 18AUG17 - 05SEP17 FTC AEG#MFDC1949_000065.JPG",
                r"g:/temp/tegu-val-mini/val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(04) 27JUL17 - 18AUG17 FTC AEG#MFDC1902_000064.JPG"
            ]
    else:
        options.image_filename_list = None

    options.yolo_category_id_to_name = dataset_file
    options.augment = False
    # NOTE(review): conf_thres is a string here, whereas the CLI parses it as a float;
    # this cell only formats it into a command line, confirm other uses tolerate a string
    options.conf_thres = '0.001'
    options.batch_size = 1
    options.device_string = '0'
    options.unique_id_strategy = 'auto'
    options.overwrite_handling = 'overwrite'

    # Use a larger image size when augmentation is enabled
    if options.augment:
        options.image_size = round(1280 * 1.3)
    else:
        options.image_size = 1280

    options.model_filename = model_filename

    options.yolo_results_folder = yolo_results_folder # os.path.join(output_folder + 'yolo_results')
    options.symlink_folder = symlink_folder # os.path.join(output_folder,'symlinks')
    options.use_symlinks = False

    options.remove_symlink_folder = True
    options.remove_yolo_results_folder = True

    options.checkpoint_frequency = 5

    # Build the command line for the CLI driver that corresponds to the options above.
    # The image filename list (if any) is not included in the command, and paths are
    # not quoted, so this assumes they contain no spaces.
    cmd = f'python run_inference_with_yolov5_val.py {model_filename} {input_folder} ' + \
          f'{output_file} --yolo_working_folder {yolo_working_folder} ' + \
          f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
          f' --batch_size {options.batch_size} ' + \
          f' --symlink_folder {options.symlink_folder} --yolo_results_folder {options.yolo_results_folder} ' + \
          f' --yolo_dataset_file {options.yolo_category_id_to_name} ' + \
          f' --unique_id_strategy {options.unique_id_strategy} --overwrite_handling {options.overwrite_handling}'

    # Append the optional flags that differ from the CLI defaults
    if not options.remove_symlink_folder:
        cmd += ' --no_remove_symlink_folder'
    if not options.remove_yolo_results_folder:
        cmd += ' --no_remove_yolo_results_folder'
    if options.checkpoint_frequency is not None:
        cmd += f' --checkpoint_frequency {options.checkpoint_frequency}'
    if not options.use_symlinks:
        cmd += ' --no_use_symlinks'
    if not options.augment:
        cmd += ' --augment_enabled 0'

    print(cmd)
    # Either run inference in this process, or copy the command to the clipboard so it
    # can be pasted into a shell (requires the "clipboard" module)
    execute_in_python = False
    if execute_in_python:
        run_inference_with_yolo_val(options)
    else:
        import clipboard; clipboard.copy(cmd)